|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#version 460 |
|
#extension GL_EXT_shader_8bit_storage : require |
|
#extension GL_EXT_shader_16bit_storage : require |
|
#extension GL_EXT_shader_explicit_arithmetic_types : require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_float32 : require |
|
#extension GL_GOOGLE_include_directive : enable |
|
|
|
|
|
// Identifiers for the available scale modes; SCALE_MODE is set to one of them.
#define SCALE_1_0X  0
#define SCALE_1_3X  1
#define SCALE_1_5X  2
#define SCALE_2_0X  3

// Variant configuration for this shader.
// When HISTORY_CATMULL is defined, LoadWarpedHistory samples the history with
// LoadHistoryCatmull; otherwise it uses the plain LoadHistory fetch.
#define HISTORY_CATMULL

// Only SCALE_2_0X has an implementation in this file; any other value
// reaches the "Unsupported SCALE_MODE" #error.
#define SCALE_MODE  SCALE_2_0X
|
|
|
|
|
#include "typedefs.h" |
|
#include "common.h" |
|
#include "kernel_lut.h" |
|
|
|
|
|
// ---- Inputs (descriptor set 0) ---------------------------------------------

// Current-frame colour, fetched per tap in LoadAndFilterColour.
layout (set = 0, binding = 0) uniform mediump sampler2D _ColourTex;
// Motion vectors, fetched in LoadMotion.
layout (set = 0, binding = 1) uniform mediump sampler2D _MotionVectorTex;
// Previously accumulated output, sampled in LoadHistory.
layout (set = 0, binding = 2) uniform mediump sampler2D _HistoryTex;

// Quantized kernel tensors, dequantized and re-packed in LoadKPNWeight.
layout (set = 0, binding = 3) uniform lowp sampler2D _K0Tensor;
layout (set = 0, binding = 4) uniform lowp sampler2D _K1Tensor;
layout (set = 0, binding = 5) uniform lowp sampler2D _K2Tensor;
layout (set = 0, binding = 6) uniform lowp sampler2D _K3Tensor;

// Quantized temporal parameters, read in LoadTemporalParameters.
layout (set = 0, binding = 7) uniform lowp sampler2D _TemporalTensor;
// Encoded nearest-depth offset, decoded in LoadNearestDepthOffset.
layout (set = 0, binding = 8) uniform lowp sampler2D _NearestDepthCoordTex;

// ---- Output (descriptor set 1) ---------------------------------------------

// Destination image, written once per invocation by WriteUpsampledColour.
layout (set = 1, binding = 0, r11f_g11f_b10f) uniform writeonly mediump image2D _UpsampledColourOut;
|
|
|
|
|
// Per-dispatch parameters. Every member carries an explicit byte offset, so
// the declaration order below is informational only.
layout(push_constant, std430) uniform PushConstants {
    // --- 32-bit members ---
    layout(offset = 0)  int32_t2 _OutputDims;      // bounds check in main(); history texel scaling
    layout(offset = 8)  int32_t2 _InputDims;       // input pixel lookup and tap clamping
    layout(offset = 16) float2   _InvOutputDims;   // output pixel -> uv
    layout(offset = 24) float2   _InvInputDims;    // not referenced in the visible part of this shader
    layout(offset = 32) float2   _Scale;           // not referenced in the visible part of this shader
    layout(offset = 40) float2   _InvScale;        // output-space tap position -> input pixel

    // --- 16-bit members (two packed per 32-bit slot) ---
    layout(offset = 48) int16_t2 _IndexModulo;     // tiling period of the kernel LUT index
    layout(offset = 52) half2    _QuantParams;     // default argument for every Dequantize call
    layout(offset = 56) int16_t2 _LutOffset;       // added to the output pixel before the modulo
    layout(offset = 60) half2    _ExposurePair;    // .x = _Exposure, .y = _InvExposure
    layout(offset = 64) half2    _HistoryPad;      // .x = _NotHistoryReset; .y unused here
    layout(offset = 68) half2    _MotionThreshPad; // .x = _MotionThresh; .y unused here
    layout(offset = 72) int32_t  _Padding0;        // never read in this file
};
|
|
|
|
|
// Scalar aliases for the packed half2 push-constant members.
#define _Exposure         _ExposurePair.x      // multiplied into colours as they are loaded
#define _InvExposure      _ExposurePair.y      // multiplied into the result before it is written
#define _NotHistoryReset  _HistoryPad.x        // scales theta in LoadTemporalParameters
#define _MotionThresh     _MotionThreshPad.x   // squared-length threshold (in output pixels) for motion
|
|
|
|
|
|
|
|
|
|
|
// Quantization parameters handed to Dequantize for each tensor. Any of these
// may be defined before this point (e.g. by an included header or the build);
// the ones left undefined fall back to the shared _QuantParams push constant.
#if !defined(_K0QuantParams)
    #define _K0QuantParams _QuantParams.xy
#endif

#if !defined(_K1QuantParams)
    #define _K1QuantParams _QuantParams.xy
#endif

#if !defined(_K2QuantParams)
    #define _K2QuantParams _QuantParams.xy
#endif

#if !defined(_K3QuantParams)
    #define _K3QuantParams _QuantParams.xy
#endif

#if !defined(_TemporalQuantParams)
    #define _TemporalQuantParams _QuantParams.xy
#endif
|
|
|
|
|
|
|
|
|
// Fetches the texel at integer coordinate `pixel` (mip 0) of _MotionVectorTex
// and returns its first two channels as a half-precision vector.
half2 LoadMotion(int32_t2 pixel)
{
    half2 mv = half2(texelFetch(_MotionVectorTex, pixel, 0).xy);
    return mv;
}
|
|
|
|
|
// Samples _HistoryTex at normalised coordinate `uv` on mip 0 and returns the
// colour channels in half precision. Filtering is whatever the bound sampler
// provides.
half3 LoadHistory(float2 uv)
{
    half3 rgb = half3(textureLod(_HistoryTex, uv, 0).xyz);
    return rgb;
}
|
|
|
// Samples the history at `uv` with a five-fetch bicubic filter built from
// Catmull-Rom weights.
//
// Per axis the four cubic taps sit at offsets -1, 0, +1 and +2 from the
// texel centre at or before the sample position. The two middle taps are
// merged into one fetch, and only a plus-shaped set of fetches is taken
// (left, up, centre, right, down); the corner terms are dropped and the
// result is renormalised by the sum of the weights actually used.
// NOTE(review): merging the middle taps relies on _HistoryTex being bound
// with a linearly filtering sampler - configured outside this file.
//
// If any channel of the filtered colour is negative, the colour is clamped
// to the per-channel min/max of the five fetched samples.
half3 LoadHistoryCatmull(float2 uv)
{
    // Sample position in history texels, and the reference texel centre.
    float2 texel_pos    = uv * _OutputDims;
    float2 texel_centre = floor(texel_pos - 0.5) + 0.5;

    // Fractional distance from the reference centre, and its powers.
    half2 t  = half2(texel_pos - texel_centre);
    half2 t2 = t * t;
    half2 t3 = t2 * t;

    // Cubic weights for the taps at offsets -1, 0 and +2; the +1 weight is
    // derived so that the four weights sum to one.
    half2 w_m1 = t2 - 0.5HF * (t3 + t);
    half2 w_0  = 1.5HF * t3 - 2.5HF * t2 + 1.0HF;
    half2 w_p2 = 0.5HF * (t3 - t2);
    half2 w_p1 = (1.0HF - w_m1) - w_0 - w_p2;

    // Combined weight of the two middle taps, and where between them the
    // single merged fetch is placed.
    half2 w_mid   = w_0 + w_p1;
    half2 mid_off = w_p1 / w_mid;

    // Separable weight products for the five fetches.
    half weight_up     = w_mid.x * w_m1.y;
    half weight_down   = w_mid.x * w_p2.y;
    half weight_left   = w_m1.x  * w_mid.y;
    half weight_right  = w_p2.x  * w_mid.y;
    half weight_centre = w_mid.x * w_mid.y;

    // Fetch the samples. Alpha is forced to one so that the weighted sum's
    // alpha ends up holding the total weight.
    half4 tap_left   = half4(LoadHistory((texel_centre + float2(-1.0, mid_off.y))      * _InvOutputDims), 1.HF);
    half4 tap_up     = half4(LoadHistory((texel_centre + float2(mid_off.x, -1.0))      * _InvOutputDims), 1.HF);
    half4 tap_centre = half4(LoadHistory((texel_centre + float2(mid_off.x, mid_off.y)) * _InvOutputDims), 1.HF);
    half4 tap_right  = half4(LoadHistory((texel_centre + float2(2.0, mid_off.y))       * _InvOutputDims), 1.HF);
    half4 tap_down   = half4(LoadHistory((texel_centre + float2(mid_off.x, 2.0))       * _InvOutputDims), 1.HF);

    half4 weighted_sum = tap_up     * weight_up     +
                         tap_left   * weight_left   +
                         tap_centre * weight_centre +
                         tap_right  * weight_right  +
                         tap_down   * weight_down;

    // Per-channel bounds of the fetched neighbourhood.
    half3 lo = min(tap_up.rgb,
               min(tap_left.rgb,
               min(tap_centre.rgb,
               min(tap_right.rgb, tap_down.rgb))));
    half3 hi = max(tap_up.rgb,
               max(tap_left.rgb,
               max(tap_centre.rgb,
               max(tap_right.rgb, tap_down.rgb))));

    // Renormalise by the accumulated weight.
    half3 filtered = weighted_sum.rgb * rcp(weighted_sum.w);

    // The negative lobes of the cubic can undershoot below zero; only in
    // that case fall back to the neighbourhood clamp.
    if (any(lessThan(filtered, half3(0.HF))))
    {
        return clamp(filtered, lo, hi);
    }
    return filtered;
}
|
|
|
|
|
// Reads the encoded value stored for `pixel` in _NearestDepthCoordTex and
// returns the integer pixel offset that DecodeNearestDepthCoord produces
// for it.
int32_t2 LoadNearestDepthOffset(int32_t2 pixel)
{
    half normalised = half(texelFetch(_NearestDepthCoordTex, pixel, 0).x);

    // Rescale by 255 and add 0.5 so the integer conversion rounds to nearest.
    // NOTE(review): presumably the texture is 8-bit UNORM, giving codes 0..255 - confirm.
    int32_t packed = int32_t(normalised * 255.0 + 0.5);

    return DecodeNearestDepthCoord(packed);
}
|
|
|
|
|
// Reprojects the history colour for the output location `uv`.
//
//   uv          - normalised coordinate of the output pixel.
//   input_pixel - matching integer pixel in the input-resolution textures.
//   onscreen    - out: 1 when the reprojected coordinate lies in [0, 1) on
//                 both axes, otherwise 0.
//
// Returns the history sample at the reprojected coordinate, multiplied by
// _Exposure and passed through SafeColour. The sample is taken even when
// `onscreen` is 0; the caller is expected to weight it accordingly.
half3 LoadWarpedHistory(float2 uv, int32_t2 input_pixel, out half onscreen)
{
    // Read the motion vector from the neighbour selected by the
    // nearest-depth offset rather than from input_pixel itself.
    int32_t2 mv_pixel = input_pixel + LoadNearestDepthOffset(input_pixel);
    half2    mv       = LoadMotion(mv_pixel);

    // Zero the motion unless its squared length, measured in output pixels,
    // exceeds _MotionThresh.
    half2 mv_in_pixels = mv * half2(_OutputDims);
    half  mv_len_sq    = dot(mv_in_pixels, mv_in_pixels);
    mv *= half(mv_len_sq > _MotionThresh);

    // Step back along the motion to find where this pixel was last frame.
    float2 prev_uv = uv - float2(mv);

    bool inside_low  = all(greaterThanEqual(prev_uv, float2(0.0)));
    bool inside_high = all(lessThan(prev_uv, float2(1.0)));
    onscreen = half(inside_low && inside_high);

#ifdef HISTORY_CATMULL
    half3 prev_colour = LoadHistoryCatmull(prev_uv);
#else
    half3 prev_colour = LoadHistory(prev_uv);
#endif

    return SafeColour(prev_colour * _Exposure);
}
|
|
|
#if SCALE_MODE == SCALE_2_0X |
|
|
|
|
|
|
|
|
|
|
|
|
|
// Returns the four kernel weights for the output pixel at `uv`.
//
// All four kernel tensors are sampled and dequantized, then one of four
// fixed channel selections is returned depending on `lut_idx`:
//   0 -> (k0.x, k2.x, k0.z, k2.z)
//   1 -> (k1.x, k3.x, k1.z, k3.z)
//   2 -> (k0.y, k2.y, k0.w, k2.w)
//   any other value -> (k1.y, k3.y, k1.w, k3.w)
half4 LoadKPNWeight(float2 uv, int16_t lut_idx)
{
    half4 k0 = Dequantize(half4(textureLod(_K0Tensor, uv, 0)), _K0QuantParams);
    half4 k1 = Dequantize(half4(textureLod(_K1Tensor, uv, 0)), _K1QuantParams);
    half4 k2 = Dequantize(half4(textureLod(_K2Tensor, uv, 0)), _K2QuantParams);
    half4 k3 = Dequantize(half4(textureLod(_K3Tensor, uv, 0)), _K3QuantParams);

    if (lut_idx == 0)
    {
        return half4(k0.x, k2.x, k0.z, k2.z);
    }
    if (lut_idx == 1)
    {
        return half4(k1.x, k3.x, k1.z, k3.z);
    }
    if (lut_idx == 2)
    {
        return half4(k0.y, k2.y, k0.w, k2.w);
    }
    return half4(k1.y, k3.y, k1.w, k3.w);
}
|
|
|
|
|
// Filters the current-frame colour for one output pixel with four weighted taps.
//
//   output_pixel - integer coordinate of the output pixel.
//   uv           - normalised coordinate used to sample the kernel tensors.
//   col_to_accum - out: the CENTER_TAP sample (rgb, alpha = 1) when that
//                  tap's LUT offset is exactly (0, 0); all zeros otherwise.
//
// Returns the weight-normalised sum of the four exposure-scaled taps.
half3 LoadAndFilterColour(int32_t2 output_pixel, float2 uv, out half4 col_to_accum)
{
    // Centre of the output pixel, in output-pixel units.
    float2 pixel_centre = float2(output_pixel) + 0.5f;

    // Choose the kernel LUT entry from the pixel's position inside the
    // repeating tile. The row stride is the x component of _IndexModulo.
    int16_t2 tile_pos = (int16_t2(output_pixel) + _LutOffset) % _IndexModulo;
    int16_t  lut_idx  = tile_pos.y * _IndexModulo.x + tile_pos.x;
    KernelTile lut    = kernelLUT[lut_idx];

    // Keep every weight in [EPS, 1] so the normalisation below cannot
    // divide by zero.
    half4 kpn_weights = clamp(LoadKPNWeight(uv, lut_idx), half4(EPS), half4(1.HF));

    // Map each tap from output space to an input pixel, clamped to the
    // input texture bounds.
    float4 src_x = (float4(pixel_centre.x) + float4(lut.dx)) * _InvScale.x;
    float4 src_y = (float4(pixel_centre.y) + float4(lut.dy)) * _InvScale.y;
    int16_t4 tap_x = clamp(int16_t4(floor(src_x)), int16_t4(0), int16_t4(_InputDims.x - 1));
    int16_t4 tap_y = clamp(int16_t4(floor(src_y)), int16_t4(0), int16_t4(_InputDims.y - 1));

    // One matrix column per tap: exposure-scaled colour with alpha = 1.
    f16mat4x4 taps;
    for (int i = 0; i < 4; ++i)
    {
        half3 rgb = half3(texelFetch(_ColourTex, int16_t2(tap_x[i], tap_y[i]), 0).rgb);
        taps[i] = half4(SafeColour(rgb * half3(_Exposure)), 1.HF);
    }

    // Expose the centre tap only when it has a zero LUT offset.
    half centre_is_exact = half(lut.dx[CENTER_TAP] == 0 && lut.dy[CENTER_TAP] == 0);
    col_to_accum = taps[CENTER_TAP] * centre_is_exact;

    // Matrix * vector sums the weighted columns; .w then holds the total
    // weight because every column has alpha = 1.
    half4 weighted = taps * kpn_weights;

    return half3(weighted.rgb * rcp(weighted.w));
}
|
#else |
|
#error "Unsupported SCALE_MODE" |
|
#endif // SCALE_MODE == SCALE_2_0X |
|
|
|
|
|
// Reads the two temporal blend parameters for the pixel at `uv`.
//
//   theta - out: first dequantized channel multiplied by _NotHistoryReset.
//   alpha - out: second dequantized channel remapped as y * 0.35 + 0.05.
//           NOTE(review): that is [0.05, 0.40] if the dequantized value
//           lies in [0, 1] - confirm against Dequantize.
void LoadTemporalParameters(float2 uv, out half theta, out half alpha)
{
    half2 raw    = half2(textureLod(_TemporalTensor, uv, 0).xy);
    half2 params = Dequantize(raw, _TemporalQuantParams);

    theta = params.x * _NotHistoryReset;
    alpha = params.y * 0.35HF + 0.05HF;
}
|
|
|
|
|
// Stores `colour`, after SafeColour, to _UpsampledColourOut at `pixel`
// with the alpha component set to one.
void WriteUpsampledColour(int32_t2 pixel, half3 colour)
{
    imageStore(_UpsampledColourOut, pixel, half4(SafeColour(colour), 1.0));
}
|
|
|
|
|
|
|
// One invocation per output pixel, in 16x16 work groups.
layout(local_size_x = 16, local_size_y = 16) in;

// Entry point: produces one upsampled output pixel from the filtered
// current-frame colour and the reprojected history.
void main()
{
    int32_t2 output_pixel = int32_t2(gl_GlobalInvocationID.xy);

    // Invocations in partial edge work groups have nothing to write.
    if (any(greaterThanEqual(output_pixel, _OutputDims)))
    {
        return;
    }

    // Centre of the output pixel in normalised coordinates, and the input
    // pixel that contains it.
    float2   uv          = (float2(output_pixel) + 0.5) * _InvOutputDims;
    int32_t2 input_pixel = int32_t2(uv * _InputDims);

    // 1. Reprojected history, plus a flag saying whether it was on screen.
    half  onscreen;
    half3 history = LoadWarpedHistory(uv, input_pixel, onscreen);

    // 2. Kernel-filtered current colour, plus the exact centre sample (if any).
    half4 col_to_accum;
    half3 colour = LoadAndFilterColour(output_pixel, uv, col_to_accum);

    // 3. Temporal blend factors.
    half theta;
    half alpha;
    LoadTemporalParameters(uv, theta, alpha);

    // 4. Move from the filtered colour towards the history by theta; the
    //    history contributes nothing when it was off screen.
    half3 rectified = lerp(colour, history, theta * onscreen);

    // 5. In tonemapped space, move towards the exact centre sample by alpha.
    //    col_to_accum.a is zero when no such sample exists, which disables
    //    this step.
    half3 accumulated = lerp(Tonemap(rectified), Tonemap(col_to_accum.rgb), alpha * col_to_accum.a);

    // 6. Return to linear space, undo the exposure scale, and store.
    half3 out_linear = InverseTonemap(accumulated) * _InvExposure;
    WriteUpsampledColour(output_pixel, out_linear);
}
|
|