New upstream version 24.0.1+dfsg1

This commit is contained in:
Sebastian Ramacher 2019-09-22 23:19:10 +02:00
parent b14f9eae6d
commit 5a730d6ec3
842 changed files with 42245 additions and 33385 deletions

View file

@ -1,13 +1,29 @@
uniform float4x4 ViewProj;
uniform float2 base_dimension;
uniform float2 base_dimension_i;
uniform texture2d image;
struct VertInOut {
sampler_state textureSampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
};
/* Input of VSDefault: a vertex position and one texture coordinate. */
struct VertData {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
VertInOut VSDefault(VertInOut vert_in)
/* Return type of VSDefault: the texture coordinate and the position
 * after multiplication by ViewProj. */
struct VertInOut {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
};
/* Input of the PSDrawArea* pixel shaders: only the interpolated texture
 * coordinate is consumed. */
struct FragData {
float2 uv : TEXCOORD0;
};
VertInOut VSDefault(VertData vert_in)
{
VertInOut vert_out;
vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
@ -15,43 +31,97 @@ VertInOut VSDefault(VertInOut vert_in)
return vert_out;
}
float4 PSDrawAreaRGBA(VertInOut vert_in) : TARGET
float4 DrawArea(float2 uv)
{
float4 totalcolor = float4(0.0, 0.0, 0.0, 0.0);
float2 uv = vert_in.uv;
float2 uvdelta = float2(ddx(uv.x), ddy(uv.y));
float2 uv_delta = float2(ddx(uv.x), ddy(uv.y));
// Handle potential OpenGL flip.
uvdelta.y = abs(uvdelta.y);
if (obs_glsl_compile)
uv_delta.y = abs(uv_delta.y);
float2 uvhalfdelta = 0.5 * uvdelta;
float2 uvmin = uv - uvhalfdelta;
float2 uvmax = uv + uvhalfdelta;
float2 uv_min = uv - 0.5 * uv_delta;
float2 uv_max = uv_min + uv_delta;
int2 loadindexmin = int2(uvmin / base_dimension_i);
int2 loadindexmax = int2(uvmax / base_dimension_i);
float2 load_index_begin = floor(uv_min * base_dimension);
float2 load_index_end = ceil(uv_max * base_dimension);
float2 targetpos = uv / uvdelta;
float2 targetposmin = targetpos - 0.5;
float2 targetposmax = targetpos + 0.5;
float2 scale = base_dimension_i / uvdelta;
for (int loadindexy = loadindexmin.y; loadindexy <= loadindexmax.y; ++loadindexy)
{
for (int loadindexx = loadindexmin.x; loadindexx <= loadindexmax.x; ++loadindexx)
{
int2 loadindex = int2(loadindexx, loadindexy);
float2 potentialtargetmin = float2(loadindex) * scale;
float2 potentialtargetmax = potentialtargetmin + scale;
float2 targetmin = max(potentialtargetmin, targetposmin);
float2 targetmax = min(potentialtargetmax, targetposmax);
float area = (targetmax.x - targetmin.x) * (targetmax.y - targetmin.y);
float4 sample = image.Load(int3(loadindex, 0));
totalcolor += area * sample;
}
}
float2 target_dimension = 1.0 / uv_delta;
float2 target_pos = uv * target_dimension;
float2 target_pos_min = target_pos - 0.5;
float2 target_pos_max = target_pos + 0.5;
float2 scale = base_dimension_i * target_dimension;
return totalcolor;
float4 total_color = float4(0.0, 0.0, 0.0, 0.0);
float load_index_y = load_index_begin.y;
do {
float source_y_min = load_index_y * scale.y;
float source_y_max = source_y_min + scale.y;
float y_min = max(source_y_min, target_pos_min.y);
float y_max = min(source_y_max, target_pos_max.y);
float height = y_max - y_min;
float load_index_x = load_index_begin.x;
do {
float source_x_min = load_index_x * scale.x;
float source_x_max = source_x_min + scale.x;
float x_min = max(source_x_min, target_pos_min.x);
float x_max = min(source_x_max, target_pos_max.x);
float width = x_max - x_min;
float area = width * height;
float4 color = image.Load(int3(load_index_x, load_index_y, 0));
total_color += area * color;
++load_index_x;
} while (load_index_x < load_index_end.x);
++load_index_y;
} while (load_index_y < load_index_end.y);
return total_color;
}
/* Pixel shader entry point: forwards the fragment's texture coordinate to
 * DrawArea() and outputs its result unchanged. */
float4 PSDrawAreaRGBA(FragData frag_in) : TARGET
{
	float4 filtered = DrawArea(frag_in.uv);
	return filtered;
}
/* Pixel shader entry point: runs DrawArea() and divides the colour channels
 * by the resulting alpha. When alpha is not greater than zero the colour
 * channels are multiplied by 0.0 instead, so no division by zero occurs.
 * Alpha itself is passed through. */
float4 PSDrawAreaRGBADivide(FragData frag_in) : TARGET
{
	float4 filtered = DrawArea(frag_in.uv);

	float inv_alpha = 0.0;
	if (filtered.a > 0.0)
		inv_alpha = 1.0 / filtered.a;

	return float4(filtered.rgb * inv_alpha, filtered.a);
}
/*
 * Pixel shader entry point of the DrawUpscale technique.
 *
 * Remaps the fragment's texture coordinate independently on each axis and
 * then takes one sample through textureSampler.
 *
 * NOTE(review): assumes base_dimension is the source size in texels and
 * base_dimension_i its reciprocal; both are set by the caller - confirm.
 */
float4 PSDrawAreaRGBAUpscale(FragData frag_in) : TARGET
{
float2 uv = frag_in.uv;
/* Change of uv per output pixel along screen x and screen y. */
float2 uv_delta = float2(ddx(uv.x), ddy(uv.y));
// Handle potential OpenGL flip.
if (obs_glsl_compile)
uv_delta.y = abs(uv_delta.y);
/* uv range covered by this output pixel, centred on uv. */
float2 uv_min = uv - 0.5 * uv_delta;
float2 uv_max = uv_min + uv_delta;
/* Indices of the first and last source texel touched by that range. */
float2 load_index_first = floor(uv_min * base_dimension);
float2 load_index_last = ceil(uv_max * base_dimension) - 1.0;
/* X axis: when the range touches more than one texel, the distance from
 * the texel boundary (measured in output pixels) is re-expressed in source
 * texels around that boundary; otherwise the coordinate snaps to the centre
 * of the single texel. */
if (load_index_first.x < load_index_last.x) {
float uv_boundary_x = load_index_last.x * base_dimension_i.x;
uv.x = ((uv.x - uv_boundary_x) / uv_delta.x) * base_dimension_i.x + uv_boundary_x;
} else
uv.x = (load_index_first.x + 0.5) * base_dimension_i.x;
/* Y axis: same remapping as for x. */
if (load_index_first.y < load_index_last.y) {
float uv_boundary_y = load_index_last.y * base_dimension_i.y;
uv.y = ((uv.y - uv_boundary_y) / uv_delta.y) * base_dimension_i.y + uv_boundary_y;
} else
uv.y = (load_index_first.y + 0.5) * base_dimension_i.y;
return image.Sample(textureSampler, uv);
}
technique Draw
@ -59,6 +129,24 @@ technique Draw
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawAreaRGBA(vert_in);
pixel_shader = PSDrawAreaRGBA(frag_in);
}
}
/* Single-pass technique: VSDefault vertex stage followed by the
 * alpha-dividing pixel shader PSDrawAreaRGBADivide. */
technique DrawAlphaDivide
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawAreaRGBADivide(frag_in);
}
}
/* Single-pass technique: VSDefault vertex stage followed by the
 * PSDrawAreaRGBAUpscale pixel shader. */
technique DrawUpscale
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawAreaRGBAUpscale(frag_in);
}
}

View file

@ -6,7 +6,7 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float2 base_dimension;
uniform float2 base_dimension_i;
uniform float undistort_factor = 1.0;
@ -21,45 +21,31 @@ struct VertData {
float2 uv : TEXCOORD0;
};
VertData VSDefault(VertData v_in)
/* Return type of VSDefault: texture coordinate and output position. */
struct VertOut {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
};
/* Input of DrawBicubic and the PSDrawBicubic* pixel shaders: only the
 * interpolated texture coordinate is consumed. */
struct FragData {
float2 uv : TEXCOORD0;
};
VertOut VSDefault(VertData v_in)
{
VertData vert_out;
VertOut vert_out;
vert_out.uv = v_in.uv * base_dimension;
vert_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
vert_out.uv = v_in.uv;
return vert_out;
}
float weight(float x)
{
float ax = abs(x);
/* Sharper version. May look better in some cases. */
const float B = 0.0;
const float C = 0.75;
if (ax < 1.0)
return (pow(x, 2.0) *
((12.0 - 9.0 * B - 6.0 * C) * ax +
(-18.0 + 12.0 * B + 6.0 * C)) +
(6.0 - 2.0 * B))
/ 6.0;
else if ((ax >= 1.0) && (ax < 2.0))
return (pow(x, 2.0) *
((-B - 6.0 * C) * ax + (6.0 * B + 30.0 * C)) +
(-12.0 * B - 48.0 * C) * ax +
(8.0 * B + 24.0 * C))
/ 6.0;
else
return 0.0;
}
float4 weight4(float x)
{
/* Sharper version. May look better in some cases. B=0, C=0.75 */
return float4(
weight(x - 2.0),
weight(x - 1.0),
weight(x),
weight(x + 1.0));
((-0.75 * x + 1.5) * x - 0.75) * x,
(1.25 * x - 2.25) * x * x + 1.0,
((-1.25 * x + 1.5) * x + 0.75) * x,
(0.75 * x - 0.75) * x * x);
}
float AspectUndistortX(float x, float a)
@ -74,83 +60,94 @@ float AspectUndistortU(float u)
return AspectUndistortX((u - 0.5) * 2.0, undistort_factor) * 0.5 + 0.5;
}
float2 pixel_coord(float xpos, float ypos)
/* Builds a sampling coordinate whose horizontal component has been passed
 * through AspectUndistortU(); the vertical component is returned as given. */
float2 undistort_coord(float xpos, float ypos)
{
	float undistorted_x = AspectUndistortU(xpos);
	return float2(undistorted_x, ypos);
}
float4 pixel(float xpos, float ypos, bool undistort)
float4 undistort_pixel(float xpos, float ypos)
{
if (undistort)
return image.Sample(textureSampler, pixel_coord(xpos, ypos));
else
return image.Sample(textureSampler, float2(xpos, ypos));
return image.Sample(textureSampler, undistort_coord(xpos, ypos));
}
float4 get_line(float ypos, float4 xpos, float4 linetaps, bool undistort)
float4 undistort_line(float4 xpos, float ypos, float4 rowtaps)
{
return
pixel(xpos.r, ypos, undistort) * linetaps.r +
pixel(xpos.g, ypos, undistort) * linetaps.g +
pixel(xpos.b, ypos, undistort) * linetaps.b +
pixel(xpos.a, ypos, undistort) * linetaps.a;
return undistort_pixel(xpos.x, ypos) * rowtaps.x +
undistort_pixel(xpos.y, ypos) * rowtaps.y +
undistort_pixel(xpos.z, ypos) * rowtaps.z +
undistort_pixel(xpos.w, ypos) * rowtaps.w;
}
float4 DrawBicubic(VertData v_in, bool undistort)
float4 DrawBicubic(FragData f_in, bool undistort)
{
float2 stepxy = base_dimension_i;
float2 pos = v_in.uv + stepxy * 0.5;
float2 f = frac(pos / stepxy);
float2 pos = f_in.uv;
float2 pos1 = floor(pos - 0.5) + 0.5;
float2 f = pos - pos1;
float4 rowtaps = weight4(1.0 - f.x);
float4 coltaps = weight4(1.0 - f.y);
float4 rowtaps = weight4(f.x);
float4 coltaps = weight4(f.y);
/* make sure all taps added together is exactly 1.0, otherwise some
* (very small) distortion can occur */
rowtaps /= rowtaps.r + rowtaps.g + rowtaps.b + rowtaps.a;
coltaps /= coltaps.r + coltaps.g + coltaps.b + coltaps.a;
float2 uv1 = pos1 * base_dimension_i;
float2 uv0 = uv1 - base_dimension_i;
float2 uv2 = uv1 + base_dimension_i;
float2 uv3 = uv2 + base_dimension_i;
float2 xystart = (-1.5 - f) * stepxy + pos;
float4 xpos = float4(
xystart.x,
xystart.x + stepxy.x,
xystart.x + stepxy.x * 2.0,
xystart.x + stepxy.x * 3.0
);
if (undistort) {
float4 xpos = float4(uv0.x, uv1.x, uv2.x, uv3.x);
return undistort_line(xpos, uv0.y, rowtaps) * coltaps.x +
undistort_line(xpos, uv1.y, rowtaps) * coltaps.y +
undistort_line(xpos, uv2.y, rowtaps) * coltaps.z +
undistort_line(xpos, uv3.y, rowtaps) * coltaps.w;
}
return
get_line(xystart.y , xpos, rowtaps, undistort) * coltaps.r +
get_line(xystart.y + stepxy.y , xpos, rowtaps, undistort) * coltaps.g +
get_line(xystart.y + stepxy.y * 2.0, xpos, rowtaps, undistort) * coltaps.b +
get_line(xystart.y + stepxy.y * 3.0, xpos, rowtaps, undistort) * coltaps.a;
float u_weight_sum = rowtaps.y + rowtaps.z;
float u_middle_offset = rowtaps.z * base_dimension_i.x / u_weight_sum;
float u_middle = uv1.x + u_middle_offset;
float v_weight_sum = coltaps.y + coltaps.z;
float v_middle_offset = coltaps.z * base_dimension_i.y / v_weight_sum;
float v_middle = uv1.y + v_middle_offset;
int2 coord_top_left = int2(max(uv0 * base_dimension, 0.5));
int2 coord_bottom_right = int2(min(uv3 * base_dimension, base_dimension - 0.5));
float4 top = image.Load(int3(coord_top_left, 0)) * rowtaps.x;
top += image.Sample(textureSampler, float2(u_middle, uv0.y)) * u_weight_sum;
top += image.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)) * rowtaps.w;
float4 total = top * coltaps.x;
float4 middle = image.Sample(textureSampler, float2(uv0.x, v_middle)) * rowtaps.x;
middle += image.Sample(textureSampler, float2(u_middle, v_middle)) * u_weight_sum;
middle += image.Sample(textureSampler, float2(uv3.x, v_middle)) * rowtaps.w;
total += middle * v_weight_sum;
float4 bottom = image.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)) * rowtaps.x;
bottom += image.Sample(textureSampler, float2(u_middle, uv3.y)) * u_weight_sum;
bottom += image.Load(int3(coord_bottom_right, 0)) * rowtaps.w;
total += bottom * coltaps.w;
return total;
}
float4 PSDrawBicubicRGBA(VertData v_in, bool undistort) : TARGET
float4 PSDrawBicubicRGBA(FragData f_in, bool undistort) : TARGET
{
return DrawBicubic(v_in, undistort);
return DrawBicubic(f_in, undistort);
}
float4 PSDrawBicubicRGBADivide(VertData v_in) : TARGET
float4 PSDrawBicubicRGBADivide(FragData f_in) : TARGET
{
float4 rgba = DrawBicubic(v_in, false);
float4 rgba = DrawBicubic(f_in, false);
float alpha = rgba.a;
float multiplier = (alpha > 0.0) ? (1.0 / alpha) : 0.0;
return float4(rgba.rgb * multiplier, alpha);
}
float4 PSDrawBicubicMatrix(VertData v_in) : TARGET
{
float3 rgb = DrawBicubic(v_in, false).rgb;
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBA(v_in, false);
pixel_shader = PSDrawBicubicRGBA(f_in, false);
}
}
@ -159,7 +156,7 @@ technique DrawAlphaDivide
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBADivide(v_in);
pixel_shader = PSDrawBicubicRGBADivide(f_in);
}
}
@ -168,15 +165,6 @@ technique DrawUndistort
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBA(v_in, true);
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicMatrix(v_in);
pixel_shader = PSDrawBicubicRGBA(f_in, true);
}
}

View file

@ -1,12 +1,10 @@
/*
* bilinear low res scaling, samples 9 pixels of a larger image to scale to a
* bilinear low res scaling, samples 8 pixels of a larger image to scale to a
* low resolution image below half size
*/
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float2 base_dimension_i;
sampler_state textureSampler {
Filter = Linear;
@ -34,19 +32,24 @@ float4 pixel(float2 uv)
float4 DrawLowresBilinear(VertData v_in)
{
float2 stepxy = base_dimension_i;
float4 out_color;
float2 uv = v_in.uv;
float2 stepxy = float2(ddx(uv.x), ddy(uv.y));
float2 stepxy1 = stepxy * 0.0625;
float2 stepxy3 = stepxy * 0.1875;
float2 stepxy5 = stepxy * 0.3125;
float2 stepxy7 = stepxy * 0.4375;
out_color = pixel(v_in.uv);
out_color += pixel(v_in.uv + float2(-stepxy.x, -stepxy.y));
out_color += pixel(v_in.uv + float2(-stepxy.x, 0.0));
out_color += pixel(v_in.uv + float2(-stepxy.x, stepxy.y));
out_color += pixel(v_in.uv + float2( 0.0, -stepxy.y));
out_color += pixel(v_in.uv + float2( 0.0, stepxy.y));
out_color += pixel(v_in.uv + float2( stepxy.x, -stepxy.y));
out_color += pixel(v_in.uv + float2( stepxy.x, 0.0));
out_color += pixel(v_in.uv + float2( stepxy.x, stepxy.y));
return out_color / float4(9.0, 9.0, 9.0, 9.0);
// Simulate Direct3D 8-sample pattern
float4 out_color;
out_color = pixel(uv + float2( stepxy1.x, -stepxy3.y));
out_color += pixel(uv + float2(-stepxy1.x, stepxy3.y));
out_color += pixel(uv + float2( stepxy5.x, stepxy1.y));
out_color += pixel(uv + float2(-stepxy3.x, -stepxy5.y));
out_color += pixel(uv + float2(-stepxy5.x, stepxy5.y));
out_color += pixel(uv + float2(-stepxy7.x, -stepxy1.y));
out_color += pixel(uv + float2( stepxy3.x, stepxy7.y));
out_color += pixel(uv + float2( stepxy7.x, -stepxy7.y));
return out_color * 0.125;
}
float4 PSDrawLowresBilinearRGBA(VertData v_in) : TARGET
@ -62,13 +65,6 @@ float4 PSDrawLowresBilinearRGBADivide(VertData v_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
float4 PSDrawLowresBilinearMatrix(VertData v_in) : TARGET
{
float3 rgb = DrawLowresBilinear(v_in).rgb;
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
@ -87,12 +83,3 @@ technique DrawAlphaDivide
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLowresBilinearMatrix(v_in);
}
}

View file

@ -1,5 +1,4 @@
uniform float4x4 ViewProj;
uniform float4x4 color_matrix;
uniform texture2d image;
sampler_state def_sampler {
@ -34,13 +33,6 @@ float4 PSDrawAlphaDivide(VertInOut vert_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
float4 PSDrawMatrix(VertInOut vert_in) : TARGET
{
float3 rgb = image.Sample(def_sampler, vert_in.uv).rgb;
float3 yuv = mul(float4(rgb, 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
@ -58,12 +50,3 @@ technique DrawAlphaDivide
pixel_shader = PSDrawAlphaDivide(vert_in);
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawMatrix(vert_in);
}
}

View file

@ -15,38 +15,23 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
//#define DEBUGGING
uniform float4x4 ViewProj;
uniform float u_plane_offset;
uniform float v_plane_offset;
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;
uniform float width_x2_i;
uniform int int_width;
uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;
uniform float4x4 color_matrix;
uniform float4 color_vec0;
uniform float4 color_vec1;
uniform float4 color_vec2;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform texture2d image;
uniform texture2d image1;
uniform texture2d image2;
uniform texture2d image3;
sampler_state def_sampler {
Filter = Linear;
@ -54,354 +39,385 @@ sampler_state def_sampler {
AddressV = Clamp;
};
struct VertInOut {
struct FragPos {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
VertInOut VSDefault(VertInOut vert_in)
/* Texture coordinate plus position. Returned by the VSTexPosHalf*_Reverse
 * vertex shaders and also used as the input type of some *_Reverse pixel
 * shaders, which read both uv and pos. */
struct VertTexPos {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
};
/* Output of VSPosWide_Reverse. pos_wide is filled there as
 * (width * u, width_d2 * u, height * v). */
struct VertPosWide {
float3 pos_wide : TEXCOORD0;
float4 pos : POSITION;
};
/* Output of VSTexPos_Left. uuv is filled there as (u_left, u_right, v):
 * two horizontal coordinates that share one vertical coordinate. */
struct VertTexPosWide {
float3 uuv : TEXCOORD0;
float4 pos : POSITION;
};
/* Pixel shader input carrying only a texture coordinate (used by the
 * packed 4:2:2 *_Reverse pixel shaders). */
struct FragTex {
float2 uv : TEXCOORD0;
};
/* Pixel shader input carrying only pos_wide. The PSPlanar422*_Reverse
 * shaders read .xz for the `image` lookup and .yz for the `image1`/`image2`
 * lookups. */
struct FragPosWide {
float3 pos_wide : TEXCOORD0;
};
/* Pixel shader input carrying only uuv. The *_Wide pixel shaders sample at
 * uuv.xz and uuv.yz and average the two results. */
struct FragTexWide {
float3 uuv : TEXCOORD0;
};
FragPos VSPos(uint id : VERTEXID)
{
VertInOut vert_out;
vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
vert_out.uv = vert_in.uv;
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
FragPos vert_out;
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
/* used to prevent internal GPU precision issues width fmod in particular */
#define PRECISION_OFFSET 0.2
float4 PSNV12(VertInOut vert_in) : TARGET
VertTexPosWide VSTexPos_Left(uint id : VERTEXID)
{
float v_mul = floor(vert_in.uv.y * input_height);
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
float2 sample_pos[4];
float u_right = idHigh * 2.0;
float u_left = u_right - width_i;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
return transpose(out_val)[1];
} else {
#ifdef DEBUGGING
return float4(0.5, 0.2, 0.5, 0.2);
#endif
float new_offset = byte_offset - u_plane_offset;
float ch_u = floor(fmod(new_offset, width)) * width_i;
float ch_v = floor(new_offset * width_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u + width_i2, ch_v);
return float4(
image.Sample(def_sampler, sample_pos[0]).rb,
image.Sample(def_sampler, sample_pos[1]).rb
);
}
VertTexPosWide vert_out;
vert_out.uuv = float3(u_left, u_right, v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
float PSNV12_Y(VertInOut vert_in) : TARGET
VertTexPos VSTexPosHalf_Reverse(uint id : VERTEXID)
{
return image.Sample(def_sampler, vert_in.uv.xy).y;
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
float u = idHigh * 2.0;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
VertTexPos vert_out;
vert_out.uv = float2(width_d2 * u, height * v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
float2 PSNV12_UV(VertInOut vert_in) : TARGET
VertTexPos VSTexPosHalfHalf_Reverse(uint id : VERTEXID)
{
return image.Sample(def_sampler, vert_in.uv.xy).xz;
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
float u = idHigh * 2.0;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
VertTexPos vert_out;
vert_out.uv = float2(width_d2 * u, height_d2 * v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
float4 PSPlanar420(VertInOut vert_in) : TARGET
VertPosWide VSPosWide_Reverse(uint id : VERTEXID)
{
float v_mul = floor(vert_in.uv.y * input_height);
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
float2 sample_pos[4];
float u = idHigh * 2.0;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
} else {
#ifdef DEBUGGING
return ((byte_offset < v_plane_offset) ?
float4(0.5, 0.5, 0.5, 0.5) :
float4(0.2, 0.2, 0.2, 0.2));
#endif
float new_offset = byte_offset -
((byte_offset < v_plane_offset) ?
u_plane_offset : v_plane_offset);
float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
/* set up coordinates for next chroma line, in case
* (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
* between the current and the next chroma line; do note that the next
* chroma line is two source lines below the current source line */
float ch_u_n = 0. + width_i;
float ch_v_n = ch_v + height_i * 3;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u += width_i2, ch_v);
ch_u += width_i2;
// check if ch_u overflowed the current source and chroma line
if (ch_u > 1.0) {
sample_pos[2] = float2(ch_u_n, ch_v_n);
sample_pos[2] = float2(ch_u_n + width_i2, ch_v_n);
} else {
sample_pos[2] = float2(ch_u, ch_v);
sample_pos[3] = float2(ch_u + width_i2, ch_v);
}
}
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
if (byte_offset < u_plane_offset)
return out_val[1];
else if (byte_offset < v_plane_offset)
return out_val[0];
else
return out_val[2];
VertPosWide vert_out;
vert_out.pos_wide = float3(float2(width, width_d2) * u, height * v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
float4 PSPlanar444(VertInOut vert_in) : TARGET
float PS_Y(FragPos frag_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float new_byte_offset = byte_offset;
if (byte_offset >= v_plane_offset)
new_byte_offset -= v_plane_offset;
else if (byte_offset >= u_plane_offset)
new_byte_offset -= u_plane_offset;
float2 sample_pos[4];
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
float v_val = floor(new_byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
u_val += width_i * 0.5;
v_val += height_i * 0.5;
sample_pos[0] = float2(u_val, v_val);
sample_pos[1] = float2(u_val += width_i, v_val);
sample_pos[2] = float2(u_val += width_i, v_val);
sample_pos[3] = float2(u_val + width_i, v_val);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
if (byte_offset < u_plane_offset)
return out_val[1];
else if (byte_offset < v_plane_offset)
return out_val[0];
else
return out_val[2];
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float y = dot(color_vec0.xyz, rgb) + color_vec0.w;
return y;
}
float GetIntOffsetColor(int offset)
float2 PS_UV_Wide(FragTexWide frag_in) : TARGET
{
return image.Load(int3(offset % int_input_width,
offset / int_input_width,
0)).r;
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float u = dot(color_vec1.xyz, rgb) + color_vec1.w;
float v = dot(color_vec2.xyz, rgb) + color_vec2.w;
return float2(u, v);
}
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
float PS_U(FragPos frag_in) : TARGET
{
float y = vert_in.uv.y;
float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
width_d2_i;
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float u = dot(color_vec1.xyz, rgb) + color_vec1.w;
return u;
}
x += input_width_i_d2;
/* Loads the texel of `image` at the fragment's integer position (mip 0) and
 * returns dot(color_vec2.xyz, rgb) + color_vec2.w.
 * NOTE(review): by its name this produces the V component; color_vec2 is
 * supplied by the caller and is not visible here. */
float PS_V(FragPos frag_in) : TARGET
{
	int3 texel = int3(frag_in.pos.xy, 0);
	float3 color = image.Load(texel).rgb;
	return dot(color_vec2.xyz, color) + color_vec2.w;
}
float4 texel = image.Sample(def_sampler, float2(x, y));
float3 yuv = float3(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos]);
/* Samples `image` at (uuv.x, uuv.z) and (uuv.y, uuv.z), averages the two
 * colours and returns dot(color_vec1.xyz, average) + color_vec1.w.
 * NOTE(review): by its name this produces the U component; color_vec1 is
 * supplied by the caller and is not visible here. */
float PS_U_Wide(FragTexWide frag_in) : TARGET
{
	float2 left_uv = frag_in.uuv.xz;
	float2 right_uv = frag_in.uuv.yz;
	float3 average = (image.Sample(def_sampler, left_uv).rgb +
			  image.Sample(def_sampler, right_uv).rgb) * 0.5;
	return dot(color_vec1.xyz, average) + color_vec1.w;
}
/* Samples `image` at (uuv.x, uuv.z) and (uuv.y, uuv.z), averages the two
 * colours and returns dot(color_vec2.xyz, average) + color_vec2.w.
 * NOTE(review): by its name this produces the V component; color_vec2 is
 * supplied by the caller and is not visible here. */
float PS_V_Wide(FragTexWide frag_in) : TARGET
{
	float2 left_uv = frag_in.uuv.xz;
	float2 right_uv = frag_in.uuv.yz;
	float3 average = (image.Sample(def_sampler, left_uv).rgb +
			  image.Sample(def_sampler, right_uv).rgb) * 0.5;
	return dot(color_vec2.xyz, average) + color_vec2.w;
}
float3 YUV_to_RGB(float3 yuv)
{
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float r = dot(color_vec0.xyz, yuv) + color_vec0.w;
float g = dot(color_vec1.xyz, yuv) + color_vec1.w;
float b = dot(color_vec2.xyz, yuv) + color_vec2.w;
return float3(r, g, b);
}
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
float3 PSUYVY_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.yw;
float2 cbcr = y2uv.zx;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
float3 PSYUY2_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = y * int_width + x;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.zx;
float2 cbcr = y2uv.yw;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
float3 PSYVYU_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma = int_u_plane_offset + chroma_offset * 2;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma),
GetIntOffsetColor(chroma + 1)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.zx;
float2 cbcr = y2uv.wy;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSY800_Limited(VertInOut vert_in) : TARGET
float3 PSPlanar420_Reverse(VertTexPos frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
float limited = image.Load(int3(x, y, 0)).x;
float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
return float4(full, full, full, 1.0);
float y = image.Load(int3(frag_in.pos.xy, 0)).x;
int3 xy0_chroma = int3(frag_in.uv, 0);
float cb = image1.Load(xy0_chroma).x;
float cr = image2.Load(xy0_chroma).x;
float3 yuv = float3(y, cb, cr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSY800_Full(VertInOut vert_in) : TARGET
float4 PSPlanar420A_Reverse(VertTexPos frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
float3 full = image.Load(int3(x, y, 0)).xxx;
return float4(full, 1.0);
}
float4 PSRGB_Limited(VertInOut vert_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
float4 rgba = image.Load(int3(x, y, 0));
rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
int3 xy0_luma = int3(frag_in.pos.xy, 0);
float y = image.Load(xy0_luma).x;
int3 xy0_chroma = int3(frag_in.uv, 0);
float cb = image1.Load(xy0_chroma).x;
float cr = image2.Load(xy0_chroma).x;
float alpha = image3.Load(xy0_luma).x;
float3 yuv = float3(y, cb, cr);
float4 rgba = float4(YUV_to_RGB(yuv), alpha);
return rgba;
}
technique Planar420
/* Reads the first channel of `image` at pos_wide.xz and the first channels
 * of `image1` and `image2` at pos_wide.yz, packs them as (y, cb, cr) and
 * returns the result of YUV_to_RGB(). */
float3 PSPlanar422_Reverse(FragPosWide frag_in) : TARGET
{
	int3 luma_texel = int3(frag_in.pos_wide.xz, 0);
	int3 chroma_texel = int3(frag_in.pos_wide.yz, 0);
	float3 yuv = float3(image.Load(luma_texel).x,
			    image1.Load(chroma_texel).x,
			    image2.Load(chroma_texel).x);
	return YUV_to_RGB(yuv);
}
/* Same lookups as the three-plane 4:2:2 shader, plus an alpha value read
 * from the first channel of `image3` at the same texel as `image`
 * (pos_wide.xz). Returns (YUV_to_RGB(y, cb, cr), alpha). */
float4 PSPlanar422A_Reverse(FragPosWide frag_in) : TARGET
{
	int3 luma_texel = int3(frag_in.pos_wide.xz, 0);
	int3 chroma_texel = int3(frag_in.pos_wide.yz, 0);
	float3 yuv = float3(image.Load(luma_texel).x,
			    image1.Load(chroma_texel).x,
			    image2.Load(chroma_texel).x);
	float opacity = image3.Load(luma_texel).x;
	return float4(YUV_to_RGB(yuv), opacity);
}
/* Reads the first channel of `image`, `image1` and `image2` at the same
 * texel (the fragment's integer position), packs them as (y, cb, cr) and
 * returns the result of YUV_to_RGB(). */
float3 PSPlanar444_Reverse(FragPos frag_in) : TARGET
{
	int3 texel = int3(frag_in.pos.xy, 0);
	float3 yuv = float3(image.Load(texel).x,
			    image1.Load(texel).x,
			    image2.Load(texel).x);
	return YUV_to_RGB(yuv);
}
/* Reads the first channel of `image`, `image1`, `image2` and `image3` at
 * the fragment's integer position. The first three are converted with
 * YUV_to_RGB(); the fourth is returned as alpha. */
float4 PSPlanar444A_Reverse(FragPos frag_in) : TARGET
{
	int3 texel = int3(frag_in.pos.xy, 0);
	float3 yuv = float3(image.Load(texel).x,
			    image1.Load(texel).x,
			    image2.Load(texel).x);
	float opacity = image3.Load(texel).x;
	return float4(YUV_to_RGB(yuv), opacity);
}
/* Loads one texel of `image` at the fragment's integer position, converts
 * its xyz channels with YUV_to_RGB() and passes the fourth channel through
 * as alpha. */
float4 PSAYUV_Reverse(FragPos frag_in) : TARGET
{
	int3 texel = int3(frag_in.pos.xy, 0);
	float4 source = image.Load(texel);
	float3 rgb = YUV_to_RGB(source.xyz);
	return float4(rgb, source.a);
}
/* Reads y from the first channel of `image` at the fragment's integer
 * position and (cb, cr) from the first two channels of `image1` at the
 * integer part of uv, then returns YUV_to_RGB(y, cb, cr). */
float3 PSNV12_Reverse(VertTexPos frag_in) : TARGET
{
	int3 luma_texel = int3(frag_in.pos.xy, 0);
	int3 chroma_texel = int3(frag_in.uv, 0);
	float luma = image.Load(luma_texel).x;
	float2 chroma = image1.Load(chroma_texel).xy;
	return YUV_to_RGB(float3(luma, chroma));
}
/* Loads the first channel of `image` at the fragment's integer position and
 * rescales it with v * 255/219 - 16/219 (maps 16/255 to 0.0 and 235/255 to
 * 1.0; the result is not clamped here). The value is replicated to all
 * three output channels. */
float3 PSY800_Limited(FragPos frag_in) : TARGET
{
	int3 texel = int3(frag_in.pos.xy, 0);
	float narrow = image.Load(texel).x;
	float expanded = (255.0 / 219.0) * narrow - (16.0 / 219.0);
	return float3(expanded, expanded, expanded);
}
// Reads a single-channel texel and replicates it, unscaled, across all
// three output channels.
float3 PSY800_Full(FragPos frag_in) : TARGET
{
	float luma = image.Load(int3(frag_in.pos.xy, 0)).x;
	return float3(luma, luma, luma);
}
// Reads an RGBA texel and rescales its colour channels so that 16/255 maps
// to 0.0 and 235/255 maps to 1.0. Alpha is passed through untouched.
float4 PSRGB_Limited(FragPos frag_in) : TARGET
{
	float4 src = image.Load(int3(frag_in.pos.xy, 0));
	float3 expanded = (255.0 / 219.0) * src.rgb - (16.0 / 219.0);
	return float4(expanded, src.a);
}
// Assembles an RGB pixel from three horizontally adjacent single-channel
// texels (blue at col - 1, green at col, red at col + 1, with
// col = pos.x * 3), then rescales so 16/255 maps to 0.0 and 235/255 to 1.0.
float3 PSBGR3_Limited(FragPos frag_in) : TARGET
{
	float col = frag_in.pos.x * 3.0;
	float row = frag_in.pos.y;

	float red = image.Load(int3(col + 1.0, row, 0)).x;
	float green = image.Load(int3(col, row, 0)).x;
	float blue = image.Load(int3(col - 1.0, row, 0)).x;

	return (255.0 / 219.0) * float3(red, green, blue) - (16.0 / 219.0);
}
// Assembles an RGB pixel from three horizontally adjacent single-channel
// texels (blue at col - 1, green at col, red at col + 1, with
// col = pos.x * 3). No range scaling is applied.
float3 PSBGR3_Full(FragPos frag_in) : TARGET
{
	float col = frag_in.pos.x * 3.0;
	float row = frag_in.pos.y;

	float red = image.Load(int3(col + 1.0, row, 0)).x;
	float green = image.Load(int3(col, row, 0)).x;
	float blue = image.Load(int3(col - 1.0, row, 0)).x;

	return float3(red, green, blue);
}
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPlanar420, then VSPos/PS_Y). Looks like removed and added
// diff lines shown together — confirm which pair is live.
technique Planar_Y
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PS_Y(frag_in);
}
}
technique Planar444
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPlanar444, then VSPos/PS_U). Looks like removed and added
// diff lines shown together — confirm which pair is live.
technique Planar_U
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar444(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PS_U(frag_in);
}
}
technique NV12
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSNV12, then VSPos/PS_V). Looks like removed and added diff
// lines shown together — confirm which pair is live.
technique Planar_V
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PS_V(frag_in);
}
}
// Single-pass technique pairing the VSTexPos_Left vertex shader with the
// PS_U_Wide pixel shader.
technique Planar_U_Left
{
pass
{
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_U_Wide(frag_in);
}
}
// Single-pass technique pairing the VSTexPos_Left vertex shader with the
// PS_V_Wide pixel shader.
technique Planar_V_Left
{
pass
{
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_V_Wide(frag_in);
}
}
@ -409,8 +425,8 @@ technique NV12_Y
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSNV12_Y, then VSPos/PS_Y). Looks like removed and added diff
// lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12_Y(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PS_Y(frag_in);
}
}
@ -418,8 +434,8 @@ technique NV12_UV
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSNV12_UV, then VSTexPos_Left/PS_UV_Wide). Looks like removed
// and added diff lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12_UV(vert_in);
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_UV_Wide(frag_in);
}
}
@ -427,8 +443,8 @@ technique UYVY_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPacked422_Reverse(…, 2, 0, 1, 3), then
// VSTexPosHalf_Reverse/PSUYVY_Reverse). Looks like removed and added diff
// lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSUYVY_Reverse(frag_in);
}
}
@ -436,8 +452,8 @@ technique YUY2_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPacked422_Reverse(…, 1, 3, 2, 0), then
// VSTexPosHalf_Reverse/PSYUY2_Reverse). Looks like removed and added diff
// lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSYUY2_Reverse(frag_in);
}
}
@ -445,8 +461,8 @@ technique YVYU_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPacked422_Reverse(…, 3, 1, 2, 0), then
// VSTexPosHalf_Reverse/PSYVYU_Reverse). Looks like removed and added diff
// lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSYVYU_Reverse(frag_in);
}
}
@ -454,8 +470,35 @@ technique I420_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPlanar420_Reverse(vert_in), then
// VSTexPosHalfHalf_Reverse/PSPlanar420_Reverse(frag_in)). Looks like removed
// and added diff lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420_Reverse(vert_in);
vertex_shader = VSTexPosHalfHalf_Reverse(id);
pixel_shader = PSPlanar420_Reverse(frag_in);
}
}
// Single-pass technique pairing the VSTexPosHalfHalf_Reverse vertex shader
// with the PSPlanar420A_Reverse pixel shader.
technique I40A_Reverse
{
pass
{
vertex_shader = VSTexPosHalfHalf_Reverse(id);
pixel_shader = PSPlanar420A_Reverse(frag_in);
}
}
// Single-pass technique pairing the VSPosWide_Reverse vertex shader with
// the PSPlanar422_Reverse pixel shader.
technique I422_Reverse
{
pass
{
vertex_shader = VSPosWide_Reverse(id);
pixel_shader = PSPlanar422_Reverse(frag_in);
}
}
// Single-pass technique pairing the VSPosWide_Reverse vertex shader with
// the PSPlanar422A_Reverse pixel shader.
technique I42A_Reverse
{
pass
{
vertex_shader = VSPosWide_Reverse(id);
pixel_shader = PSPlanar422A_Reverse(frag_in);
}
}
@ -463,8 +506,26 @@ technique I444_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSPlanar444_Reverse(vert_in), then
// VSPos/PSPlanar444_Reverse(frag_in)). Looks like removed and added diff
// lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar444_Reverse(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PSPlanar444_Reverse(frag_in);
}
}
// Single-pass technique pairing the VSPos vertex shader with the
// PSPlanar444A_Reverse pixel shader.
technique YUVA_Reverse
{
pass
{
vertex_shader = VSPos(id);
pixel_shader = PSPlanar444A_Reverse(frag_in);
}
}
// Single-pass technique pairing the VSPos vertex shader with the
// PSAYUV_Reverse pixel shader.
technique AYUV_Reverse
{
pass
{
vertex_shader = VSPos(id);
pixel_shader = PSAYUV_Reverse(frag_in);
}
}
@ -472,8 +533,8 @@ technique NV12_Reverse
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSNV12_Reverse(vert_in), then
// VSTexPosHalfHalf_Reverse/PSNV12_Reverse(frag_in)). Looks like removed and
// added diff lines shown together — confirm which pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12_Reverse(vert_in);
vertex_shader = VSTexPosHalfHalf_Reverse(id);
pixel_shader = PSNV12_Reverse(frag_in);
}
}
@ -481,8 +542,8 @@ technique Y800_Limited
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSY800_Limited(vert_in), then VSPos/PSY800_Limited(frag_in)).
// Looks like removed and added diff lines shown together — confirm which
// pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSY800_Limited(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PSY800_Limited(frag_in);
}
}
@ -490,8 +551,8 @@ technique Y800_Full
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSY800_Full(vert_in), then VSPos/PSY800_Full(frag_in)).
// Looks like removed and added diff lines shown together — confirm which
// pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSY800_Full(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PSY800_Full(frag_in);
}
}
@ -499,7 +560,25 @@ technique RGB_Limited
// NOTE(review): this pass sets vertex_shader and pixel_shader twice
// (VSDefault/PSRGB_Limited(vert_in), then VSPos/PSRGB_Limited(frag_in)).
// Looks like removed and added diff lines shown together — confirm which
// pair is live.
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSRGB_Limited(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PSRGB_Limited(frag_in);
}
}
// Single-pass technique pairing the VSPos vertex shader with the
// PSBGR3_Limited pixel shader.
technique BGR3_Limited
{
pass
{
vertex_shader = VSPos(id);
pixel_shader = PSBGR3_Limited(frag_in);
}
}
// Single-pass technique pairing the VSPos vertex shader with the
// PSBGR3_Full pixel shader.
technique BGR3_Full
{
pass
{
vertex_shader = VSPos(id);
pixel_shader = PSBGR3_Full(frag_in);
}
}

View file

@ -6,7 +6,7 @@
// Matrix applied to the vertex position in VSDefault.
uniform float4x4 ViewProj;
// Source texture read by the Lanczos taps (via Sample and Load).
uniform texture2d image;
// Matrix applied to the saturated RGB result in PSDrawLanczosMatrix.
uniform float4x4 color_matrix;
// Scales uv into the coordinate space used for Load() indices.
// Presumably the source size in texels — confirm against the caller.
uniform float2 base_dimension;
// Scales those coordinates back into uv space.
// Presumably 1.0 / base_dimension — confirm against the caller.
uniform float2 base_dimension_i;
// Forwarded to AspectUndistortX by AspectUndistortU.
uniform float undistort_factor = 1.0;
@ -22,45 +22,46 @@ struct VertData {
float2 uv : TEXCOORD0;
};
struct FragData {
// Output of VSDefault: texture coordinate plus position.
// NOTE(review): `uv` is declared twice and a `scale` member also appears;
// looks like removed and added diff lines shown together — confirm which
// members are live.
struct VertOut {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
float2 uv : TEXCOORD0;
float2 scale : TEXCOORD1;
};
FragData VSDefault(VertData v_in)
// Pixel-shader input: only the interpolated texture coordinate.
struct FragData {
float2 uv : TEXCOORD0;
};
// Vertex shader: transforms the position by ViewProj and forwards uv.
// NOTE(review): vert_out is declared twice, uv is assigned twice (once
// scaled by base_dimension, once unscaled) and a `scale` member is written.
// Looks like removed and added diff lines shown together — confirm which
// lines are live.
VertOut VSDefault(VertData v_in)
{
FragData vert_out;
VertOut vert_out;
vert_out.uv = v_in.uv * base_dimension;
vert_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
vert_out.uv = v_in.uv;
vert_out.scale = min(0.25 + abs(0.75 / mul(float4(1.0 / base_dimension_i.xy, 1.0, 1.0), ViewProj).xy), 1.0);
return vert_out;
}
float sinc(float x)
// Lanczos kernel weight for offset x.
// NOTE(review): two return statements appear (a plain sinc using PIval and
// the x_pi form); looks like removed and added diff lines shown together —
// confirm which is live. The x_pi form equals sinc(x) * sinc(x / 3) and
// evaluates 0/0 at x == 0.
float weight(float x)
{
const float PIval = 3.1415926535897932384626433832795;
return sin(x * PIval) / (x * PIval);
float x_pi = x * 3.141592654;
return 3.0 * sin(x_pi) * sin(x_pi * (1.0 / 3.0)) / (x_pi * x_pi);
}
// weight6 fills tap012/tap345 with six kernel weights at offsets
// f_neg - 2 .. f_neg + 3 and scales them so they sum to 1.
// NOTE(review): this span also contains a two-argument weight() and a
// weight3() whose bodies are interleaved with weight6; looks like removed
// and added diff lines shown together — confirm which lines are live.
float weight(float x, float radius)
void weight6(float f_neg, out float3 tap012, out float3 tap345)
{
float ax = abs(x);
if (x == 0.0)
return 1.0;
else if (ax < radius)
return sinc(x) * sinc(x / radius);
else
return 0.0;
}
// Taps 0..2: offsets f_neg - 2, f_neg - 1 and f_neg.
tap012 = float3(
weight(f_neg - 2.0),
weight(f_neg - 1.0),
min(1.0, weight(f_neg))); // Replace NaN with 1.0.
// Taps 3..5: offsets f_neg + 1, f_neg + 2 and f_neg + 3.
tap345 = float3(
weight(f_neg + 1.0),
weight(f_neg + 2.0),
weight(f_neg + 3.0));
float3 weight3(float x, float scale)
{
return float3(
weight((x * 2.0 + 0.0 * 2.0 - 3.0) * scale, 3.0),
weight((x * 2.0 + 1.0 * 2.0 - 3.0) * scale, 3.0),
weight((x * 2.0 + 2.0 * 2.0 - 3.0) * scale, 3.0));
// Normalize weights
float sum = tap012.x + tap012.y + tap012.z + tap345.x + tap345.y + tap345.z;
float sum_i = 1.0 / sum;
tap012 = tap012 * sum_i;
tap345 = tap345 * sum_i;
}
float AspectUndistortX(float x, float a)
@ -75,90 +76,134 @@ float AspectUndistortU(float u)
return AspectUndistortX((u - 0.5) * 2.0, undistort_factor) * 0.5 + 0.5;
}
float2 pixel_coord(float xpos, float ypos)
// Remaps the horizontal coordinate through AspectUndistortU and leaves the
// vertical coordinate unchanged.
float2 undistort_coord(float xpos, float ypos)
{
	float remapped_x = AspectUndistortU(xpos);
	return float2(remapped_x, ypos);
}
float4 pixel(float xpos, float ypos, bool undistort)
// Samples `image` at the coordinate returned by undistort_coord.
// NOTE(review): the body also branches on an `undistort` flag and calls
// pixel_coord, neither of which is in this signature; looks like removed
// and added diff lines shown together — confirm which lines are live.
float4 undistort_pixel(float xpos, float ypos)
{
if (undistort)
return image.Sample(textureSampler, pixel_coord(xpos, ypos));
else
return image.Sample(textureSampler, float2(xpos, ypos));
return image.Sample(textureSampler, undistort_coord(xpos, ypos));
}
float4 get_line(float ypos, float3 xpos1, float3 xpos2, float3 rowtap1,
float3 rowtap2, bool undistort)
// Weighted sum of six horizontal taps on row `ypos`: tap i is fetched with
// undistort_pixel at the i-th x position and scaled by the i-th row weight.
// NOTE(review): six additional terms using pixel()/xpos1/xpos2/rowtap1/
// rowtap2 appear first; looks like removed and added diff lines shown
// together — confirm which terms are live.
float4 undistort_line(float3 xpos012, float3 xpos345, float ypos, float3 rowtap012,
float3 rowtap345)
{
return
pixel(xpos1.r, ypos, undistort) * rowtap1.r +
pixel(xpos1.g, ypos, undistort) * rowtap2.r +
pixel(xpos1.b, ypos, undistort) * rowtap1.g +
pixel(xpos2.r, ypos, undistort) * rowtap2.g +
pixel(xpos2.g, ypos, undistort) * rowtap1.b +
pixel(xpos2.b, ypos, undistort) * rowtap2.b;
undistort_pixel(xpos012.x, ypos) * rowtap012.x +
undistort_pixel(xpos012.y, ypos) * rowtap012.y +
undistort_pixel(xpos012.z, ypos) * rowtap012.z +
undistort_pixel(xpos345.x, ypos) * rowtap345.x +
undistort_pixel(xpos345.y, ypos) * rowtap345.y +
undistort_pixel(xpos345.z, ypos) * rowtap345.z;
}
float4 DrawLanczos(FragData v_in, bool undistort)
// 6x6-tap Lanczos resample of `image` at f_in.uv. When `undistort` is true
// every tap goes through undistort_line; otherwise the two central taps on
// each axis are merged into a single Sample and the outer taps use Load.
// NOTE(review): lines referencing v_in, stepxy, f, rowtap1/2, coltap1/2,
// xystart and get_line are mixed in with the f_in-based code; looks like
// removed and added diff lines shown together — confirm which are live.
float4 DrawLanczos(FragData f_in, bool undistort)
{
float2 stepxy = base_dimension_i;
float2 pos = v_in.uv + stepxy * 0.5;
float2 f = frac(pos / stepxy);
// pos2 snaps pos down to the nearest (n + 0.5) grid point, so
// f_neg = pos2 - pos lies in (-1, 0].
float2 pos = f_in.uv;
float2 pos2 = floor(pos - 0.5) + 0.5;
float2 f_neg = pos2 - pos;
float3 rowtap1 = weight3((1.0 - f.x) / 2.0, v_in.scale.x);
float3 rowtap2 = weight3((1.0 - f.x) / 2.0 + 0.5, v_in.scale.x);
float3 coltap1 = weight3((1.0 - f.y) / 2.0, v_in.scale.y);
float3 coltap2 = weight3((1.0 - f.y) / 2.0 + 0.5, v_in.scale.y);
// Six normalized horizontal tap weights.
float3 rowtap012, rowtap345;
weight6(f_neg.x, rowtap012, rowtap345);
/* make sure all taps added together is exactly 1.0, otherwise some
* (very small) distortion can occur */
float suml = rowtap1.r + rowtap1.g + rowtap1.b + rowtap2.r + rowtap2.g + rowtap2.b;
float sumc = coltap1.r + coltap1.g + coltap1.b + coltap2.r + coltap2.g + coltap2.b;
rowtap1 /= suml;
rowtap2 /= suml;
coltap1 /= sumc;
coltap2 /= sumc;
// Six normalized vertical tap weights.
float3 coltap012, coltap345;
weight6(f_neg.y, coltap012, coltap345);
float2 xystart = (-2.5 - f) * stepxy + pos;
float3 xpos1 = float3(xystart.x , xystart.x + stepxy.x , xystart.x + stepxy.x * 2.0);
float3 xpos2 = float3(xystart.x + stepxy.x * 3.0, xystart.x + stepxy.x * 4.0, xystart.x + stepxy.x * 5.0);
// uv of the six tap columns/rows: uv2 is pos2 scaled by base_dimension_i,
// the others are one base_dimension_i step apart on either side.
float2 uv2 = pos2 * base_dimension_i;
float2 uv1 = uv2 - base_dimension_i;
float2 uv0 = uv1 - base_dimension_i;
float2 uv3 = uv2 + base_dimension_i;
float2 uv4 = uv3 + base_dimension_i;
float2 uv5 = uv4 + base_dimension_i;
return
get_line(xystart.y , xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.r +
get_line(xystart.y + stepxy.y , xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.r +
get_line(xystart.y + stepxy.y * 2.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.g +
get_line(xystart.y + stepxy.y * 3.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.g +
get_line(xystart.y + stepxy.y * 4.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.b +
get_line(xystart.y + stepxy.y * 5.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.b;
// Undistort path: all six rows of six taps go through undistort_line.
if (undistort) {
float3 xpos012 = float3(uv0.x, uv1.x, uv2.x);
float3 xpos345 = float3(uv3.x, uv4.x, uv5.x);
return undistort_line(xpos012, xpos345, uv0.y, rowtap012, rowtap345) * coltap012.x +
undistort_line(xpos012, xpos345, uv1.y, rowtap012, rowtap345) * coltap012.y +
undistort_line(xpos012, xpos345, uv2.y, rowtap012, rowtap345) * coltap012.z +
undistort_line(xpos012, xpos345, uv3.y, rowtap012, rowtap345) * coltap345.x +
undistort_line(xpos012, xpos345, uv4.y, rowtap012, rowtap345) * coltap345.y +
undistort_line(xpos012, xpos345, uv5.y, rowtap012, rowtap345) * coltap345.z;
}
// Taps 2 and 3 are fetched with one Sample placed between them: the offset
// from tap 2 is tap 3's share of their combined weight, in base_dimension_i
// steps. Presumably relies on linear filtering in textureSampler — confirm.
float u_weight_sum = rowtap012.z + rowtap345.x;
float u_middle_offset = rowtap345.x * base_dimension_i.x / u_weight_sum;
float u_middle = uv2.x + u_middle_offset;
float v_weight_sum = coltap012.z + coltap345.x;
float v_middle_offset = coltap345.x * base_dimension_i.y / v_weight_sum;
float v_middle = uv2.y + v_middle_offset;
// Integer Load coordinates for the outer taps (0, 1, 4, 5), clamped to
// [0.5, base_dimension - 0.5] before truncation. Presumably this keeps
// Load inside the texture — confirm base_dimension is the texture size.
float2 coord_limit = base_dimension - 0.5;
float2 coord0_f = max(uv0 * base_dimension, 0.5);
float2 coord1_f = max(uv1 * base_dimension, 0.5);
float2 coord4_f = min(uv4 * base_dimension, coord_limit);
float2 coord5_f = min(uv5 * base_dimension, coord_limit);
int2 coord0 = int2(coord0_f);
int2 coord1 = int2(coord1_f);
int2 coord4 = int2(coord4_f);
int2 coord5 = int2(coord5_f);
// Row 0: four Loads for the outer taps plus one merged Sample.
float4 row0 = image.Load(int3(coord0, 0)) * rowtap012.x;
row0 += image.Load(int3(coord1.x, coord0.y, 0)) * rowtap012.y;
row0 += image.Sample(textureSampler, float2(u_middle, uv0.y)) * u_weight_sum;
row0 += image.Load(int3(coord4.x, coord0.y, 0)) * rowtap345.y;
row0 += image.Load(int3(coord5.x, coord0.y, 0)) * rowtap345.z;
float4 total = row0 * coltap012.x;
// Row 1.
float4 row1 = image.Load(int3(coord0.x, coord1.y, 0)) * rowtap012.x;
row1 += image.Load(int3(coord1.x, coord1.y, 0)) * rowtap012.y;
row1 += image.Sample(textureSampler, float2(u_middle, uv1.y)) * u_weight_sum;
row1 += image.Load(int3(coord4.x, coord1.y, 0)) * rowtap345.y;
row1 += image.Load(int3(coord5.x, coord1.y, 0)) * rowtap345.z;
total += row1 * coltap012.y;
// Rows 2 and 3 are merged into one row sampled at v_middle and weighted by
// v_weight_sum; every tap in it is therefore a Sample rather than a Load.
float4 row23 = image.Sample(textureSampler, float2(uv0.x, v_middle)) * rowtap012.x;
row23 += image.Sample(textureSampler, float2(uv1.x, v_middle)) * rowtap012.y;
row23 += image.Sample(textureSampler, float2(u_middle, v_middle)) * u_weight_sum;
row23 += image.Sample(textureSampler, float2(uv4.x, v_middle)) * rowtap345.y;
row23 += image.Sample(textureSampler, float2(uv5.x, v_middle)) * rowtap345.z;
total += row23 * v_weight_sum;
// Row 4.
float4 row4 = image.Load(int3(coord0.x, coord4.y, 0)) * rowtap012.x;
row4 += image.Load(int3(coord1.x, coord4.y, 0)) * rowtap012.y;
row4 += image.Sample(textureSampler, float2(u_middle, uv4.y)) * u_weight_sum;
row4 += image.Load(int3(coord4.x, coord4.y, 0)) * rowtap345.y;
row4 += image.Load(int3(coord5.x, coord4.y, 0)) * rowtap345.z;
total += row4 * coltap345.y;
// Row 5.
float4 row5 = image.Load(int3(coord0.x, coord5.y, 0)) * rowtap012.x;
row5 += image.Load(int3(coord1.x, coord5.y, 0)) * rowtap012.y;
row5 += image.Sample(textureSampler, float2(u_middle, uv5.y)) * u_weight_sum;
row5 += image.Load(int3(coord4.x, coord5.y, 0)) * rowtap345.y;
row5 += image.Load(int3(coord5, 0)) * rowtap345.z;
total += row5 * coltap345.z;
return total;
}
float4 PSDrawLanczosRGBA(FragData v_in, bool undistort) : TARGET
// Pixel shader entry point: returns the DrawLanczos result unchanged.
// NOTE(review): two return statements appear (v_in and f_in); looks like
// removed and added diff lines shown together — confirm which is live.
float4 PSDrawLanczosRGBA(FragData f_in, bool undistort) : TARGET
{
return DrawLanczos(v_in, undistort);
return DrawLanczos(f_in, undistort);
}
float4 PSDrawLanczosRGBADivide(FragData v_in) : TARGET
// Pixel shader entry point: runs DrawLanczos without undistortion, then
// scales rgb by 1 / alpha and returns alpha unchanged.
// NOTE(review): `rgba` is declared twice (from v_in and from f_in); looks
// like removed and added diff lines shown together — confirm which is live.
float4 PSDrawLanczosRGBADivide(FragData f_in) : TARGET
{
float4 rgba = DrawLanczos(v_in, false);
float4 rgba = DrawLanczos(f_in, false);
float alpha = rgba.a;
// Avoid dividing by zero: a non-positive alpha yields zero colour.
float multiplier = (alpha > 0.0) ? (1.0 / alpha) : 0.0;
return float4(rgba.rgb * multiplier, alpha);
}
// Pixel shader entry point: runs DrawLanczos without undistortion, clamps
// the colour to [0, 1], transforms it by color_matrix and returns the xyz
// result with alpha forced to 1.0.
float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
{
	float4 clamped = float4(saturate(DrawLanczos(v_in, false).rgb), 1.0);
	float3 converted = mul(clamped, color_matrix).xyz;
	return float4(converted, 1.0);
}
// Single-pass technique: VSDefault with PSDrawLanczosRGBA, undistort off.
// NOTE(review): pixel_shader is assigned twice (v_in and f_in); looks like
// removed and added diff lines shown together — confirm which is live.
technique Draw
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLanczosRGBA(v_in, false);
pixel_shader = PSDrawLanczosRGBA(f_in, false);
}
}
@ -167,7 +212,7 @@ technique DrawAlphaDivide
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLanczosRGBADivide(v_in);
pixel_shader = PSDrawLanczosRGBADivide(f_in);
}
}
@ -176,15 +221,6 @@ technique DrawUndistort
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLanczosRGBA(v_in, true);
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLanczosMatrix(v_in);
pixel_shader = PSDrawLanczosRGBA(f_in, true);
}
}