381 lines
9.5 KiB
Text
381 lines
9.5 KiB
Text
|
/******************************************************************************
|
||
|
Copyright (C) 2014 by Hugh Bailey <obs.jim@gmail.com>
|
||
|
|
||
|
This program is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, either version 2 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
******************************************************************************/
|
||
|
|
||
|
//#define DEBUGGING
|
||
|
|
||
|
/* Matrix applied to the incoming vertex position by VSDefault. */
uniform float4x4 ViewProj;

/* Byte offsets that the pixel shaders compare a texel's byte offset against
 * in order to decide which plane that texel belongs to. */
uniform float u_plane_offset;
uniform float v_plane_offset;

/* Frame dimensions plus derived values.
 * NOTE(review): judging by how they are used, the _i suffix is presumably the
 * reciprocal and _d2 presumably half the value -- confirm against the host
 * code that sets these parameters. */
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;

/* Same family of values for the texture bound as 'image'.
 * NOTE(review): input_*_i_d2 is added as a final offset after snapping to a
 * texel, so it is presumably half a texel -- confirm. */
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;

/* Texture sampled by every pixel shader in this file. */
uniform texture2d image;
|
||
|
|
||
|
/* Sampler shared by all shaders here: linear filtering, with coordinates
 * clamped on both axes. */
sampler_state def_sampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
};
|
||
|
|
||
|
/* Vertex data used both as vertex shader input/output and as pixel shader
 * input: a position and one set of texture coordinates. */
struct VertInOut {
	float4 pos : POSITION;
	float2 uv  : TEXCOORD0;
};
|
||
|
|
||
|
/* Vertex shader used by every technique: transforms the position by ViewProj
 * (forcing w to 1.0) and forwards the texture coordinates untouched. */
VertInOut VSDefault(VertInOut vert_in)
{
	float4 source_pos = float4(vert_in.pos.xyz, 1.0);

	VertInOut result;
	result.pos = mul(source_pos, ViewProj);
	result.uv  = vert_in.uv;
	return result;
}
|
||
|
|
||
|
/* small bias added before floor()/fmod() to avoid internal GPU precision
 * issues, with fmod in particular */
#define PRECISION_OFFSET 0.2
|
||
|
|
||
|
/* Pixel shader for the NV12 technique.  Each output texel holds four values.
 * Texels whose byte offset is below u_plane_offset return component 1 of four
 * horizontally adjacent samples of 'image'; all other texels return the .rb
 * components of two samples taken on texel borders. */
float4 PSNV12(VertInOut vert_in) : TARGET
{
	float scanline = floor(vert_in.uv.y * input_height);

	/* four values are written per output texel, hence the * 4.0 */
	float out_byte = floor((scanline + vert_in.uv.x) * width) * 4.0;
	out_byte += PRECISION_OFFSET;

	if (out_byte < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float coord_u = floor(fmod(out_byte, width)) * width_i;
		float coord_v = floor(out_byte * width_i) * height_i;

		/* shift onto texel centers so each of the 4 pixels is read
		 * exactly */
		coord_u += width_i * 0.5;
		coord_v += height_i * 0.5;

		/* walk right one texel at a time */
		float2 tap0 = float2(coord_u, coord_v);
		coord_u += width_i;
		float2 tap1 = float2(coord_u, coord_v);
		coord_u += width_i;
		float2 tap2 = float2(coord_u, coord_v);
		float2 tap3 = float2(coord_u + width_i, coord_v);

		float4x4 gathered = float4x4(
			image.Sample(def_sampler, tap0),
			image.Sample(def_sampler, tap1),
			image.Sample(def_sampler, tap2),
			image.Sample(def_sampler, tap3)
		);

		/* after the transpose, entry 1 is component 1 of all four
		 * samples */
		return transpose(gathered)[1];
	}

#ifdef DEBUGGING
	return float4(0.5, 0.2, 0.5, 0.2);
#endif

	float plane_byte = out_byte - u_plane_offset;

	float border_u = floor(fmod(plane_byte, width)) * width_i;
	float border_v = floor(plane_byte * width_i) * height_d2_i;
	float two_texels = width_i*2.0;

	/* sit on the border shared by each group of 4 pixels so that the
	 * linear sampler averages them */
	border_u += width_i;
	border_v += height_i;

	float2 left_pos  = float2(border_u, border_v);
	float2 right_pos = float2(border_u + two_texels, border_v);

	return float4(
		image.Sample(def_sampler, left_pos).rb,
		image.Sample(def_sampler, right_pos).rb
	);
}
|
||
|
|
||
|
/* Pixel shader for the Planar420 technique.  Each output texel holds four
 * values gathered from four horizontally spaced samples of 'image'.  The
 * texel's byte offset, compared with u_plane_offset and v_plane_offset,
 * selects both the sampling pattern and which component of the samples is
 * returned (1 for the first plane, 0 for the second, 2 for the third). */
float4 PSPlanar420(VertInOut vert_in) : TARGET
{
	float scanline = floor(vert_in.uv.y * input_height);

	/* four values are written per output texel, hence the * 4.0 */
	float out_byte = floor((scanline + vert_in.uv.x) * width) * 4.0;
	out_byte += PRECISION_OFFSET;

	float coord_u;
	float coord_v;
	float stride_u;

	if (out_byte < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		coord_u = floor(fmod(out_byte, width)) * width_i;
		coord_v = floor(out_byte * width_i) * height_i;

		/* shift onto texel centers so each of the 4 pixels is read
		 * exactly */
		coord_u += width_i * 0.5;
		coord_v += height_i * 0.5;

		stride_u = width_i;
	} else {
#ifdef DEBUGGING
		return ((out_byte < v_plane_offset) ?
			float4(0.5, 0.5, 0.5, 0.5) :
			float4(0.2, 0.2, 0.2, 0.2));
#endif

		/* make the offset relative to the start of its own plane */
		float plane_byte = out_byte -
			((out_byte < v_plane_offset) ?
			 u_plane_offset : v_plane_offset);

		coord_u = floor(fmod(plane_byte, width_d2)) * width_d2_i;
		coord_v = floor(plane_byte * width_d2_i) * height_d2_i;

		/* sit on the border shared by each group of 4 pixels so that
		 * the linear sampler averages them */
		coord_u += width_i;
		coord_v += height_i;

		stride_u = width_i*2.0;
	}

	/* four taps, each one stride to the right of the previous */
	float2 tap0 = float2(coord_u, coord_v);
	coord_u += stride_u;
	float2 tap1 = float2(coord_u, coord_v);
	coord_u += stride_u;
	float2 tap2 = float2(coord_u, coord_v);
	float2 tap3 = float2(coord_u + stride_u, coord_v);

	float4x4 gathered = float4x4(
		image.Sample(def_sampler, tap0),
		image.Sample(def_sampler, tap1),
		image.Sample(def_sampler, tap2),
		image.Sample(def_sampler, tap3)
	);

	/* after the transpose, entry N is component N of all four samples */
	gathered = transpose(gathered);

	if (out_byte < u_plane_offset)
		return gathered[1];
	if (out_byte < v_plane_offset)
		return gathered[0];
	return gathered[2];
}
|
||
|
|
||
|
/* Pixel shader for the Planar444 technique.  Every plane is sampled the same
 * way (four adjacent texel centers of 'image'); the texel's byte offset,
 * compared with u_plane_offset and v_plane_offset, only decides which
 * component of the four samples is returned (1, 0 or 2). */
float4 PSPlanar444(VertInOut vert_in) : TARGET
{
	float scanline = floor(vert_in.uv.y * input_height);

	/* four values are written per output texel, hence the * 4.0 */
	float out_byte = floor((scanline + vert_in.uv.x) * width) * 4.0;
	out_byte += PRECISION_OFFSET;

	/* make the offset relative to the start of its own plane */
	float plane_byte = out_byte;
	if (out_byte >= v_plane_offset) {
		plane_byte -= v_plane_offset;
	} else if (out_byte >= u_plane_offset) {
		plane_byte -= u_plane_offset;
	}

	float coord_u = floor(fmod(plane_byte, width)) * width_i;
	float coord_v = floor(plane_byte * width_i) * height_i;

	/* shift onto texel centers so each of the 4 pixels is read exactly */
	coord_u += width_i * 0.5;
	coord_v += height_i * 0.5;

	/* walk right one texel at a time */
	float2 tap0 = float2(coord_u, coord_v);
	coord_u += width_i;
	float2 tap1 = float2(coord_u, coord_v);
	coord_u += width_i;
	float2 tap2 = float2(coord_u, coord_v);
	float2 tap3 = float2(coord_u + width_i, coord_v);

	float4x4 gathered = float4x4(
		image.Sample(def_sampler, tap0),
		image.Sample(def_sampler, tap1),
		image.Sample(def_sampler, tap2),
		image.Sample(def_sampler, tap3)
	);

	/* after the transpose, entry N is component N of all four samples */
	gathered = transpose(gathered);

	if (out_byte < u_plane_offset)
		return gathered[1];
	if (out_byte < v_plane_offset)
		return gathered[0];
	return gathered[2];
}
|
||
|
|
||
|
/* Pixel shader shared by the packed *_Reverse techniques.  One texel of
 * 'image' is fetched per pair of output pixels; u_pos, v_pos, y0_pos and
 * y1_pos are the component indices (0-3) inside that texel.  Even output
 * columns use y0_pos, odd columns use y1_pos, and both use the same u_pos and
 * v_pos components.  Returns (y, u, v, 1.0). */
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
		int y0_pos, int y1_pos) : TARGET
{
	/* 1.0 on odd output columns, 0.0 on even ones */
	float column = width * vert_in.uv.x + PRECISION_OFFSET;
	float is_second = floor(fmod(column, 2.0));

	/* snap to the texel holding this pixel pair, then apply the
	 * input_width_i_d2 offset */
	float pair_u = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
		width_d2_i;
	pair_u += input_width_i_d2;

	float2 pair_pos = float2(pair_u, vert_in.uv.y);
	float4 texel = image.Sample(def_sampler, pair_pos);

	float luma = (is_second > 0.5) ? texel[y1_pos] : texel[y0_pos];

	return float4(luma, texel[u_pos], texel[v_pos], 1.0);
}
|
||
|
|
||
|
/* Treats 'image' as a linear buffer that is input_width entries wide and
 * returns the red component of the entry found at 'offset'. */
float GetOffsetColor(float offset)
{
	float biased = offset + PRECISION_OFFSET;

	/* split the linear offset into a column and a line */
	float col_index  = floor(fmod(biased, input_width));
	float line_index = floor(biased * input_width_i);

	/* scale to texture coordinates and apply the input_*_i_d2 offsets */
	float2 texel_pos = float2(
		col_index  * input_width_i  + input_width_i_d2,
		line_index * input_height_i + input_height_i_d2
	);

	float4 texel = image.Sample(def_sampler, texel_pos);
	return texel.r;
}
|
||
|
|
||
|
/* Pixel shader for the I420_Reverse technique.  For each output pixel it
 * computes one linear offset into the first plane and one shared offset into
 * the two planes starting at u_plane_offset and v_plane_offset (one entry
 * per 2x2 pixel group), and returns the three fetched values with alpha 1.0. */
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
	/* integer pixel coordinates of this output texel */
	float pixel_x = floor(vert_in.uv.x * width + PRECISION_OFFSET);
	float pixel_y = floor(vert_in.uv.y * height + PRECISION_OFFSET);

	float luma_index = floor(pixel_y * width + pixel_x + PRECISION_OFFSET);

	/* half-resolution index: the floor drops the .5 left by odd columns */
	float chroma_line = floor(pixel_y * 0.5 + PRECISION_OFFSET);
	float chroma_index = floor(chroma_line * width_d2 +
		(pixel_x * 0.5) + PRECISION_OFFSET);

	float first  = GetOffsetColor(luma_index);
	float second = GetOffsetColor(u_plane_offset + chroma_index);
	float third  = GetOffsetColor(v_plane_offset + chroma_index);

	return float4(first, second, third, 1.0);
}
|
||
|
|
||
|
/* Pixel shader for the NV12_Reverse technique.  For each output pixel it
 * computes one linear offset into the first plane and the offset of the
 * two-byte pair shared by its 2x2 pixel group in the plane that starts at
 * u_plane_offset, and returns (first-plane value, pair byte 0, pair byte 1,
 * 1.0). */
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
{
	float x = vert_in.uv.x;
	float y = vert_in.uv.y;

	/* integer pixel coordinates of this output texel */
	float x_offset = floor(x * width + PRECISION_OFFSET);
	float y_offset = floor(y * height + PRECISION_OFFSET);

	float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
	lum_offset = floor(lum_offset);

	/* index of the pair for this 2x2 group (width_d2 pairs per line) */
	float ch_offset = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
		(x_offset * 0.5);

	/* Floor BEFORE doubling.  On odd columns x_offset * 0.5 carries a .5
	 * fraction; doubling first would turn it into an odd byte offset, so
	 * the two fetches below would read the second byte of this pair and
	 * the first byte of the next pair instead of this pair's two bytes. */
	ch_offset = floor(ch_offset + PRECISION_OFFSET) * 2.0;

	return float4(
		GetOffsetColor(lum_offset),
		GetOffsetColor(u_plane_offset + ch_offset),
		GetOffsetColor(u_plane_offset + ch_offset + 1.0),
		1.0
	);
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPlanar420. */
technique Planar420
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420(vert_in);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPlanar444. */
technique Planar444
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar444(vert_in);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSNV12. */
technique NV12
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12(vert_in);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPacked422_Reverse with component indices
 * u_pos = 2, v_pos = 0, y0_pos = 1, y1_pos = 3. */
technique UYVY_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPacked422_Reverse with component indices
 * u_pos = 1, v_pos = 3, y0_pos = 2, y1_pos = 0. */
technique YUY2_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPacked422_Reverse with component indices
 * u_pos = 3, v_pos = 1, y0_pos = 2, y1_pos = 0. */
technique YVYU_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSPlanar420_Reverse. */
technique I420_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420_Reverse(vert_in);
	}
}
|
||
|
|
||
|
/* Single pass: VSDefault + PSNV12_Reverse. */
technique NV12_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_Reverse(vert_in);
	}
}
|