/******************************************************************************
    Copyright (C) 2014 by Hugh Bailey

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/

/* Uncomment to make the plane-packing shaders return a constant color per
 * plane instead of sampled data. */
//#define DEBUGGING

uniform float4x4 ViewProj;

/* Offsets at which the second and third planes start, compared against the
 * per-pixel byte offset computed in the plane-packing shaders. */
uniform float    u_plane_offset;
uniform float    v_plane_offset;

/* Dimensions and derived values.  NOTE(review): judging by their use, the
 * "_i" suffix is a reciprocal and "_d2" is a halved value; the host code that
 * sets them is not visible here -- confirm. */
uniform float    width;
uniform float    height;
uniform float    width_i;
uniform float    height_i;
uniform float    width_d2;
uniform float    height_d2;
uniform float    width_d2_i;
uniform float    height_d2_i;
uniform float    input_width;
uniform float    input_height;
uniform float    input_width_i;
uniform float    input_height_i;
uniform float    input_width_i_d2;
uniform float    input_height_i_d2;

/* Integer counterparts used by the *_Reverse shaders for texel addressing. */
uniform int      int_width;
uniform int      int_input_width;
uniform int      int_u_plane_offset;
uniform int      int_v_plane_offset;

/* Applied to (y, u, v, 1) in the *_Reverse shaders after clamping the sample
 * to [color_range_min, color_range_max]. */
uniform float4x4 color_matrix;
uniform float3   color_range_min = {0.0, 0.0, 0.0};
uniform float3   color_range_max = {1.0, 1.0, 1.0};

uniform texture2d image;

sampler_state def_sampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
};

struct VertInOut {
	float4 pos : POSITION;
	float2 uv  : TEXCOORD0;
};

/* Vertex shader shared by every technique: transforms the position by
 * ViewProj (forcing w to 1.0) and passes the texture coordinate through. */
VertInOut VSDefault(VertInOut vert_in)
{
	VertInOut vert_out;
	vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
	vert_out.uv  = vert_in.uv;
	return vert_out;
}

/* used to prevent internal GPU precision issues with fmod in particular */
#define PRECISION_OFFSET 0.2

/*
 * Packs the source image into a two-plane layout, four values per output
 * pixel.  Output pixels whose byte offset lies below u_plane_offset carry the
 * second component (.y) of four horizontally adjacent samples; the remaining
 * pixels carry the .r/.b components of two samples taken on texel borders so
 * that the linear filter averages neighbouring pixels.
 */
float4 PSNV12(VertInOut vert_in) : TARGET
{
	float v_mul = floor(vert_in.uv.y * input_height);

	/* each output pixel holds 4 values, hence the factor of 4 */
	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	float2 sample_pos[4];

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float lum_u = floor(fmod(byte_offset, width)) * width_i;
		float lum_v = floor(byte_offset * width_i)    * height_i;

		/* move to texel centers to sample the 4 pixels properly */
		lum_u += width_i  * 0.5;
		lum_v += height_i * 0.5;

		/* lum_u is advanced in place so each sample is one texel to
		 * the right of the previous one */
		sample_pos[0] = float2(lum_u,            lum_v);
		sample_pos[1] = float2(lum_u += width_i, lum_v);
		sample_pos[2] = float2(lum_u += width_i, lum_v);
		sample_pos[3] = float2(lum_u +  width_i, lum_v);

		float4x4 out_val = float4x4(
			image.Sample(def_sampler, sample_pos[0]),
			image.Sample(def_sampler, sample_pos[1]),
			image.Sample(def_sampler, sample_pos[2]),
			image.Sample(def_sampler, sample_pos[3])
		);

		/* after the transpose, row 1 holds the second component of
		 * each of the four samples */
		return transpose(out_val)[1];
	} else {
#ifdef DEBUGGING
		return float4(0.5, 0.2, 0.5, 0.2);
#endif

		float new_offset = byte_offset - u_plane_offset;

		float ch_u = floor(fmod(new_offset, width)) * width_i;
		float ch_v = floor(new_offset * width_i)    * height_d2_i;
		float width_i2 = width_i*2.0;

		/* move to the borders of each set of 4 pixels to force it
		 * to do bilinear averaging */
		ch_u += width_i;
		ch_v += height_i;

		sample_pos[0] = float2(ch_u,            ch_v);
		sample_pos[1] = float2(ch_u + width_i2, ch_v);

		return float4(
			image.Sample(def_sampler, sample_pos[0]).rb,
			image.Sample(def_sampler, sample_pos[1]).rb
		);
	}
}

/* Single-channel output: the second component (.y) of the sampled texel. */
float PSNV12_Y(VertInOut vert_in) : TARGET
{
	return image.Sample(def_sampler, vert_in.uv.xy).y;
}

/* Two-channel output: the first and third components (.xz) of the sampled
 * texel. */
float2 PSNV12_UV(VertInOut vert_in) : TARGET
{
	return image.Sample(def_sampler, vert_in.uv.xy).xz;
}

/*
 * Packs the source image into a three-plane layout with half-resolution
 * second and third planes, four values per output pixel.  The plane an output
 * pixel belongs to is decided by comparing its byte offset against
 * u_plane_offset and v_plane_offset; the component returned for each plane is
 * .y, .x and .z of the four samples respectively.
 */
float4 PSPlanar420(VertInOut vert_in) : TARGET
{
	float v_mul = floor(vert_in.uv.y * input_height);

	/* each output pixel holds 4 values, hence the factor of 4 */
	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	float2 sample_pos[4];

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float lum_u = floor(fmod(byte_offset, width)) * width_i;
		float lum_v = floor(byte_offset * width_i)    * height_i;

		/* move to texel centers to sample the 4 pixels properly */
		lum_u += width_i  * 0.5;
		lum_v += height_i * 0.5;

		sample_pos[0] = float2(lum_u,            lum_v);
		sample_pos[1] = float2(lum_u += width_i, lum_v);
		sample_pos[2] = float2(lum_u += width_i, lum_v);
		sample_pos[3] = float2(lum_u +  width_i, lum_v);

	} else {
#ifdef DEBUGGING
		return ((byte_offset < v_plane_offset) ?
				float4(0.5, 0.5, 0.5, 0.5) :
				float4(0.2, 0.2, 0.2, 0.2));
#endif

		/* make the offset relative to the start of whichever chroma
		 * plane this pixel falls into */
		float new_offset = byte_offset -
			((byte_offset < v_plane_offset) ?
			 u_plane_offset : v_plane_offset);

		float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
		float ch_v = floor(new_offset * width_d2_i)    * height_d2_i;
		float width_i2 = width_i*2.0;

		/* move to the borders of each set of 4 pixels to force it
		 * to do bilinear averaging */
		ch_u += width_i;
		ch_v += height_i;

		/* set up coordinates for next chroma line, in case
		 * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is
		 * split between the current and the next chroma line; do note
		 * that the next chroma line is two source lines below the
		 * current source line */
		float ch_u_n = 0.0 + width_i;
		float ch_v_n = ch_v + height_i * 3.0;

		sample_pos[0] = float2(ch_u,             ch_v);
		sample_pos[1] = float2(ch_u += width_i2, ch_v);

		ch_u += width_i2;

		// check if ch_u overflowed the current source and chroma line
		if (ch_u > 1.0) {
			/* the last two samples wrap onto the next chroma line;
			 * the second one must land in slot 3 (writing slot 2
			 * twice would leave sample_pos[3] uninitialized) */
			sample_pos[2] = float2(ch_u_n,            ch_v_n);
			sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
		} else {
			sample_pos[2] = float2(ch_u,            ch_v);
			sample_pos[3] = float2(ch_u + width_i2, ch_v);
		}
	}

	float4x4 out_val = float4x4(
		image.Sample(def_sampler, sample_pos[0]),
		image.Sample(def_sampler, sample_pos[1]),
		image.Sample(def_sampler, sample_pos[2]),
		image.Sample(def_sampler, sample_pos[3])
	);

	/* after the transpose, row N holds component N of the four samples */
	out_val = transpose(out_val);

	if (byte_offset < u_plane_offset)
		return out_val[1];
	else if (byte_offset < v_plane_offset)
		return out_val[0];
	else
		return out_val[2];
}

/*
 * Packs the source image into a three-plane layout in which every plane is
 * sampled at full resolution, four values per output pixel.  Plane selection
 * and returned components match PSPlanar420 (.y, then .x, then .z).
 */
float4 PSPlanar444(VertInOut vert_in) : TARGET
{
	float v_mul = floor(vert_in.uv.y * input_height);

	/* each output pixel holds 4 values, hence the factor of 4 */
	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	/* make the offset relative to the start of the plane it falls into */
	float new_byte_offset = byte_offset;

	if (byte_offset >= v_plane_offset)
		new_byte_offset -= v_plane_offset;
	else if (byte_offset >= u_plane_offset)
		new_byte_offset -= u_plane_offset;

	float2 sample_pos[4];

	float u_val = floor(fmod(new_byte_offset, width)) * width_i;
	float v_val = floor(new_byte_offset * width_i)    * height_i;

	/* move to texel centers to sample the 4 pixels properly */
	u_val += width_i  * 0.5;
	v_val += height_i * 0.5;

	sample_pos[0] = float2(u_val,            v_val);
	sample_pos[1] = float2(u_val += width_i, v_val);
	sample_pos[2] = float2(u_val += width_i, v_val);
	sample_pos[3] = float2(u_val +  width_i, v_val);

	float4x4 out_val = float4x4(
		image.Sample(def_sampler, sample_pos[0]),
		image.Sample(def_sampler, sample_pos[1]),
		image.Sample(def_sampler, sample_pos[2]),
		image.Sample(def_sampler, sample_pos[3])
	);

	/* after the transpose, row N holds component N of the four samples */
	out_val = transpose(out_val);

	if (byte_offset < u_plane_offset)
		return out_val[1];
	else if (byte_offset < v_plane_offset)
		return out_val[0];
	else
		return out_val[2];
}

/* Loads the red channel of the texel addressed by a linear offset, treating
 * the input texture as rows of int_input_width texels. */
float GetIntOffsetColor(int offset)
{
	return image.Load(int3(offset % int_input_width,
	                       offset / int_input_width,
	                       0)).r;
}

/*
 * Decodes a packed 4:2:2 texel into RGB.  Each input texel covers two output
 * pixels; u_pos, v_pos, y0_pos and y1_pos are the component indices within the
 * texel of the two chroma values and of the luma for the even and odd output
 * pixel.
 */
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
		int y0_pos, int y1_pos) : TARGET
{
	float y = vert_in.uv.y;

	/* 1.0 for odd output columns, 0.0 for even ones */
	float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));

	/* snap x to the start of the pixel pair ... */
	float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
			width_d2_i;

	/* ... then shift by input_width_i_d2
	 * NOTE(review): presumably half an input texel, i.e. the texel
	 * center -- confirm against the host code */
	x += input_width_i_d2;

	float4 texel = image.Sample(def_sampler, float2(x, y));
	float3 yuv = float3(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
			texel[u_pos], texel[v_pos]);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Reads one luma value and two chroma values from a three-plane layout with
 * half-resolution chroma planes and converts them to RGB via color_matrix.
 */
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;

	/* chroma is addressed at half resolution in both directions */
	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
	int chroma1 = int_u_plane_offset + chroma_offset;
	int chroma2 = int_v_plane_offset + chroma_offset;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma1),
		GetIntOffsetColor(chroma2)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Reads one luma value and two chroma values from a three-plane layout in
 * which all planes share the same resolution and converts them to RGB via
 * color_matrix.
 */
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;

	/* same addressing as luma, relative to each chroma plane start */
	int chroma_offset = y * int_width + x;
	int chroma1 = int_u_plane_offset + chroma_offset;
	int chroma2 = int_v_plane_offset + chroma_offset;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma1),
		GetIntOffsetColor(chroma2)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Reads one luma value and an interleaved chroma pair from a two-plane layout
 * and converts them to RGB via color_matrix.
 */
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;

	/* the two chroma values are stored back to back, so the pair index
	 * is doubled and the second value sits at chroma + 1 */
	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
	int chroma = int_u_plane_offset + chroma_offset * 2;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma),
		GetIntOffsetColor(chroma + 1)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/* Expands a single-channel value so that 16/255 maps to 0.0 and 235/255 maps
 * to 1.0, and replicates it to an opaque gray RGB pixel. */
float4 PSY800_Limited(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	float limited = image.Load(int3(x, y, 0)).x;
	float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(full, full, full, 1.0);
}

/* Replicates the single-channel value unchanged to an opaque gray RGB
 * pixel. */
float4 PSY800_Full(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	float3 full = image.Load(int3(x, y, 0)).xxx;
	return float4(full, 1.0);
}

/* Expands the RGB channels so that 16/255 maps to 0.0 and 235/255 maps to
 * 1.0; alpha is passed through untouched. */
float4 PSRGB_Limited(VertInOut vert_in) : TARGET
{
	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);

	float4 rgba = image.Load(int3(x, y, 0));
	rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
	return rgba;
}

technique Planar420
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420(vert_in);
	}
}

technique Planar444
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar444(vert_in);
	}
}

technique NV12
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12(vert_in);
	}
}

technique NV12_Y
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_Y(vert_in);
	}
}

technique NV12_UV
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_UV(vert_in);
	}
}

/* The four integer arguments are u_pos, v_pos, y0_pos, y1_pos: the component
 * index of each value inside a packed texel. */
technique UYVY_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
	}
}

technique YUY2_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
	}
}

technique YVYU_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
	}
}

technique I420_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420_Reverse(vert_in);
	}
}

technique I444_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar444_Reverse(vert_in);
	}
}

technique NV12_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_Reverse(vert_in);
	}
}

technique Y800_Limited
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSY800_Limited(vert_in);
	}
}

technique Y800_Full
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSY800_Full(vert_in);
	}
}

technique RGB_Limited
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSRGB_Limited(vert_in);
	}
}