/*
 * lanczos sharper
 * note - this shader is adapted from the GPL bsnes shader, very good stuff
 * there.
 */

/*
 * NOTE: only block comments are used in this file. Line comments are unsafe
 * if the file is ever whitespace-collapsed, because they would swallow the
 * code that follows them on the same physical line.
 */

uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
/* Used as the per-tap UV step in DrawLanczos; presumably 1 / source size
 * in texels - confirm against the caller that sets it. */
uniform float2 base_dimension_i;
/* Blend factor 'a' passed to AspectUndistortX (1.0 yields a pure linear
 * mapping there). */
uniform float undistort_factor = 1.0;

sampler_state textureSampler
{
    AddressU = Clamp;
    AddressV = Clamp;
    Filter = Linear;
};

struct VertData {
    float4 pos : POSITION;
    float2 uv : TEXCOORD0;
};

struct FragData {
    float4 pos : POSITION;
    float2 uv : TEXCOORD0;
    float2 scale : TEXCOORD1;
};

/*
 * Vertex shader: transforms the position by ViewProj, passes the UV through
 * unchanged, and computes a per-axis kernel scale that is clamped to at
 * most 1.0.
 */
FragData VSDefault(VertData v_in)
{
    FragData vert_out;
    vert_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
    vert_out.uv = v_in.uv;
    vert_out.scale = min(0.25 + abs(0.75 / mul(float4(1.0 / base_dimension_i.xy, 1.0, 1.0), ViewProj).xy), 1.0);
    return vert_out;
}

/*
 * Normalized sinc: sin(pi * x) / (pi * x).
 * Divides by zero when x == 0.0; weight() handles that case before calling.
 */
float sinc(float x)
{
    const float PIval = 3.1415926535897932384626433832795;
    return sin(x * PIval) / (x * PIval);
}

/*
 * Windowed-sinc weight for offset x:
 *   1.0                        when x is exactly 0.0,
 *   sinc(x) * sinc(x / radius) when |x| < radius,
 *   0.0                        otherwise.
 */
float weight(float x, float radius)
{
    float ax = abs(x);
    if (x == 0.0)
        return 1.0;
    else if (ax < radius)
        return sinc(x) * sinc(x / radius);
    else
        return 0.0;
}

/*
 * Evaluates weight() with radius 3.0 at three offsets derived from x
 * (x * 2 - 3, x * 2 - 1 and x * 2 + 1 before scaling), each multiplied
 * by scale. The arithmetic is kept in its original order.
 */
float3 weight3(float x, float scale)
{
    return float3(
        weight((x * 2.0 + 0.0 * 2.0 - 3.0) * scale, 3.0),
        weight((x * 2.0 + 1.0 * 2.0 - 3.0) * scale, 3.0),
        weight((x * 2.0 + 2.0 * 2.0 - 3.0) * scale, 3.0));
}

/*
 * Blends a quintic curve with a linear one: (1 - a) * x^5 + a * x.
 */
float AspectUndistortX(float x, float a)
{
    /* The higher the power, the longer the linear part will be. */
    return (1.0 - a) * (x * x * x * x * x) + a * x;
}

/*
 * Applies AspectUndistortX to a texture coordinate using undistort_factor.
 */
float AspectUndistortU(float u)
{
    /* Normalize texture coord to -1.0 to 1.0 range, and back. */
    return AspectUndistortX((u - 0.5) * 2.0, undistort_factor) * 0.5 + 0.5;
}

/*
 * Builds the sampling coordinate with the undistort mapping applied to the
 * horizontal component only; ypos is passed through unchanged.
 */
float2 pixel_coord(float xpos, float ypos)
{
    return float2(AspectUndistortU(xpos), ypos);
}

/*
 * Samples 'image' at (xpos, ypos), optionally remapping the horizontal
 * coordinate through pixel_coord() when undistort is true.
 */
float4 pixel(float xpos, float ypos, bool undistort)
{
    if (undistort)
        return image.Sample(textureSampler, pixel_coord(xpos, ypos));
    else
        return image.Sample(textureSampler, float2(xpos, ypos));
}

/*
 * Weighted sum of six horizontal taps on the row at ypos.
 * Tap positions come from xpos1 (first three) and xpos2 (last three); the
 * weights alternate between rowtap1 and rowtap2:
 *   rowtap1.r, rowtap2.r, rowtap1.g, rowtap2.g, rowtap1.b, rowtap2.b.
 */
float4 get_line(float ypos, float3 xpos1, float3 xpos2, float3 rowtap1,
        float3 rowtap2, bool undistort)
{
    return
        pixel(xpos1.r, ypos, undistort) * rowtap1.r +
        pixel(xpos1.g, ypos, undistort) * rowtap2.r +
        pixel(xpos1.b, ypos, undistort) * rowtap1.g +
        pixel(xpos2.r, ypos, undistort) * rowtap2.g +
        pixel(xpos2.g, ypos, undistort) * rowtap1.b +
        pixel(xpos2.b, ypos, undistort) * rowtap2.b;
}

/*
 * Core filter: computes six row weights and six column weights from the
 * fractional position within a step, normalizes each set to sum to 1.0,
 * and accumulates six get_line() rows (6 x 6 samples in total).
 */
float4 DrawLanczos(FragData v_in, bool undistort)
{
    float2 stepxy = base_dimension_i;
    float2 pos = v_in.uv + stepxy * 0.5;
    float2 f = frac(pos / stepxy);

    float3 rowtap1 = weight3((1.0 - f.x) / 2.0, v_in.scale.x);
    float3 rowtap2 = weight3((1.0 - f.x) / 2.0 + 0.5, v_in.scale.x);
    float3 coltap1 = weight3((1.0 - f.y) / 2.0, v_in.scale.y);
    float3 coltap2 = weight3((1.0 - f.y) / 2.0 + 0.5, v_in.scale.y);

    /* make sure all taps added together is exactly 1.0, otherwise some
     * (very small) distortion can occur */
    float suml = rowtap1.r + rowtap1.g + rowtap1.b + rowtap2.r + rowtap2.g + rowtap2.b;
    float sumc = coltap1.r + coltap1.g + coltap1.b + coltap2.r + coltap2.g + coltap2.b;
    rowtap1 /= suml;
    rowtap2 /= suml;
    coltap1 /= sumc;
    coltap2 /= sumc;

    /* Coordinate of the first tap; the remaining taps follow at one-step
     * increments along each axis. */
    float2 xystart = (-2.5 - f) * stepxy + pos;
    float3 xpos1 = float3(xystart.x                 , xystart.x + stepxy.x      , xystart.x + stepxy.x * 2.0);
    float3 xpos2 = float3(xystart.x + stepxy.x * 3.0, xystart.x + stepxy.x * 4.0, xystart.x + stepxy.x * 5.0);

    /* Column weights alternate between coltap1 and coltap2 in the same
     * pattern get_line() uses for the row weights. */
    return
        get_line(xystart.y                 , xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.r +
        get_line(xystart.y + stepxy.y      , xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.r +
        get_line(xystart.y + stepxy.y * 2.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.g +
        get_line(xystart.y + stepxy.y * 3.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.g +
        get_line(xystart.y + stepxy.y * 4.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap1.b +
        get_line(xystart.y + stepxy.y * 5.0, xpos1, xpos2, rowtap1, rowtap2, undistort) * coltap2.b;
}

/* Pixel shader: returns the filtered RGBA value unchanged. */
float4 PSDrawLanczosRGBA(FragData v_in, bool undistort) : TARGET
{
    return DrawLanczos(v_in, undistort);
}

/*
 * Pixel shader: divides the filtered RGB by the filtered alpha and keeps
 * alpha as-is. RGB becomes 0.0 when alpha is not greater than 0.0, which
 * avoids dividing by zero.
 */
float4 PSDrawLanczosRGBADivide(FragData v_in) : TARGET
{
    float4 rgba = DrawLanczos(v_in, false);
    float alpha = rgba.a;
    float multiplier = (alpha > 0.0) ? (1.0 / alpha) : 0.0;
    return float4(rgba.rgb * multiplier, alpha);
}

/*
 * Pixel shader: saturates the filtered RGB, multiplies it by color_matrix
 * and outputs the result with alpha forced to 1.0.
 */
float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
{
    float3 rgb = DrawLanczos(v_in, false).rgb;
    float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
    return float4(yuv, 1.0);
}

technique Draw
{
    pass
    {
        vertex_shader = VSDefault(v_in);
        pixel_shader = PSDrawLanczosRGBA(v_in, false);
    }
}

technique DrawAlphaDivide
{
    pass
    {
        vertex_shader = VSDefault(v_in);
        pixel_shader = PSDrawLanczosRGBADivide(v_in);
    }
}

technique DrawUndistort
{
    pass
    {
        vertex_shader = VSDefault(v_in);
        pixel_shader = PSDrawLanczosRGBA(v_in, true);
    }
}

technique DrawMatrix
{
    pass
    {
        vertex_shader = VSDefault(v_in);
        pixel_shader = PSDrawLanczosMatrix(v_in);
    }
}