/*
 *  This file was generated by glsl2h.pl script
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#pragma once

#include "stdafx.h"

/*
 * GLSL source text of the "convert" shaders, embedded as one C string.
 *
 * Layout of the embedded text:
 *   - VERTEX_SHADER section: vs_main() forwards POSITION / TEXCOORD0 to the
 *     SHADER interface block and writes gl_Position with z = 0.5, w = 1.0.
 *   - FRAGMENT_SHADER section: entry points ps_main0 .. ps_main9, each one
 *     wrapped in an #ifdef of its own name, so a given compilation only
 *     contains the entry points whose name has been defined.
 *   - ps_main1 is the special case: when it is defined, the fragment output
 *     is declared as "uint SV_Target1" instead of "vec4 SV_Target0".
 *   - ENABLE_BINDLESS_TEX selects between the bindless and the binding = 0
 *     declaration of TextureSampler.
 *
 * The "#version" line inside the text is commented out, so the version
 * directive and the stage / entry-point defines are presumably prepended by
 * whatever code compiles this string -- TODO confirm against the loader.
 */
static const char* convert_glsl =
	"//#version 420 // Keep it for editor detection\n"
	"\n"
	"struct vertex_basic\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"};\n"
	"\n"
	"\n"
	"#ifdef VERTEX_SHADER\n"
	"\n"
	"out gl_PerVertex {\n"
	"    vec4 gl_Position;\n"
	"    float gl_PointSize;\n"
	"#if !pGL_ES\n"
	"    float gl_ClipDistance[];\n"
	"#endif\n"
	"};\n"
	"\n"
	"layout(location = 0) in vec2 POSITION;\n"
	"layout(location = 1) in vec2 TEXCOORD0;\n"
	"\n"
	"// FIXME set the interpolation (don't know what dx do)\n"
	"// flat means that there is no interpolation. The value given to the fragment shader is based on the provoking vertex conventions.\n"
	"//\n"
	"// noperspective means that there will be linear interpolation in window-space. This is usually not what you want, but it can have its uses.\n"
	"//\n"
	"// smooth, the default, means to do perspective-correct interpolation.\n"
	"//\n"
	"// The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area.\n"
	"out SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} VSout;\n"
	"\n"
	"#define VSout_p (VSout.p)\n"
	"#define VSout_t (VSout.t)\n"
	"\n"
	"void vs_main()\n"
	"{\n"
	"    VSout_p = vec4(POSITION, 0.5f, 1.0f);\n"
	"    VSout_t = TEXCOORD0;\n"
	"    gl_Position = vec4(POSITION, 0.5f, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
	"}\n"
	"\n"
	"#endif\n"
	"\n"
	"#ifdef FRAGMENT_SHADER\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} PSin;\n"
	"\n"
	"#define PSin_p (PSin.p)\n"
	"#define PSin_t (PSin.t)\n"
	"\n"
	"// Give a different name so I remember there is a special case!\n"
	"#ifdef ps_main1\n"
	"layout(location = 0) out uint SV_Target1;\n"
	"#else\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"#endif\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"#endif\n"
	"\n"
	"vec4 sample_c()\n"
	"{\n"
	"    return texture(TextureSampler, PSin_t );\n"
	"}\n"
	"\n"
	"vec4 ps_crt(uint i)\n"
	"{\n"
	"    vec4 mask[4] = vec4[4]\n"
	"        (\n"
	"         vec4(1, 0, 0, 0),\n"
	"         vec4(0, 1, 0, 0),\n"
	"         vec4(0, 0, 1, 0),\n"
	"         vec4(1, 1, 1, 0)\n"
	"        );\n"
	"	return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
	"}\n"
	"\n"
	"vec4 ps_scanlines(uint i)\n"
	"{\n"
	"	vec4 mask[2] =\n"
	"	{\n"
	"		vec4(1, 1, 1, 0),\n"
	"		vec4(0, 0, 0, 0)\n"
	"	};\n"
	"\n"
	"	return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
	"}\n"
	"\n"
	"#ifdef ps_main0\n"
	"void ps_main0()\n"
	"{\n"
	"    SV_Target0 = sample_c();\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main1\n"
	"void ps_main1()\n"
	"{\n"
	"    vec4 c = sample_c();\n"
	"\n"
	"	c.a *= 256.0f / 127.0f; // hm, 0.5 won't give us 1.0 if we just multiply with 2\n"
	"\n"
	"	highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
	"\n"
	"    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main7\n"
	"void ps_main7()\n"
	"{\n"
	"    vec4 c = sample_c();\n"
	"\n"
	"	c.a = dot(c.rgb, vec3(0.299, 0.587, 0.114));\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main5\n"
	"void ps_main5() // scanlines\n"
	"{\n"
	"	highp uvec4 p = uvec4(PSin_p);\n"
	"\n"
	"	vec4 c = ps_scanlines(p.y % 2u);\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main6\n"
	"void ps_main6() // diagonal\n"
	"{\n"
	"	highp uvec4 p = uvec4(PSin_p);\n"
	"\n"
	"	vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u);\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main8\n"
	"void ps_main8() // triangular\n"
	"{\n"
	"	highp uvec4 p = uvec4(PSin_p);\n"
	"\n"
	"	vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u);\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main9\n"
	"void ps_main9()\n"
	"{\n"
	"\n"
	"    const float PI = 3.14159265359f;\n"
	"\n"
	"	vec2 texdim = vec2(textureSize(TextureSampler, 0)); \n"
	"\n"
	"    vec4 c;\n"
	"    if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) {\n"
	"        c = sample_c(); \n"
	"    } else {\n"
	"        float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y));\n"
	"		c =  factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y));\n"
	"    }\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"// Used for DATE (stencil)\n"
	"// DATM == 1\n"
	"#ifdef ps_main2\n"
	"void ps_main2()\n"
	"{\n"
	"    if(sample_c().a < (127.5f / 255.0f)) // >= 0x80 pass\n"
	"        discard;\n"
	"\n"
	"#ifdef ENABLE_OGL_STENCIL_DEBUG\n"
	"    SV_Target0 = vec4(1.0f, 0.0f, 0.0f, 1.0f);\n"
	"#endif\n"
	"}\n"
	"#endif\n"
	"\n"
	"// Used for DATE (stencil)\n"
	"// DATM == 0\n"
	"#ifdef ps_main3\n"
	"void ps_main3()\n"
	"{\n"
	"    if((127.5f / 255.0f) < sample_c().a) // < 0x80 pass (== 0x80 should not pass)\n"
	"        discard;\n"
	"\n"
	"#ifdef ENABLE_OGL_STENCIL_DEBUG\n"
	"    SV_Target0 = vec4(1.0f, 0.0f, 0.0f, 1.0f);\n"
	"#endif\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef ps_main4\n"
	"void ps_main4()\n"
	"{\n"
	"    // FIXME mod and fmod are different when value are negative\n"
	"    // 	output.c = fmod(sample_c(input.t) * 255 + 0.5f, 256) / 255;\n"
	"    vec4 c = mod(sample_c() * 255.0f + 0.5f, 256.0f) / 255.0f;\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#endif\n"
	;

/*
 * GLSL source text of the "interlace" fragment shaders, embedded as one C
 * string.
 *
 * Entry points inside the FRAGMENT_SHADER section:
 *   - ps_main0: discards the fragment when fract(PSin_t.y * hH) < 0.5,
 *               otherwise outputs the sampled texel.
 *   - ps_main1: discards the fragment when fract(PSin_t.y * hH) > 0.5,
 *               otherwise outputs the sampled texel.
 *   - ps_main2: weighted blend (1, 2, 1) / 4 of the texels at
 *               PSin_t - ZrH, PSin_t and PSin_t + ZrH.
 *   - ps_main3: plain copy of the sampled texel.
 *
 * ZrH and hH come from the std140 uniform block cb11 (binding = 11).
 *
 * In ps_main0 / ps_main1 the texture() lookup is issued BEFORE the
 * conditional discard, as the in-shader comment asks for: texture() uses
 * implicit derivatives, which are only well defined in uniform control flow.
 *
 * The "#version" line inside the text is commented out, so the version
 * directive and stage defines are presumably prepended by whatever code
 * compiles this string -- TODO confirm against the loader.
 */
static const char* interlace_glsl =
	"//#version 420 // Keep it for editor detection\n"
	"\n"
	"struct vertex_basic\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"};\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} PSin;\n"
	"\n"
	"#define PSin_p (PSin.p)\n"
	"#define PSin_t (PSin.t)\n"
	"\n"
	"#ifdef FRAGMENT_SHADER\n"
	"\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"\n"
	"layout(std140, binding = 11) uniform cb11\n"
	"{\n"
	"    vec2 ZrH;\n"
	"    float hH;\n"
	"};\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"#endif\n"
	"\n"
	"// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n"
	"void ps_main0()\n"
	"{\n"
	"    // I'm not sure it impact us but be safe to lookup texture before conditional if\n"
	"    // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n"
	"    vec4 c = texture(TextureSampler, PSin_t);\n"
	"    if (fract(PSin_t.y * hH) - 0.5 < 0.0)\n"
	"        discard;\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"\n"
	"void ps_main1()\n"
	"{\n"
	"    // I'm not sure it impact us but be safe to lookup texture before conditional if\n"
	"    // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n"
	"    vec4 c = texture(TextureSampler, PSin_t);\n"
	"    if (0.5 - fract(PSin_t.y * hH) < 0.0)\n"
	"        discard;\n"
	"\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"\n"
	"void ps_main2()\n"
	"{\n"
	"    vec4 c0 = texture(TextureSampler, PSin_t - ZrH);\n"
	"    vec4 c1 = texture(TextureSampler, PSin_t);\n"
	"    vec4 c2 = texture(TextureSampler, PSin_t + ZrH);\n"
	"\n"
	"    SV_Target0 = (c0 + c1 * 2.0f + c2) / 4.0f;\n"
	"}\n"
	"\n"
	"void ps_main3()\n"
	"{\n"
	"    SV_Target0 = texture(TextureSampler, PSin_t);\n"
	"}\n"
	"\n"
	"#endif\n"
	;

/*
 * GLSL source text of the "merge" fragment shaders, embedded as one C string.
 *
 * Entry points inside the FRAGMENT_SHADER section:
 *   - ps_main0: samples TextureSampler at PSin_t and doubles the alpha
 *               channel, clamped to at most 1.0.
 *   - ps_main1: samples TextureSampler at PSin_t and replaces the alpha
 *               channel with BGColor.a.
 *
 * BGColor comes from the std140 uniform block cb10 (binding = 10).
 * Note that the "in SHADER" interface block is declared before the
 * FRAGMENT_SHADER guard, i.e. it is part of the text for every stage.
 *
 * The "#version" line inside the text is commented out, so the version
 * directive and stage defines are presumably prepended by whatever code
 * compiles this string -- TODO confirm against the loader.
 */
static const char* merge_glsl =
	"//#version 420 // Keep it for editor detection\n"
	"\n"
	"struct vertex_basic\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"};\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} PSin;\n"
	"\n"
	"#define PSin_p (PSin.p)\n"
	"#define PSin_t (PSin.t)\n"
	"\n"
	"#ifdef FRAGMENT_SHADER\n"
	"\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"\n"
	"layout(std140, binding = 10) uniform cb10\n"
	"{\n"
	"    vec4 BGColor;\n"
	"};\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"#endif\n"
	"\n"
	"void ps_main0()\n"
	"{\n"
	"    vec4 c = texture(TextureSampler, PSin_t);\n"
	"	c.a = min(c.a * 2.0, 1.0);\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"\n"
	"void ps_main1()\n"
	"{\n"
	"    vec4 c = texture(TextureSampler, PSin_t);\n"
	"	c.a = BGColor.a;\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"\n"
	"#endif\n"
	;

/*
 * GLSL source text of the "shadeboost" fragment shader, embedded as one C
 * string.
 *
 * The single entry point ps_main() samples TextureSampler at PSin_t and
 * passes the colour through ContrastSaturationBrightness(), which rewrites
 * the rgb channels and leaves alpha untouched.
 *
 * SB_SATURATION, SB_BRIGHTNESS and SB_CONTRAST are used but never defined
 * inside this text; each is divided by 50.0, so a value of 50 gives a
 * factor of 1.0. They are presumably injected as preprocessor defines by
 * the code that compiles this string -- TODO confirm against the loader.
 *
 * The uniform block cb12 (binding = 12) declares BGColor, but nothing in
 * this shader text reads it.
 */
static const char* shadeboost_glsl =
	"//#version 420 // Keep it for editor detection\n"
	"\n"
	"/*\n"
	"** Contrast, saturation, brightness\n"
	"** Code of this function is from TGM's shader pack\n"
	"** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057\n"
	"*/\n"
	"\n"
	"struct vertex_basic\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"};\n"
	"\n"
	"#ifdef FRAGMENT_SHADER\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} PSin;\n"
	"\n"
	"#define PSin_p (PSin.p)\n"
	"#define PSin_t (PSin.t)\n"
	"\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"\n"
	"layout(std140, binding = 12) uniform cb12\n"
	"{\n"
	"    vec4 BGColor;\n"
	"};\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"#endif\n"
	"\n"
	"// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% \n"
	"vec4 ContrastSaturationBrightness(vec4 color)\n"
	"{\n"
	"	const float sat = SB_SATURATION / 50.0;\n"
	"	const float brt = SB_BRIGHTNESS / 50.0;\n"
	"	const float con = SB_CONTRAST / 50.0;\n"
	"	\n"
	"	// Increase or decrease these values to adjust r, g and b color channels separately\n"
	"	const float AvgLumR = 0.5;\n"
	"	const float AvgLumG = 0.5;\n"
	"	const float AvgLumB = 0.5;\n"
	"	\n"
	"	const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721);\n"
	"	\n"
	"	vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB);\n"
	"	vec3 brtColor = color.rgb * brt;\n"
	"    float dot_intensity = dot(brtColor, LumCoeff);\n"
	"	vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity);\n"
	"	vec3 satColor = mix(intensity, brtColor, sat);\n"
	"	vec3 conColor = mix(AvgLumin, satColor, con);\n"
	"\n"
	"	color.rgb = conColor;	\n"
	"	return color;\n"
	"}\n"
	"\n"
	"\n"
	"void ps_main()\n"
	"{\n"
	"    vec4 c = texture(TextureSampler, PSin_t);\n"
	"	SV_Target0 = ContrastSaturationBrightness(c);\n"
	"}\n"
	"\n"
	"\n"
	"#endif\n"
	;

static const char* tfx_glsl =
	"//#version 420 // Keep it for text editor detection\n"
	"\n"
	"// note lerp => mix\n"
	"\n"
	"#define FMT_32 0\n"
	"#define FMT_24 1\n"
	"#define FMT_16 2\n"
	"#define FMT_PAL 4 /* flag bit */\n"
	"\n"
	"// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n"
	"// And I say this as an ATI user.\n"
	"#define ATI_SUCKS 0\n"
	"\n"
	"#ifndef VS_BPPZ\n"
	"#define VS_BPPZ 0\n"
	"#define VS_TME 1\n"
	"#define VS_FST 1\n"
	"#define VS_LOGZ 0\n"
	"#endif\n"
	"\n"
	"#ifndef PS_FST\n"
	"#define PS_FST 0\n"
	"#define PS_WMS 0\n"
	"#define PS_WMT 0\n"
	"#define PS_FMT FMT_32\n"
	"#define PS_AEM 0\n"
	"#define PS_TFX 0\n"
	"#define PS_TCC 1\n"
	"#define PS_ATST 1\n"
	"#define PS_FOG 0\n"
	"#define PS_CLR1 0\n"
	"#define PS_FBA 0\n"
	"#define PS_AOUT 0\n"
	"#define PS_LTF 1\n"
	"#define PS_COLCLIP 0\n"
	"#define PS_DATE 0\n"
	"#define PS_SPRITEHACK 0\n"
	"#define PS_POINT_SAMPLER 0\n"
	"#define PS_TCOFFSETHACK 0\n"
	"#define PS_IIP 1\n"
	"#endif\n"
	"\n"
	"struct vertex\n"
	"{\n"
	"    vec4 t;\n"
	"    vec4 c;\n"
	"	vec4 fc;\n"
	"};\n"
	"\n"
	"#ifdef VERTEX_SHADER\n"
	"layout(location = 0) in vec2  i_st;\n"
	"layout(location = 2) in vec4  i_c;\n"
	"layout(location = 3) in float i_q;\n"
	"layout(location = 4) in uvec2 i_p;\n"
	"layout(location = 5) in uint  i_z;\n"
	"layout(location = 6) in uvec2 i_uv;\n"
	"layout(location = 7) in vec4  i_f;\n"
	"\n"
	"out SHADER\n"
	"{\n"
	"    vec4 t;\n"
	"    vec4 c;\n"
	"	flat vec4 fc;\n"
	"} VSout;\n"
	"\n"
	"#define VSout_t (VSout.t)\n"
	"#define VSout_c (VSout.c)\n"
	"#define VSout_fc (VSout.fc)\n"
	"\n"
	"out gl_PerVertex {\n"
	"    invariant vec4 gl_Position;\n"
	"    float gl_PointSize;\n"
	"#if !pGL_ES\n"
	"    float gl_ClipDistance[];\n"
	"#endif\n"
	"};\n"
	"\n"
	"layout(std140, binding = 20) uniform cb20\n"
	"{\n"
	"    vec2 VertexScale;\n"
	"    vec2 VertexOffset;\n"
	"    vec2 TextureScale;\n"
	"};\n"
	"\n"
	"#ifdef ZERO_TO_ONE_DEPTH\n"
	"const float exp_min32 = exp2(-32.0f);\n"
	"#else\n"
	"const float exp_min31 = exp2(-31.0f);\n"
	"#endif\n"
	"\n"
	"#ifdef SUBROUTINE_GL40\n"
	"// Function pointer type\n"
	"subroutine void TextureCoordType(void);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n"
	"\n"
	"layout(index = 0) subroutine(TextureCoordType)\n"
	"void tme_0()\n"
	"{\n"
	"    VSout_t.xy = vec2(0.0f, 0.0f);\n"
	"    VSout_t.w = 1.0f;\n"
	"}\n"
	"\n"
	"layout(index = 1) subroutine(TextureCoordType)\n"
	"void tme_1_fst_0()\n"
	"{\n"
	"    VSout_t.xy = i_st;\n"
	"    VSout_t.w = i_q;\n"
	"}\n"
	"\n"
	"layout(index = 2) subroutine(TextureCoordType)\n"
	"void tme_1_fst_1()\n"
	"{\n"
	"    VSout_t.xy = vec2(i_uv) * TextureScale;\n"
	"    VSout_t.w = 1.0f;\n"
	"}\n"
	"\n"
	"#else\n"
	"\n"
	"void texture_coord()\n"
	"{\n"
	"    if(VS_TME != 0)\n"
	"    {\n"
	"        if(VS_FST != 0)\n"
	"        {\n"
	"            VSout_t.xy = vec2(i_uv) * TextureScale;\n"
	"            VSout_t.w = 1.0f;\n"
	"        }\n"
	"        else\n"
	"        {\n"
	"            VSout_t.xy = i_st;\n"
	"            VSout_t.w = i_q;\n"
	"        }\n"
	"    }\n"
	"    else\n"
	"    {\n"
	"        VSout_t.xy = vec2(0.0f, 0.0f);\n"
	"        VSout_t.w = 1.0f;\n"
	"    }\n"
	"}\n"
	"\n"
	"#endif\n"
	"\n"
	"void vs_main()\n"
	"{\n"
	"    highp uint z;\n"
	"    if(VS_BPPZ == 1) // 24\n"
	"        z = i_z & uint(0xffffff);\n"
	"    else if(VS_BPPZ == 2) // 16\n"
	"        z = i_z & uint(0xffff);\n"
	"    else\n"
	"        z = i_z;\n"
	"\n"
	"    // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
	"    // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"
	"    // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n"
	"    // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n"
	"    vec4 p;\n"
	"\n"
	"    p.xy = vec2(i_p) - vec2(0.05f, 0.05f);\n"
	"    p.xy = p.xy * VertexScale - VertexOffset;\n"
	"    p.w = 1.0f;\n"
	"#ifdef ZERO_TO_ONE_DEPTH\n"
	"    if(VS_LOGZ == 1) {\n"
	"        p.z = log2(float(1u+z)) / 32.0f;\n"
	"    } else {\n"
	"        p.z = float(z) * exp_min32;\n"
	"    }\n"
	"#else\n"
	"    if(VS_LOGZ == 1) {\n"
	"        p.z = log2(float(1u+z)) / 31.0f - 1.0f;\n"
	"    } else {\n"
	"        p.z = float(z) * exp_min31 - 1.0f;\n"
	"    }\n"
	"#endif\n"
	"\n"
	"    gl_Position = p;\n"
	"\n"
	"    texture_coord();\n"
	"\n"
	"    VSout_c = i_c;\n"
	"	VSout_fc = i_c;\n"
	"    VSout_t.z = i_f.r;\n"
	"}\n"
	"\n"
	"#endif\n"
	"\n"
	"#ifdef GEOMETRY_SHADER\n"
	"in gl_PerVertex {\n"
	"    invariant vec4 gl_Position;\n"
	"    float gl_PointSize;\n"
	"#if !pGL_ES\n"
	"    float gl_ClipDistance[];\n"
	"#endif\n"
	"} gl_in[];\n"
	"//in int gl_PrimitiveIDIn;\n"
	"\n"
	"out gl_PerVertex {\n"
	"    vec4 gl_Position;\n"
	"    float gl_PointSize;\n"
	"#if !pGL_ES\n"
	"    float gl_ClipDistance[];\n"
	"#endif\n"
	"};\n"
	"//out int gl_PrimitiveID;\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 t;\n"
	"    vec4 c;\n"
	"    flat vec4 fc;\n"
	"} GSin[];\n"
	"\n"
	"out SHADER\n"
	"{\n"
	"    vec4 t;\n"
	"    vec4 c;\n"
	"    flat vec4 fc;\n"
	"} GSout;\n"
	"\n"
	"void out_vertex(in vertex v)\n"
	"{\n"
	"    GSout.t = v.t;\n"
	"    GSout.c = v.c;\n"
	"    GSout.fc = v.fc;\n"
	"    gl_PrimitiveID = gl_PrimitiveIDIn;\n"
	"    EmitVertex();\n"
	"}\n"
	"\n"
	"layout(lines) in;\n"
	"layout(triangle_strip, max_vertices = 6) out;\n"
	"\n"
	"void gs_main()\n"
	"{\n"
	"    // left top     => GSin[0];\n"
	"    // right bottom => GSin[1];\n"
	"    vertex rb = vertex(GSin[1].t, GSin[1].c, GSin[1].fc);\n"
	"    vertex lt = vertex(GSin[0].t, GSin[0].c, GSin[0].fc);\n"
	"\n"
	"    vec4 rb_p = gl_in[1].gl_Position;\n"
	"    vec4 lb_p = gl_in[1].gl_Position;\n"
	"    vec4 rt_p = gl_in[1].gl_Position;\n"
	"    vec4 lt_p = gl_in[0].gl_Position;\n"
	"\n"
	"    // flat depth\n"
	"    lt_p.z = rb_p.z;\n"
	"    // flat fog and texture perspective\n"
	"    lt.t.zw = rb.t.zw;\n"
	"    // flat color\n"
	"    lt.c = rb.c;\n"
	"\n"
	"	// Swap texture and position coordinate\n"
	"    vertex lb = rb;\n"
	"    lb_p.x = lt_p.x;\n"
	"    lb.t.x = lt.t.x;\n"
	"\n"
	"    vertex rt = rb;\n"
	"    rt_p.y = lt_p.y;\n"
	"    rt.t.y = lt.t.y;\n"
	"\n"
	"    // Triangle 1\n"
	"    gl_Position = lt_p;\n"
	"    out_vertex(lt);\n"
	"\n"
	"    gl_Position = lb_p;\n"
	"    out_vertex(lb);\n"
	"\n"
	"    gl_Position = rt_p;\n"
	"    out_vertex(rt);\n"
	"\n"
	"    EndPrimitive();\n"
	"\n"
	"    // Triangle 2\n"
	"    gl_Position = lb_p;\n"
	"    out_vertex(lb);\n"
	"\n"
	"    gl_Position = rt_p;\n"
	"    out_vertex(rt);\n"
	"\n"
	"    gl_Position = rb_p;\n"
	"    out_vertex(rb);\n"
	"\n"
	"    EndPrimitive();\n"
	"}\n"
	"\n"
	"#endif\n"
	"\n"
	"#ifdef FRAGMENT_SHADER\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 t;\n"
	"    vec4 c;\n"
	"    flat vec4 fc;\n"
	"} PSin;\n"
	"\n"
	"#define PSin_t (PSin.t)\n"
	"#define PSin_c (PSin.c)\n"
	"#define PSin_fc (PSin.fc)\n"
	"\n"
	"// Same buffer but 2 colors for dual source blending\n"
	"#if pGL_ES\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"#else\n"
	"layout(location = 0, index = 0) out vec4 SV_Target0;\n"
	"layout(location = 0, index = 1) out vec4 SV_Target1;\n"
	"#endif\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"layout(binding = 1) uniform sampler2D PaletteSampler;\n"
	"#endif\n"
	"\n"
	"#ifndef DISABLE_GL42_image\n"
	"#if PS_DATE > 0\n"
	"// FIXME how to declare memory access\n"
	"layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;\n"
	"#endif\n"
	"#else\n"
	"// use basic stencil\n"
	"#endif\n"
	"\n"
	"#ifndef DISABLE_GL42_image\n"
	"#if PS_DATE > 0\n"
	"// origin_upper_left\n"
	"layout(pixel_center_integer) in vec4 gl_FragCoord;\n"
	"//in int gl_PrimitiveID;\n"
	"#endif\n"
	"#endif\n"
	"\n"
	"layout(std140, binding = 21) uniform cb21\n"
	"{\n"
	"    vec3 FogColor;\n"
	"    float AREF;\n"
	"    vec4 WH;\n"
	"    vec2 MinF;\n"
	"    vec2 TA;\n"
	"    uvec4 MskFix;\n"
	"    vec4 HalfTexel;\n"
	"    vec4 MinMax;\n"
	"    vec4 TC_OffsetHack;\n"
	"};\n"
	"\n"
	"vec4 sample_c(vec2 uv)\n"
	"{\n"
	"    // FIXME: check the issue on openGL\n"
	"	if (ATI_SUCKS == 1 && PS_POINT_SAMPLER == 1)\n"
	"	{\n"
	"		// Weird issue with ATI cards (happens on at least HD 4xxx and 5xxx),\n"
	"		// it looks like they add 127/128 of a texel to sampling coordinates\n"
	"		// occasionally causing point sampling to erroneously round up.\n"
	"		// I'm manually adjusting coordinates to the centre of texels here,\n"
	"		// though the centre is just paranoia, the top left corner works fine.\n"
	"		uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;\n"
	"	}\n"
	"\n"
	"    return texture(TextureSampler, uv);\n"
	"}\n"
	"\n"
	"vec4 sample_p(float u)\n"
	"{\n"
	"    //FIXME do we need a 1D sampler. Big impact on opengl to find 1 dim\n"
	"    // So for the moment cheat with 0.0f dunno if it work\n"
	"    return texture(PaletteSampler, vec2(u, 0.0f));\n"
	"}\n"
	"\n"
	"#if 0\n"
	"vec4 sample_rt(vec2 uv)\n"
	"{\n"
	"    return texture(RTCopySampler, uv);\n"
	"}\n"
	"#endif\n"
	"\n"
	"// FIXME crash nvidia\n"
	"#if 0\n"
	"// Function pointer type\n"
	"subroutine vec4 WrapType(vec4 uv);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 4) subroutine uniform WrapType wrapuv;\n"
	"\n"
	"layout(index = 24) subroutine(WrapType)\n"
	"vec4 wrapuv_wms_wmt_2(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 25) subroutine(WrapType)\n"
	"vec4 wrapuv_wms_wmt3(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 26) subroutine(WrapType)\n"
	"vec4 wrapuv_wms2_wmt3(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
	"    uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 27) subroutine(WrapType)\n"
	"vec4 wrapuv_wms3_wmt2(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
	"    uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 28) subroutine(WrapType)\n"
	"vec4 wrapuv_wms2_wmtx(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 29) subroutine(WrapType)\n"
	"vec4 wrapuv_wmsx_wmt3(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 30) subroutine(WrapType)\n"
	"vec4 wrapuv_wms3_wmtx(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 31) subroutine(WrapType)\n"
	"vec4 wrapuv_wmsx_wmt2(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"    uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 32) subroutine(WrapType)\n"
	"vec4 wrapuv_dummy(vec4 uv)\n"
	"{\n"
	"    return uv;\n"
	"}\n"
	"\n"
	"#else\n"
	"vec4 wrapuv(vec4 uv)\n"
	"{\n"
	"    vec4 uv_out = uv;\n"
	"\n"
	"    if(PS_WMS == PS_WMT)\n"
	"    {\n"
	"        if(PS_WMS == 2)\n"
	"        {\n"
	"            uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
	"        }\n"
	"        else if(PS_WMS == 3)\n"
	"        {\n"
	"            uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
	"        }\n"
	"    }\n"
	"    else\n"
	"    {\n"
	"        if(PS_WMS == 2)\n"
	"        {\n"
	"            uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
	"        }\n"
	"        else if(PS_WMS == 3)\n"
	"        {\n"
	"            uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
	"        }\n"
	"        if(PS_WMT == 2)\n"
	"        {\n"
	"            uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
	"        }\n"
	"        else if(PS_WMT == 3)\n"
	"        {\n"
	"            uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
	"        }\n"
	"    }\n"
	"\n"
	"    return uv_out;\n"
	"}\n"
	"#endif\n"
	"\n"
	"// FIXME crash nvidia\n"
	"#if 0\n"
	"// Function pointer type\n"
	"subroutine vec2 ClampType(vec2 uv);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 3) subroutine uniform ClampType clampuv;\n"
	"\n"
	"layout(index = 20) subroutine(ClampType)\n"
	"vec2 clampuv_wms2_wmt2(vec2 uv)\n"
	"{\n"
	"    return clamp(uv, MinF, MinMax.zw);\n"
	"}\n"
	"\n"
	"layout(index = 21) subroutine(ClampType)\n"
	"vec2 clampuv_wms2(vec2 uv)\n"
	"{\n"
	"    vec2 uv_out = uv;\n"
	"    uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 22) subroutine(ClampType)\n"
	"vec2 clampuv_wmt2(vec2 uv)\n"
	"{\n"
	"    vec2 uv_out = uv;\n"
	"    uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
	"    return uv_out;\n"
	"}\n"
	"\n"
	"layout(index = 23) subroutine(ClampType)\n"
	"vec2 clampuv_dummy(vec2 uv)\n"
	"{\n"
	"    return uv;\n"
	"}\n"
	"\n"
	"#else\n"
	"vec2 clampuv(vec2 uv)\n"
	"{\n"
	"    vec2 uv_out = uv;\n"
	"\n"
	"    if(PS_WMS == 2 && PS_WMT == 2) \n"
	"    {\n"
	"        uv_out = clamp(uv, MinF, MinMax.zw);\n"
	"    }\n"
	"    else if(PS_WMS == 2)\n"
	"    {\n"
	"        uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
	"    }\n"
	"    else if(PS_WMT == 2)\n"
	"    {\n"
	"        uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
	"    }\n"
	"\n"
	"    return uv_out;\n"
	"}\n"
	"#endif\n"
	"\n"
	"mat4 sample_4c(vec4 uv)\n"
	"{\n"
	"    mat4 c;\n"
	"\n"
	"    c[0] = sample_c(uv.xy);\n"
	"    c[1] = sample_c(uv.zy);\n"
	"    c[2] = sample_c(uv.xw);\n"
	"    c[3] = sample_c(uv.zw);\n"
	"\n"
	"    return c;\n"
	"}\n"
	"\n"
	"vec4 sample_4a(vec4 uv)\n"
	"{\n"
	"    vec4 c;\n"
	"\n"
	"    // Dx used the alpha channel.\n"
	"    // Opengl is only 8 bits on red channel.\n"
	"    c.x = sample_c(uv.xy).r;\n"
	"    c.y = sample_c(uv.zy).r;\n"
	"    c.z = sample_c(uv.xw).r;\n"
	"    c.w = sample_c(uv.zw).r;\n"
	"\n"
	"	return c * 255.0/256.0 + 0.5/256.0;\n"
	"}\n"
	"\n"
	"mat4 sample_4p(vec4 u)\n"
	"{\n"
	"    mat4 c;\n"
	"\n"
	"    c[0] = sample_p(u.x);\n"
	"    c[1] = sample_p(u.y);\n"
	"    c[2] = sample_p(u.z);\n"
	"    c[3] = sample_p(u.w);\n"
	"\n"
	"    return c;\n"
	"}\n"
	"\n"
	"vec4 sample_color(vec2 st, float q)\n"
	"{\n"
	"    if(PS_FST == 0) st /= q;\n"
	"\n"
	"    if(PS_TCOFFSETHACK == 1) st += TC_OffsetHack.xy;\n"
	"\n"
	"    vec4 t;\n"
	"    mat4 c;\n"
	"    vec2 dd;\n"
	"\n"
	"    if (PS_LTF == 0 && PS_FMT <= FMT_16 && PS_WMS < 3 && PS_WMT < 3)\n"
	"    {\n"
	"        c[0] = sample_c(clampuv(st));\n"
	"    }\n"
	"    else\n"
	"    {\n"
	"        vec4 uv;\n"
	"\n"
	"        if(PS_LTF != 0)\n"
	"        {\n"
	"            uv = st.xyxy + HalfTexel;\n"
	"            dd = fract(uv.xy * WH.zw);\n"
	"        }\n"
	"        else\n"
	"        {\n"
	"            uv = st.xyxy;\n"
	"        }\n"
	"\n"
	"        uv = wrapuv(uv);\n"
	"\n"
	"        if((PS_FMT & FMT_PAL) != 0)\n"
	"        {\n"
	"            c = sample_4p(sample_4a(uv));\n"
	"        }\n"
	"        else\n"
	"        {\n"
	"            c = sample_4c(uv);\n"
	"        }\n"
	"    }\n"
	"\n"
	"    // PERF: see the impact of the exansion before/after the interpolation\n"
	"    for (int i = 0; i < 4; i++)\n"
	"    {\n"
	"        if((PS_FMT & ~FMT_PAL) == FMT_24)\n"
	"        {\n"
	"            // FIXME GLSL any only support bvec so try to mix it with notEqual\n"
	"            bvec3 rgb_check = notEqual( c[i].rgb, vec3(0.0f, 0.0f, 0.0f) );\n"
	"            c[i].a = ( (PS_AEM == 0) || any(rgb_check)  ) ? TA.x : 0.0f;\n"
	"        }\n"
	"        else if((PS_FMT & ~FMT_PAL) == FMT_16)\n"
	"        {\n"
	"            // FIXME GLSL any only support bvec so try to mix it with notEqual\n"
	"            bvec3 rgb_check = notEqual( c[i].rgb, vec3(0.0f, 0.0f, 0.0f) );\n"
	"            c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f;\n"
	"        }\n"
	"    }\n"
	"\n"
	"    if(PS_LTF != 0)\n"
	"    {\n"
	"        t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);\n"
	"    }\n"
	"    else\n"
	"    {\n"
	"        t = c[0];\n"
	"    }\n"
	"\n"
	"    return t;\n"
	"}\n"
	"\n"
	"#ifdef SUBROUTINE_GL40\n"
	"// Function pointer type\n"
	"subroutine vec4 TfxType(vec4 t, vec4 c);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 2) subroutine uniform TfxType tfx;\n"
	"\n"
	"layout(index = 11) subroutine(TfxType)\n"
	"vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 12) subroutine(TfxType)\n"
	"vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = t.rgb;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 13) subroutine(TfxType)\n"
	"vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 14) subroutine(TfxType)\n"
	"vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 15) subroutine(TfxType)\n"
	"vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out = c * t * 255.0f / 128.0f;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 16) subroutine(TfxType)\n"
	"vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out = t;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 17) subroutine(TfxType)\n"
	"vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"    c_out.a += t.a;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 18) subroutine(TfxType)\n"
	"vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"    c_out.a = t.a;\n"
	"    return c_out;\n"
	"}\n"
	"\n"
	"layout(index = 19) subroutine(TfxType)\n"
	"vec4 tfx_dummy(vec4 t, vec4 c)\n"
	"{\n"
	"    return c;\n"
	"}\n"
	"\n"
	"#else\n"
	"vec4 tfx(vec4 t, vec4 c)\n"
	"{\n"
	"    vec4 c_out = c;\n"
	"    if(PS_TFX == 0)\n"
	"    {\n"
	"        if(PS_TCC != 0) \n"
	"        {\n"
	"            c_out = c * t * 255.0f / 128.0f;\n"
	"        }\n"
	"        else\n"
	"        {\n"
	"            c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
	"        }\n"
	"    }\n"
	"    else if(PS_TFX == 1)\n"
	"    {\n"
	"        if(PS_TCC != 0) \n"
	"        {\n"
	"            c_out = t;\n"
	"        }\n"
	"        else\n"
	"        {\n"
	"            c_out.rgb = t.rgb;\n"
	"        }\n"
	"    }\n"
	"    else if(PS_TFX == 2)\n"
	"    {\n"
	"        c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"\n"
	"        if(PS_TCC != 0) \n"
	"        {\n"
	"            c_out.a += t.a;\n"
	"        }\n"
	"    }\n"
	"    else if(PS_TFX == 3)\n"
	"    {\n"
	"        c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
	"\n"
	"        if(PS_TCC != 0) \n"
	"        {\n"
	"            c_out.a = t.a;\n"
	"        }\n"
	"    }\n"
	"\n"
	"    return c_out;\n"
	"}\n"
	"#endif\n"
	"\n"
	"\n"
	"#if 0\n"
	"void datst()\n"
	"{\n"
	"#if PS_DATE > 0\n"
	"    float alpha = sample_rt(PSin_tp.xy).a;\n"
	"    float alpha0x80 = 128.0 / 255;\n"
	"\n"
	"    if (PS_DATE == 1 && alpha >= alpha0x80)\n"
	"        discard;\n"
	"    else if (PS_DATE == 2 && alpha < alpha0x80)\n"
	"        discard;\n"
	"#endif\n"
	"}\n"
	"#endif\n"
	"\n"
	"#ifdef SUBROUTINE_GL40\n"
	"// Function pointer type\n"
	"subroutine void AlphaTestType(vec4 c);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
	"\n"
	"layout(index = 0) subroutine(AlphaTestType)\n"
	"void atest_never(vec4 c)\n"
	"{\n"
	"    discard;\n"
	"}\n"
	"\n"
	"layout(index = 1) subroutine(AlphaTestType)\n"
	"void atest_always(vec4 c)\n"
	"{\n"
	"    // Nothing to do\n"
	"}\n"
	"\n"
	"layout(index = 2) subroutine(AlphaTestType)\n"
	"void atest_l(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if (PS_SPRITEHACK == 0)\n"
	"        if ((AREF - a - 0.5f) < 0.0f)\n"
	"            discard;\n"
	"}\n"
	"\n"
	"layout(index = 3) subroutine(AlphaTestType)\n"
	"void atest_le(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if ((AREF - a + 0.5f) < 0.0f)\n"
	"        discard;\n"
	"}\n"
	"\n"
	"layout(index = 4) subroutine(AlphaTestType)\n"
	"void atest_e(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if ((0.5f - abs(a - AREF)) < 0.0f)\n"
	"        discard;\n"
	"}\n"
	"\n"
	"layout(index = 5) subroutine(AlphaTestType)\n"
	"void atest_ge(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if ((a-AREF + 0.5f) < 0.0f)\n"
	"        discard;\n"
	"}\n"
	"\n"
	"layout(index = 6) subroutine(AlphaTestType)\n"
	"void atest_g(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if ((a-AREF - 0.5f) < 0.0f)\n"
	"        discard;\n"
	"}\n"
	"\n"
	"layout(index = 7) subroutine(AlphaTestType)\n"
	"void atest_ne(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"    if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
	"        discard;\n"
	"}\n"
	"\n"
	"#else\n"
	"void atst(vec4 c)\n"
	"{\n"
	"    float a = trunc(c.a * 255.0 + 0.01);\n"
	"\n"
	"    if(PS_ATST == 0) // never\n"
	"    {\n"
	"        discard;\n"
	"    }\n"
	"    else if(PS_ATST == 1) // always\n"
	"    {\n"
	"        // nothing to do\n"
	"    }\n"
	"    else if(PS_ATST == 2 ) // l\n"
	"    {\n"
	"        if (PS_SPRITEHACK == 0)\n"
	"            if ((AREF - a - 0.5f) < 0.0f)\n"
	"                discard;\n"
	"    }\n"
	"    else if(PS_ATST == 3 ) // le\n"
	"    {\n"
	"        if ((AREF - a + 0.5f) < 0.0f)\n"
	"            discard;\n"
	"    }\n"
	"    else if(PS_ATST == 4) // e\n"
	"    {\n"
	"        if ((0.5f - abs(a - AREF)) < 0.0f)\n"
	"            discard;\n"
	"    }\n"
	"    else if(PS_ATST == 5) // ge\n"
	"    {\n"
	"        if ((a-AREF + 0.5f) < 0.0f)\n"
	"            discard;\n"
	"    }\n"
	"    else if(PS_ATST == 6) // g\n"
	"    {\n"
	"        if ((a-AREF - 0.5f) < 0.0f)\n"
	"            discard;\n"
	"    }\n"
	"    else if(PS_ATST == 7) // ne\n"
	"    {\n"
	"        if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
	"            discard;\n"
	"    }\n"
	"}\n"
	"#endif\n"
	"\n"
	"// Note layout stuff might require gl4.3\n"
	"#ifdef SUBROUTINE_GL40\n"
	"// Function pointer type\n"
	"subroutine void ColClipType(inout vec4 c);\n"
	"\n"
	"// a function pointer variable\n"
	"layout(location = 1) subroutine uniform ColClipType colclip;\n"
	"\n"
	"layout(index = 8) subroutine(ColClipType)\n"
	"void colclip_0(inout vec4 c)\n"
	"{\n"
	"	// nothing to do\n"
	"}\n"
	"\n"
	"layout(index = 9) subroutine(ColClipType)\n"
	"void colclip_1(inout vec4 c)\n"
	"{\n"
	"	// FIXME !!!!\n"
	"	//c.rgb *= c.rgb < 128./255;\n"
	"	bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
	"	c.rgb *= vec3(factor);\n"
	"}\n"
	"\n"
	"layout(index = 10) subroutine(ColClipType)\n"
	"void colclip_2(inout vec4 c)\n"
	"{\n"
	"	c.rgb = 256.0f/255.0f - c.rgb;\n"
	"	// FIXME !!!!\n"
	"	//c.rgb *= c.rgb < 128./255;\n"
	"	bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
	"	c.rgb *= vec3(factor);\n"
	"}\n"
	"\n"
	"#else\n"
	"void colclip(inout vec4 c)\n"
	"{\n"
	"    if (PS_COLCLIP == 2)\n"
	"    {\n"
	"        c.rgb = 256.0f/255.0f - c.rgb;\n"
	"    }\n"
	"    if (PS_COLCLIP > 0)\n"
	"    {\n"
	"        // FIXME !!!!\n"
	"        //c.rgb *= c.rgb < 128./255;\n"
	"        bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
	"        c.rgb *= vec3(factor);\n"
	"    }\n"
	"}\n"
	"#endif\n"
	"\n"
	"void fog(inout vec4 c, float f)\n"
	"{\n"
	"    if(PS_FOG != 0)\n"
	"    {\n"
	"        c.rgb = mix(FogColor, c.rgb, f);\n"
	"    }\n"
	"}\n"
	"\n"
	"vec4 ps_color()\n"
	"{\n"
	"    vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
	"\n"
	"    vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);\n"
	"    vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);\n"
	"#if PS_IIP == 1\n"
	"    vec4 c = clamp(tfx(t, PSin_c), zero, one);\n"
	"#else\n"
	"    vec4 c = clamp(tfx(t, PSin_fc), zero, one);\n"
	"#endif\n"
	"\n"
	"    atst(c);\n"
	"\n"
	"    fog(c, PSin_t.z);\n"
	"\n"
	"	colclip(c);\n"
	"\n"
	"    if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
	"    {\n"
	"        c.rgb = vec3(1.0f, 1.0f, 1.0f); \n"
	"    }\n"
	"\n"
	"    return c;\n"
	"}\n"
	"\n"
	"#if pGL_ES\n"
	"void ps_main()\n"
	"{\n"
	"    vec4 c = ps_color();\n"
	"    c.a *= 2.0;\n"
	"    SV_Target0 = c;\n"
	"}\n"
	"#endif\n"
	"\n"
	"#if !pGL_ES\n"
	"void ps_main()\n"
	"{\n"
	"#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n"
	"    int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n"
	"    // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n"
	"    // the bad alpha value so we must keep it.\n"
	"\n"
	"	if (gl_PrimitiveID > stencil_ceil) {\n"
	"		discard;\n"
	"	}\n"
	"#endif\n"
	"\n"
	"    vec4 c = ps_color();\n"
	"\n"
	"    float alpha = c.a * 2.0;\n"
	"\n"
	"    if(PS_AOUT != 0) // 16 bit output\n"
	"    {\n"
	"        float a = 128.0f / 255.0; // alpha output will be 0x80\n"
	"\n"
	"        c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
	"    }\n"
	"    else if(PS_FBA != 0)\n"
	"    {\n"
	"        if(c.a < 0.5) c.a += 0.5;\n"
	"    }\n"
	"\n"
	"    // Get first primitive that will write a failling alpha value\n"
	"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
	"    // DATM == 0\n"
	"    // Pixel with alpha equal to 1 will failed\n"
	"    if (c.a > 127.5f / 255.0f) {\n"
	"        imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
	"    }\n"
	"    //memoryBarrier();\n"
	"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
	"    // DATM == 1\n"
	"    // Pixel with alpha equal to 0 will failed\n"
	"    if (c.a < 127.5f / 255.0f) {\n"
	"        imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
	"    }\n"
	"#endif\n"
	"\n"
	"\n"
	"#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)\n"
	"    // Don't write anything on the framebuffer\n"
	"    // Note: you can't use discard because it will also drop\n"
	"    // image operation\n"
	"#else\n"
	"    SV_Target0 = c;\n"
	"    SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n"
	"#endif\n"
	"\n"
	"}\n"
	"#endif // !pGL_ES\n"
	"\n"
	"#endif\n"
	;

/*
 * fxaa_fx: source text of the FXAA post-processing pixel shader, stored as a
 * single string. The same text serves two shading languages; the shader's own
 * preprocessor picks the path from macros the caller defines ahead of it:
 *
 *   - FXAA_GLSL_130 == 1 : GLSL path, entry point "void ps_main()" writing
 *                          SV_Target0.
 *   - SHADER_MODEL       : HLSL path, entry point "PS_OUTPUT ps_main(VS_OUTPUT)".
 *                          >= 0x500, >= 0x400 and anything lower select the
 *                          FXAA_HLSL_5 / FXAA_HLSL_4 / FXAA_HLSL_3 macro sets.
 *
 * When neither SHADER_MODEL nor FXAA_GLSL_130 is defined the whole text is
 * preprocessed away (outermost #if).
 *
 * SHADER_MODEL and FXAA_HLSL_3/4/5 are given a default of 0 before they are
 * tested: the shader evaluates them in plain #if/#elif expressions, and GLSL
 * treats an undefined macro in such an expression as an error. The selected
 * code path is the same as before for every macro combination.
 *
 * NOTE(review): the file header says this header is generated by glsl2h.pl;
 * mirror this change in the shader source it is generated from, otherwise a
 * regeneration will drop it.
 */
static const char* fxaa_fx =
	"#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130)\n"
	"\n"
	"#ifndef FXAA_GLSL_130\n"
	"    #define FXAA_GLSL_130 0\n"
	"#endif\n"
	"\n"
	"// Every macro tested by a plain #if below needs a value: evaluating an\n"
	"// undefined macro in #if / #elif is an error in GLSL.\n"
	"#ifndef SHADER_MODEL\n"
	"    #define SHADER_MODEL 0\n"
	"#endif\n"
	"#ifndef FXAA_HLSL_5\n"
	"    #define FXAA_HLSL_5 0\n"
	"#endif\n"
	"#ifndef FXAA_HLSL_4\n"
	"    #define FXAA_HLSL_4 0\n"
	"#endif\n"
	"#ifndef FXAA_HLSL_3\n"
	"    #define FXAA_HLSL_3 0\n"
	"#endif\n"
	"\n"
	"#define UHQ_FXAA 1          //High Quality Fast Approximate Anti Aliasing. Adapted for GSdx from Timothy Lottes FXAA 3.11.\n"
	"#define FxaaSubpixMax 0.0   //[0.00 to 1.00] Amount of subpixel aliasing removal. 0.00: Edge only antialiasing (no blurring)\n"
	"#define FxaaEarlyExit 1     //[0 or 1] Use Fxaa early exit pathing. When disabled, the entire scene is antialiased(FSAA). 0 is off, 1 is on.\n"
	"\n"
	"/*------------------------------------------------------------------------------\n"
	"							 [GLOBALS|FUNCTIONS]\n"
	"------------------------------------------------------------------------------*/\n"
	"#if (FXAA_GLSL_130 == 1)\n"
	"\n"
	"struct vertex_basic\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"};\n"
	"\n"
	"#ifdef ENABLE_BINDLESS_TEX\n"
	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
	"#else\n"
	"layout(binding = 0) uniform sampler2D TextureSampler;\n"
	"#endif\n"
	"\n"
	"in SHADER\n"
	"{\n"
	"    vec4 p;\n"
	"    vec2 t;\n"
	"} PSin;\n"
	"\n"
	"layout(location = 0) out vec4 SV_Target0;\n"
	"\n"
	"#else\n"
	"\n"
	"#if (SHADER_MODEL >= 0x400)\n"
	"Texture2D Texture : register(t0);\n"
	"SamplerState TextureSampler : register(s0);\n"
	"#else\n"
	"texture2D Texture : register(t0);\n"
	"sampler2D TextureSampler : register(s0);\n"
	"#define SamplerState sampler2D\n"
	"#endif\n"
	"\n"
	"cbuffer cb0\n"
	"{\n"
	"	float4 _rcpFrame : register(c0);\n"
	"};\n"
	"\n"
	"struct VS_INPUT\n"
	"{\n"
	"	float4 p : POSITION;\n"
	"	float2 t : TEXCOORD0;\n"
	"};\n"
	"\n"
	"struct VS_OUTPUT\n"
	"{\n"
	"	#if (SHADER_MODEL >= 0x400)\n"
	"	float4 p : SV_Position;\n"
	"	#else\n"
	"	float4 p : TEXCOORD1;\n"
	"	#endif\n"
	"	float2 t : TEXCOORD0;\n"
	"};\n"
	"\n"
	"struct PS_OUTPUT\n"
	"{\n"
	"	#if (SHADER_MODEL >= 0x400)\n"
	"	float4 c : SV_Target0;\n"
	"	#else\n"
	"	float4 c : COLOR0;\n"
	"	#endif\n"
	"};\n"
	"\n"
	"#endif\n"
	"\n"
	"/*------------------------------------------------------------------------------\n"
	"                             [FXAA CODE SECTION]\n"
	"------------------------------------------------------------------------------*/\n"
	"\n"
	"// Exactly one FXAA_HLSL_x is switched from its default 0 to 1 (none for GLSL).\n"
	"#if (SHADER_MODEL >= 0x500)\n"
	"#undef FXAA_HLSL_5\n"
	"#define FXAA_HLSL_5 1\n"
	"#define FXAA_GATHER4_ALPHA 1\n"
	"#elif (SHADER_MODEL >= 0x400)\n"
	"#undef FXAA_HLSL_4\n"
	"#define FXAA_HLSL_4 1\n"
	"#define FXAA_GATHER4_ALPHA 0\n"
	"#elif (FXAA_GLSL_130 == 1)\n"
	"#define FXAA_GATHER4_ALPHA 1\n"
	"#else\n"
	"#undef FXAA_HLSL_3\n"
	"#define FXAA_HLSL_3 1\n"
	"#define FXAA_GATHER4_ALPHA 0\n"
	"#endif\n"
	"\n"
	"#if (FXAA_HLSL_5 == 1)\n"
	"struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
	"#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
	"#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
	"#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)\n"
	"#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)\n"
	"#define FxaaDiscard clip(-1)\n"
	"#define FxaaSat(x) saturate(x)\n"
	"\n"
	"#elif (FXAA_HLSL_4 == 1)\n"
	"struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
	"#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
	"#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
	"#define FxaaDiscard clip(-1)\n"
	"#define FxaaSat(x) saturate(x)\n"
	"\n"
	"#elif (FXAA_HLSL_3 == 1)\n"
	"#define FxaaTex sampler2D\n"
	"#define int2 float2\n"
	"#define FxaaSat(x) saturate(x)\n"
	"#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))\n"
	"#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))\n"
	"\n"
	"#elif (FXAA_GLSL_130 == 1)\n"
	"\n"
	"#define int2 ivec2\n"
	"#define float2 vec2\n"
	"#define float3 vec3\n"
	"#define float4 vec4\n"
	"#define FxaaDiscard discard\n"
	"#define FxaaSat(x) clamp(x, 0.0, 1.0)\n"
	"#define FxaaTex sampler2D\n"
	"#define FxaaTexTop(t, p) textureLod(t, p, 0.0)\n"
	"#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)\n"
	"#if (FXAA_GATHER4_ALPHA == 1)\n"
	"// use #extension GL_ARB_gpu_shader5 : enable\n"
	"#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)\n"
	"#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)\n"
	"#endif\n"
	"\n"
	"#endif\n"
	"\n"
	"#define FxaaEdgeThreshold 0.063\n"
	"#define FxaaEdgeThresholdMin 0.00\n"
	"#define FXAA_QUALITY__P0 1.0\n"
	"#define FXAA_QUALITY__P1 1.5\n"
	"#define FXAA_QUALITY__P2 2.0\n"
	"#define FXAA_QUALITY__P3 2.0\n"
	"#define FXAA_QUALITY__P4 2.0\n"
	"#define FXAA_QUALITY__P5 2.0\n"
	"#define FXAA_QUALITY__P6 2.0\n"
	"#define FXAA_QUALITY__P7 2.0\n"
	"#define FXAA_QUALITY__P8 2.0\n"
	"#define FXAA_QUALITY__P9 2.0\n"
	"#define FXAA_QUALITY__P10 4.0\n"
	"#define FXAA_QUALITY__P11 8.0\n"
	"#define FXAA_QUALITY__P12 8.0\n"
	"\n"
	"/*------------------------------------------------------------------------------\n"
	"                        [GAMMA PREPASS CODE SECTION]\n"
	"------------------------------------------------------------------------------*/\n"
	"float RGBLuminance(float3 color)\n"
	"{\n"
	"	const float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);\n"
	"	return dot(color.rgb, lumCoeff);\n"
	"}\n"
	"\n"
	"#if (FXAA_GLSL_130 == 0)\n"
	"#define PixelSize float2(_rcpFrame.x, _rcpFrame.y)\n"
	"#endif\n"
	"\n"
	"\n"
	"float3 RGBGammaToLinear(float3 color, float gamma)\n"
	"{\n"
	"	color = FxaaSat(color);\n"
	"	color.r = (color.r <= 0.0404482362771082) ?\n"
	"	color.r / 12.92 : pow((color.r + 0.055) / 1.055, gamma);\n"
	"	color.g = (color.g <= 0.0404482362771082) ?\n"
	"	color.g / 12.92 : pow((color.g + 0.055) / 1.055, gamma);\n"
	"	color.b = (color.b <= 0.0404482362771082) ?\n"
	"	color.b / 12.92 : pow((color.b + 0.055) / 1.055, gamma);\n"
	"\n"
	"	return color;\n"
	"}\n"
	"\n"
	"float3 LinearToRGBGamma(float3 color, float gamma)\n"
	"{\n"
	"	color = FxaaSat(color);\n"
	"	color.r = (color.r <= 0.00313066844250063) ?\n"
	"	color.r * 12.92 : 1.055 * pow(color.r, 1.0 / gamma) - 0.055;\n"
	"	color.g = (color.g <= 0.00313066844250063) ?\n"
	"	color.g * 12.92 : 1.055 * pow(color.g, 1.0 / gamma) - 0.055;\n"
	"	color.b = (color.b <= 0.00313066844250063) ?\n"
	"	color.b * 12.92 : 1.055 * pow(color.b, 1.0 / gamma) - 0.055;\n"
	"\n"
	"	return color;\n"
	"}\n"
	"\n"
	"float4 PreGammaPass(float4 color, float2 uv0)\n"
	"{\n"
	"	#if (SHADER_MODEL >= 0x400)\n"
	"		color = Texture.Sample(TextureSampler, uv0);\n"
	"    #elif (FXAA_GLSL_130 == 1)\n"
	"		color = texture(TextureSampler, uv0);\n"
	"	#else\n"
	"		color = tex2D(TextureSampler, uv0);\n"
	"	#endif\n"
	"\n"
	"	const float GammaConst = 2.233;\n"
	"	color.rgb = RGBGammaToLinear(color.rgb, GammaConst);\n"
	"	color.rgb = LinearToRGBGamma(color.rgb, GammaConst);\n"
	"	color.a = RGBLuminance(color.rgb);\n"
	"\n"
	"	return color;\n"
	"}\n"
	"\n"
	"\n"
	"/*------------------------------------------------------------------------------\n"
	"                        [FXAA CODE SECTION]\n"
	"------------------------------------------------------------------------------*/\n"
	"\n"
	"float FxaaLuma(float4 rgba)\n"
	"{ \n"
	"	rgba.w = RGBLuminance(rgba.xyz);\n"
	"	return rgba.w; \n"
	"}\n"
	"\n"
	"float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaSubpix, float fxaaEdgeThreshold, float fxaaEdgeThresholdMin)\n"
	"{\n"
	"	float2 posM;\n"
	"	posM.x = pos.x;\n"
	"	posM.y = pos.y;\n"
	"\n"
	"	#if (FXAA_GATHER4_ALPHA == 1)\n"
	"	float4 rgbyM = FxaaTexTop(tex, posM);\n"
	"	float4 luma4A = FxaaTexAlpha4(tex, posM);\n"
	"	float4 luma4B = FxaaTexOffAlpha4(tex, posM, int2(-1, -1));\n"
	"	rgbyM.w = RGBLuminance(rgbyM.xyz);\n"
	"\n"
	"	#define lumaM rgbyM.w\n"
	"	#define lumaE luma4A.z\n"
	"	#define lumaS luma4A.x\n"
	"	#define lumaSE luma4A.y\n"
	"	#define lumaNW luma4B.w\n"
	"	#define lumaN luma4B.z\n"
	"	#define lumaW luma4B.x\n"
	"    \n"
	"	#else\n"
	"	float4 rgbyM = FxaaTexTop(tex, posM);\n"
	"	rgbyM.w = RGBLuminance(rgbyM.xyz);\n"
	"	#define lumaM rgbyM.w\n"
	"\n"
	"	float lumaS = FxaaLuma(FxaaTexOff(tex, posM, int2( 0, 1), fxaaRcpFrame.xy));\n"
	"	float lumaE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 0), fxaaRcpFrame.xy));\n"
	"	float lumaN = FxaaLuma(FxaaTexOff(tex, posM, int2( 0,-1), fxaaRcpFrame.xy));\n"
	"	float lumaW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 0), fxaaRcpFrame.xy));\n"
	"	#endif\n"
	"\n"
	"	float maxSM = max(lumaS, lumaM);\n"
	"	float minSM = min(lumaS, lumaM);\n"
	"	float maxESM = max(lumaE, maxSM);\n"
	"	float minESM = min(lumaE, minSM);\n"
	"	float maxWN = max(lumaN, lumaW);\n"
	"	float minWN = min(lumaN, lumaW);\n"
	"\n"
	"	float rangeMax = max(maxWN, maxESM);\n"
	"	float rangeMin = min(minWN, minESM);\n"
	"	float range = rangeMax - rangeMin;\n"
	"	float rangeMaxScaled = rangeMax * fxaaEdgeThreshold;\n"
	"	float rangeMaxClamped = max(fxaaEdgeThresholdMin, rangeMaxScaled);\n"
	"\n"
	"	bool earlyExit = range < rangeMaxClamped;\n"
	"	#if (FxaaEarlyExit == 1)\n"
	"	if(earlyExit) { return rgbyM; }\n"
	"	#endif\n"
	"\n"
	"	#if (FXAA_GATHER4_ALPHA == 0)\n"
	"	float lumaNW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1,-1), fxaaRcpFrame.xy));\n"
	"	float lumaSE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 1), fxaaRcpFrame.xy));\n"
	"	float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n"
	"	float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n"
	"	#else\n"
	"	float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n"
	"	float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n"
	"	#endif\n"
	"\n"
	"	float lumaNS = lumaN + lumaS;\n"
	"	float lumaWE = lumaW + lumaE;\n"
	"	float subpixRcpRange = 1.0/range;\n"
	"	float subpixNSWE = lumaNS + lumaWE;\n"
	"	float edgeHorz1 = (-2.0 * lumaM) + lumaNS;\n"
	"	float edgeVert1 = (-2.0 * lumaM) + lumaWE;\n"
	"	float lumaNESE = lumaNE + lumaSE;\n"
	"	float lumaNWNE = lumaNW + lumaNE;\n"
	"	float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;\n"
	"	float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;\n"
	"\n"
	"	float lumaNWSW = lumaNW + lumaSW;\n"
	"	float lumaSWSE = lumaSW + lumaSE;\n"
	"	float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);\n"
	"	float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);\n"
	"	float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;\n"
	"	float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;\n"
	"	float edgeHorz = abs(edgeHorz3) + edgeHorz4;\n"
	"	float edgeVert = abs(edgeVert3) + edgeVert4;\n"
	"\n"
	"	float subpixNWSWNESE = lumaNWSW + lumaNESE;\n"
	"	float lengthSign = fxaaRcpFrame.x;\n"
	"	bool horzSpan = edgeHorz >= edgeVert;\n"
	"	float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;\n"
	"	if(!horzSpan) lumaN = lumaW;\n"
	"	if(!horzSpan) lumaS = lumaE;\n"
	"	if(horzSpan) lengthSign = fxaaRcpFrame.y;\n"
	"	float subpixB = (subpixA * (1.0/12.0)) - lumaM;\n"
	"\n"
	"	float gradientN = lumaN - lumaM;\n"
	"	float gradientS = lumaS - lumaM;\n"
	"	float lumaNN = lumaN + lumaM;\n"
	"	float lumaSS = lumaS + lumaM;\n"
	"	bool pairN = abs(gradientN) >= abs(gradientS);\n"
	"	float gradient = max(abs(gradientN), abs(gradientS));\n"
	"	if(pairN) lengthSign = -lengthSign;\n"
	"	float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);\n"
	"\n"
	"	float2 posB;\n"
	"	posB.x = posM.x;\n"
	"	posB.y = posM.y;\n"
	"	float2 offNP;\n"
	"	offNP.x = (!horzSpan) ? 0.0 : fxaaRcpFrame.x;\n"
	"	offNP.y = ( horzSpan) ? 0.0 : fxaaRcpFrame.y;\n"
	"	if(!horzSpan) posB.x += lengthSign * 0.5;\n"
	"	if( horzSpan) posB.y += lengthSign * 0.5;\n"
	"\n"
	"	float2 posN;\n"
	"	posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;\n"
	"	posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;\n"
	"	float2 posP;\n"
	"	posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;\n"
	"	posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;\n"
	"	float subpixD = ((-2.0)*subpixC) + 3.0;\n"
	"	float lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));\n"
	"	float subpixE = subpixC * subpixC;\n"
	"	float lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));\n"
	"\n"
	"	if(!pairN) lumaNN = lumaSS;\n"
	"	float gradientScaled = gradient * 1.0/4.0;\n"
	"	float lumaMM = lumaM - lumaNN * 0.5;\n"
	"	float subpixF = subpixD * subpixE;\n"
	"	bool lumaMLTZero = lumaMM < 0.0;\n"
	"	lumaEndN -= lumaNN * 0.5;\n"
	"	lumaEndP -= lumaNN * 0.5;\n"
	"	bool doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	bool doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;\n"
	"	bool doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;\n"
	"\n"
	"	if(doneNP) {\n"
	"	if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
	"	if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
	"	if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
	"	if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
	"	doneN = abs(lumaEndN) >= gradientScaled;\n"
	"	doneP = abs(lumaEndP) >= gradientScaled;\n"
	"	if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;\n"
	"	if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;\n"
	"	doneNP = (!doneN) || (!doneP);\n"
	"	if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;\n"
	"	if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;\n"
	"	}}}}}}}}}}}\n"
	"\n"
	"	float dstN = posM.x - posN.x;\n"
	"	float dstP = posP.x - posM.x;\n"
	"	if(!horzSpan) dstN = posM.y - posN.y;\n"
	"	if(!horzSpan) dstP = posP.y - posM.y;\n"
	"\n"
	"	bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;\n"
	"	float spanLength = (dstP + dstN);\n"
	"	bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;\n"
	"	float spanLengthRcp = 1.0/spanLength;\n"
	"\n"
	"	bool directionN = dstN < dstP;\n"
	"	float dst = min(dstN, dstP);\n"
	"	bool goodSpan = directionN ? goodSpanN : goodSpanP;\n"
	"	float subpixG = subpixF * subpixF;\n"
	"	float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;\n"
	"	float subpixH = subpixG * fxaaSubpix;\n"
	"\n"
	"	float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;\n"
	"	float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);\n"
	"	if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;\n"
	"	if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;\n"
	"\n"
	"	return float4(FxaaTexTop(tex, posM).xyz, lumaM);\n"
	"}\n"
	"\n"
	"#if (FXAA_GLSL_130 == 1)\n"
	"float4 FxaaPass(float4 FxaaColor, float2 uv0)\n"
	"#else\n"
	"float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)\n"
	"#endif\n"
	"{\n"
	"\n"
	"	#if (SHADER_MODEL >= 0x400)\n"
	"	FxaaTex tex;\n"
	"	tex.tex = Texture;\n"
	"	tex.smpl = TextureSampler;\n"
	"\n"
	"	Texture.GetDimensions(PixelSize.x, PixelSize.y);\n"
	"	FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
	"\n"
	"    #elif (FXAA_GLSL_130 == 1)\n"
	"\n"
	"	vec2 PixelSize = textureSize(TextureSampler, 0);\n"
	"	FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
	"\n"
	"	#else\n"
	"	FxaaTex tex;\n"
	"	tex = TextureSampler;\n"
	"	FxaaColor = FxaaPixelShader(uv0, tex, PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
	"	#endif\n"
	"\n"
	"	return FxaaColor;\n"
	"}\n"
	"\n"
	"/*------------------------------------------------------------------------------\n"
	"                      [MAIN() & COMBINE PASS CODE SECTION]\n"
	"------------------------------------------------------------------------------*/\n"
	"#if (FXAA_GLSL_130 == 1)\n"
	"\n"
	"void ps_main()\n"
	"{\n"
	"    vec4 color = texture(TextureSampler, PSin.t);\n"
	"    color      = PreGammaPass(color, PSin.t);\n"
	"    color      = FxaaPass(color, PSin.t);\n"
	"\n"
	"    SV_Target0 = color;\n"
	"}\n"
	"\n"
	"#else\n"
	"\n"
	"PS_OUTPUT ps_main(VS_OUTPUT input)\n"
	"{\n"
	"	PS_OUTPUT output;\n"
	"\n"
	"	#if (SHADER_MODEL >= 0x400)\n"
	"		float4 color = Texture.Sample(TextureSampler, input.t);\n"
	"\n"
	"		color = PreGammaPass(color, input.t);\n"
	"		color = FxaaPass(color, input.t);\n"
	"	#else\n"
	"		float4 color = tex2D(TextureSampler, input.t);\n"
	"\n"
	"		color = PreGammaPass(color, input.t);\n"
	"		color = FxaaPass(color, input.t);\n"
	"	#endif\n"
	"\n"
	"	output.c = color;\n"
	"	\n"
	"	return output;\n"
	"}\n"
	"\n"
	"#endif\n"
	"\n"
	"#endif\n"
	;
