rsx: Refactor shader codegen and fix shadow sampling on depth-float

kd-11 2020-12-19 14:28:10 +03:00 committed by kd-11
parent d9cb1a6319
commit bee76fc8d1
6 changed files with 1059 additions and 986 deletions


@@ -392,6 +392,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/rsx_utils.cpp
RSX/Common/BufferUtils.cpp
RSX/Common/FragmentProgramDecompiler.cpp
RSX/Common/GLSLCommon.cpp
RSX/Common/ProgramStateCache.cpp
RSX/Common/surface_store.cpp
RSX/Common/TextureUtils.cpp


@@ -0,0 +1,996 @@
#include "stdafx.h"
#include "Utilities/StrFmt.h"
#include "GLSLCommon.h"
namespace program_common
{
void insert_compare_op(std::ostream& OS, bool low_precision)
{
if (low_precision)
{
OS <<
"int compare(const in float a, const in float b)\n"
"{\n"
" if (abs(a - b) < 0.000001) return 2;\n"
" return (a > b)? 4 : 1;\n"
"}\n\n"
"bool comparison_passes(const in float a, const in float b, const in uint func)\n"
"{\n"
" if (func == 0) return false; // never\n"
" if (func == 7) return true; // always\n\n"
" int op = compare(a, b);\n"
" switch (func)\n"
" {\n"
" case 1: return op == 1; // less\n"
" case 2: return op == 2; // equal\n"
" case 3: return op <= 2; // lequal\n"
" case 4: return op == 4; // greater\n"
" case 5: return op != 2; // nequal\n"
" case 6: return (op == 4 || op == 2); // gequal\n"
" }\n\n"
" return false; // unreachable\n"
"}\n\n";
}
else
{
OS <<
"bool comparison_passes(const in float a, const in float b, const in uint func)\n"
"{\n"
" switch (func)\n"
" {\n"
" default:\n"
" case 0: return false; //never\n"
" case 1: return (a < b); //less\n"
" case 2: return (a == b); //equal\n"
" case 3: return (a <= b); //lequal\n"
" case 4: return (a > b); //greater\n"
" case 5: return (a != b); //nequal\n"
" case 6: return (a >= b); //gequal\n"
" case 7: return true; //always\n"
" }\n"
"}\n\n";
}
}
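// Example: in the low-precision variant, compare() encodes less/equal/greater as 1/2/4,
// so comparison_passes(0.5, 0.5, 2 /* equal */) is true (|a - b| < 1e-6),
// while comparison_passes(1.0, 0.5, 1 /* less */) is false.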
void insert_compare_op_vector(std::ostream& OS)
{
OS <<
"bvec4 comparison_passes(const in vec4 a, const in vec4 b, const in uint func)\n"
"{\n"
" switch (func)\n"
" {\n"
" default:\n"
" case 0: return bvec4(false); //never\n"
" case 1: return lessThan(a, b); //less\n"
" case 2: return equal(a, b); //equal\n"
" case 3: return lessThanEqual(a, b); //lequal\n"
" case 4: return greaterThan(a, b); //greater\n"
" case 5: return notEqual(a, b); //nequal\n"
" case 6: return greaterThanEqual(a, b); //gequal\n"
" case 7: return bvec4(true); //always\n"
" }\n"
"}\n\n";
}
void insert_fog_declaration(std::ostream& OS, const std::string& wide_vector_type, const std::string& input_coord, bool declare)
{
std::string template_body;
if (!declare)
template_body += "$T fetch_fog_value(const in uint mode)\n";
else
template_body += "$T fetch_fog_value(const in uint mode, const in $T $I)\n";
template_body +=
"{\n"
" $T result = $T($I.x, 0., 0., 0.);\n"
" switch(mode)\n"
" {\n"
" default:\n"
" return result;\n"
" case 0:\n"
" //linear\n"
" result.y = fog_param1 * $I.x + (fog_param0 - 1.);\n"
" break;\n"
" case 1:\n"
" //exponential\n"
" result.y = exp(11.084 * (fog_param1 * $I.x + fog_param0 - 1.5));\n"
" break;\n"
" case 2:\n"
" //exponential2\n"
" result.y = exp(-pow(4.709 * (fog_param1 * $I.x + fog_param0 - 1.5), 2.));\n"
" break;\n"
" case 3:\n"
" //exponential_abs\n"
" result.y = exp(11.084 * (fog_param1 * abs($I.x) + fog_param0 - 1.5));\n"
" break;\n"
" case 4:\n"
" //exponential2_abs\n"
" result.y = exp(-pow(4.709 * (fog_param1 * abs($I.x) + fog_param0 - 1.5), 2.));\n"
" break;\n"
" case 5:\n"
" //linear_abs\n"
" result.y = fog_param1 * abs($I.x) + (fog_param0 - 1.);\n"
" break;\n"
" }\n"
"\n"
" result.y = clamp(result.y, 0., 1.);\n"
" return result;\n"
"}\n\n";
std::pair<std::string, std::string> replacements[] =
{std::make_pair("$T", wide_vector_type),
std::make_pair("$I", input_coord)};
OS << fmt::replace_all(template_body, replacements);
}
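// Example: with wide_vector_type = "vec4" and input_coord = "fog_c" (the arguments passed by
// glsl::insert_fog_declaration below), every "$T" becomes "vec4" and every "$I" becomes "fog_c",
// e.g. the linear mode emits "result.y = fog_param1 * fog_c.x + (fog_param0 - 1.);".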
}
namespace glsl
{
std::string getFloatTypeNameImpl(usz elementCount)
{
switch (elementCount)
{
default:
abort();
case 1:
return "float";
case 2:
return "vec2";
case 3:
return "vec3";
case 4:
return "vec4";
}
}
std::string getHalfTypeNameImpl(usz elementCount)
{
switch (elementCount)
{
default:
abort();
case 1:
return "float16_t";
case 2:
return "f16vec2";
case 3:
return "f16vec3";
case 4:
return "f16vec4";
}
}
std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar)
{
if (scalar)
{
switch (f)
{
case COMPARE::FUNCTION_SEQ:
return Op0 + " == " + Op1;
case COMPARE::FUNCTION_SGE:
return Op0 + " >= " + Op1;
case COMPARE::FUNCTION_SGT:
return Op0 + " > " + Op1;
case COMPARE::FUNCTION_SLE:
return Op0 + " <= " + Op1;
case COMPARE::FUNCTION_SLT:
return Op0 + " < " + Op1;
case COMPARE::FUNCTION_SNE:
return Op0 + " != " + Op1;
}
}
else
{
switch (f)
{
case COMPARE::FUNCTION_SEQ:
return "equal(" + Op0 + ", " + Op1 + ")";
case COMPARE::FUNCTION_SGE:
return "greaterThanEqual(" + Op0 + ", " + Op1 + ")";
case COMPARE::FUNCTION_SGT:
return "greaterThan(" + Op0 + ", " + Op1 + ")";
case COMPARE::FUNCTION_SLE:
return "lessThanEqual(" + Op0 + ", " + Op1 + ")";
case COMPARE::FUNCTION_SLT:
return "lessThan(" + Op0 + ", " + Op1 + ")";
case COMPARE::FUNCTION_SNE:
return "notEqual(" + Op0 + ", " + Op1 + ")";
}
}
fmt::throw_exception("Unknown compare function");
}
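// Example: compareFunctionImpl(COMPARE::FUNCTION_SGT, "a", "b", true) yields "a > b",
// while the vector form (scalar == false) yields "greaterThan(a, b)".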
void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant)
{
std::string vertex_id_name = (rules != glsl_rules_spirv) ? "gl_VertexID" : "gl_VertexIndex";
//Actually decode a vertex attribute from a raw byte stream
OS <<
"#define VTX_FMT_SNORM16 0\n"
"#define VTX_FMT_FLOAT32 1\n"
"#define VTX_FMT_FLOAT16 2\n"
"#define VTX_FMT_UNORM8 3\n"
"#define VTX_FMT_SINT16 4\n"
"#define VTX_FMT_COMP32 5\n"
"#define VTX_FMT_UINT8 6\n\n";
// For Intel GPUs which cannot access vectors in indexed mode (driver bug? or GLSL version too low?)
// Note: Tested on Mesa iris with HD 530 and the compliant path works fine; may be a bug in the Windows proprietary drivers
if (!glsl4_compliant)
{
OS <<
"void mov(inout vec4 vector, const in int index, const in float scalar)\n"
"{\n"
" switch(index)\n"
" {\n"
" case 0: vector.x = scalar; return;\n"
" case 1: vector.y = scalar; return;\n"
" case 2: vector.z = scalar; return;\n"
" case 3: vector.w = scalar; return;\n"
" }\n"
"}\n\n"
"uint ref(const in uvec4 vector, const in int index)\n"
"{\n"
" switch(index)\n"
" {\n"
" case 0: return vector.x;\n"
" case 1: return vector.y;\n"
" case 2: return vector.z;\n"
" case 3: return vector.w;\n"
" }\n"
"}\n\n";
}
else
{
OS <<
"#define mov(v, i, s) v[i] = s\n"
"#define ref(v, i) v[i]\n\n";
}
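// Example: on the compliant path, mov(dst, 2, s) expands to dst[2] = s and ref(v, 1) to v[1];
// the workaround path reaches the same components through an explicit switch instead of
// dynamic indexing.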
OS <<
"struct attribute_desc\n"
"{\n"
" uint type;\n"
" uint attribute_size;\n"
" uint starting_offset;\n"
" uint stride;\n"
" uint frequency;\n"
" bool swap_bytes;\n"
" bool is_volatile;\n"
" bool modulo;\n"
"};\n\n"
"uint gen_bits(const in uint x, const in uint y, const in uint z, const in uint w, const in bool swap)\n"
"{\n"
" return (swap) ?\n"
" _set_bits(_set_bits(_set_bits(w, z, 8, 8), y, 16, 8), x, 24, 8) :\n"
" _set_bits(_set_bits(_set_bits(x, y, 8, 8), z, 16, 8), w, 24, 8);\n"
"}\n\n"
"uint gen_bits(const in uint x, const in uint y, const in bool swap)\n"
"{\n"
" return (swap)? _set_bits(y, x, 8, 8) : _set_bits(x, y, 8, 8);\n"
"}\n\n"
// NOTE: (int(n) or int(n)) is broken on some NVIDIA and INTEL hardware when the sign bit is involved.
// See https://github.com/RPCS3/rpcs3/issues/8990
"vec4 sext(const in ivec4 bits)\n"
"{\n"
" // convert raw 16 bit values into signed 32-bit float4 counterpart\n"
" bvec4 sign_check = lessThan(bits, ivec4(0x8000));\n"
" return _select(bits - 65536, bits, sign_check);\n"
"}\n\n"
"float sext(const in int bits)\n"
"{\n"
" return (bits < 0x8000) ? float(bits) : float(bits - 65536); \n"
"}\n\n"
"vec4 fetch_attribute(const in attribute_desc desc, const in int vertex_id, usamplerBuffer input_stream)\n"
"{\n"
" const int elem_size_table[] = { 2, 4, 2, 1, 2, 4, 1 };\n"
" const float scaling_table[] = { 32768., 1., 1., 255., 1., 32767., 1. };\n"
" const int elem_size = elem_size_table[desc.type];\n"
" const vec4 scale = scaling_table[desc.type].xxxx;\n\n"
" uvec4 tmp, result = uvec4(0u);\n"
" vec4 ret;\n"
" int n, i = int((vertex_id * desc.stride) + desc.starting_offset);\n\n"
" for (n = 0; n < desc.attribute_size; n++)\n"
" {\n"
" tmp.x = texelFetch(input_stream, i++).x;\n"
" if (elem_size == 2)\n"
" {\n"
" tmp.y = texelFetch(input_stream, i++).x;\n"
" tmp.x = gen_bits(tmp.x, tmp.y, desc.swap_bytes);\n"
" }\n"
" else if (elem_size == 4)\n"
" {\n"
" tmp.y = texelFetch(input_stream, i++).x;\n"
" tmp.z = texelFetch(input_stream, i++).x;\n"
" tmp.w = texelFetch(input_stream, i++).x;\n"
" tmp.x = gen_bits(tmp.x, tmp.y, tmp.z, tmp.w, desc.swap_bytes);\n"
" }\n\n"
" mov(result, n, tmp.x);\n"
" }\n\n"
" // Actual decoding step is done in vector space, outside the loop\n"
" if (desc.type == VTX_FMT_SNORM16 || desc.type == VTX_FMT_SINT16)\n"
" {\n"
" ret = sext(ivec4(result));\n"
" }\n"
" else if (desc.type == VTX_FMT_FLOAT32)\n"
" {\n"
" ret = uintBitsToFloat(result);\n"
" }\n"
" else if (desc.type == VTX_FMT_FLOAT16)\n"
" {\n"
" tmp.x = _set_bits(result.x, result.y, 16, 16);\n"
" tmp.y = _set_bits(result.z, result.w, 16, 16);\n"
" ret.xy = unpackHalf2x16(tmp.x);\n"
" ret.zw = unpackHalf2x16(tmp.y);\n"
" }\n"
" else if (desc.type == VTX_FMT_UINT8 || desc.type == VTX_FMT_UNORM8)\n"
" {\n"
" ret = vec4(desc.swap_bytes? result.wzyx : result);\n"
" }\n"
" else //if (desc.type == VTX_FMT_COMP32)\n"
" {\n"
" result = uvec4(_get_bits(result.x, 0, 11),\n"
" _get_bits(result.x, 11, 11),\n"
" _get_bits(result.x, 22, 10),\n"
" uint(scale.x));\n"
" ret = sext(ivec4(result) << ivec4(5, 5, 6, 0));\n"
" }\n\n"
" if (desc.attribute_size < 4)\n"
" {\n"
" ret.w = scale.x;\n"
" }\n\n"
" return ret / scale; \n"
"}\n\n"
"attribute_desc fetch_desc(const in int location)\n"
"{\n"
" // Each descriptor is 64 bits wide\n"
" // [0-8] attribute stride\n"
" // [8-24] attribute divisor\n"
" // [24-27] attribute type\n"
" // [27-30] attribute size\n"
" // [30-31] reserved\n"
" // [32-60] starting offset\n"
" // [60-61] swap bytes flag\n"
" // [61-62] volatile flag\n"
" // [62-63] modulo enable flag\n\n";
if (rules == glsl_rules_opengl4)
{
// Data is packed into a ubo
OS <<
" int block = (location >> 1);\n"
" int sub_block = (location & 1) << 1;\n"
" uvec2 attrib = uvec2(\n"
" ref(input_attributes_blob[block], sub_block + 0),\n"
" ref(input_attributes_blob[block], sub_block + 1));\n\n";
}
else
{
// Fetch parameters streamed separately from draw parameters
OS <<
" uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy;\n\n";
}
OS <<
" attribute_desc result;\n"
" result.stride = _get_bits(attrib.x, 0, 8);\n"
" result.frequency = _get_bits(attrib.x, 8, 16);\n"
" result.type = _get_bits(attrib.x, 24, 3);\n"
" result.attribute_size = _get_bits(attrib.x, 27, 3);\n"
" result.starting_offset = _get_bits(attrib.y, 0, 29);\n"
" result.swap_bytes = _test_bit(attrib.y, 29);\n"
" result.is_volatile = _test_bit(attrib.y, 30);\n"
" result.modulo = _test_bit(attrib.y, 31);\n"
" return result;\n"
"}\n\n"
"vec4 read_location(const in int location)\n"
"{\n"
" attribute_desc desc = fetch_desc(location);\n"
" int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"
" if (desc.frequency == 0)\n"
" {\n"
" vertex_id = 0;\n"
" }\n"
" else if (desc.modulo)\n"
" {\n"
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) % int(desc.frequency);\n"
" }\n"
" else\n"
" {\n"
" vertex_id /= int(desc.frequency); \n"
" }\n\n"
" if (desc.is_volatile)\n"
" return fetch_attribute(desc, vertex_id, volatile_input_stream);\n"
" else\n"
" return fetch_attribute(desc, vertex_id, persistent_input_stream);\n"
"}\n\n";
}
void insert_rop_init(std::ostream& OS)
{
OS <<
" if (_test_bit(rop_control, 9))\n"
" {\n"
" // Convert x,y to linear address\n"
" const ivec2 stipple_coord = ivec2(gl_FragCoord.xy) % ivec2(32, 32);\n"
" const int address = stipple_coord.y * 32 + stipple_coord.x;\n"
" const int bit_offset = (address & 31);\n"
" const int word_index = _get_bits(address, 7, 3);\n"
" const int sub_index = _get_bits(address, 5, 2);\n\n"
" if (_test_bit(stipple_pattern[word_index][sub_index], bit_offset))\n"
" {\n"
" _kill();\n"
" }\n"
" }\n\n";
}
void insert_rop(std::ostream& OS, const shader_properties& props)
{
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
if (props.disable_early_discard)
{
OS <<
" if (_fragment_discard)\n"
" {\n"
" discard;\n"
" }\n"
" else if (_get_bits(rop_control, 0, 8) != 0)\n";
}
else
{
OS << " if (_get_bits(rop_control, 0, 8) != 0)\n";
}
OS <<
" {\n"
" const bool alpha_test = _test_bit(rop_control, 0);\n"
" const uint alpha_func = _get_bits(rop_control, 16, 3);\n";
if (!props.fp32_outputs)
{
OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n";
}
if (props.emulate_coverage_tests)
{
OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n";
}
OS <<
" if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n"
" {\n"
" discard;\n"
" }\n";
if (props.emulate_coverage_tests)
{
OS <<
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
" {\n"
" discard;\n"
" }\n";
}
if (!props.fp32_outputs)
{
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
if (props.supports_native_fp16)
{
OS <<
" else if (srgb_convert)\n"
" {\n"
" " << reg0 << ".rgb = clamp16(linear_to_srgb(" << reg0 << ")).rgb;\n"
" " << reg1 << ".rgb = clamp16(linear_to_srgb(" << reg1 << ")).rgb;\n"
" " << reg2 << ".rgb = clamp16(linear_to_srgb(" << reg2 << ")).rgb;\n"
" " << reg3 << ".rgb = clamp16(linear_to_srgb(" << reg3 << ")).rgb;\n"
" }\n";
}
else
{
OS <<
" else if (srgb_convert)\n"
" {\n"
" " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n"
" " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n"
" " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n"
" " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n"
" }\n";
}
}
OS <<
" }\n\n"
" ocol0 = " << reg0 << ";\n"
" ocol1 = " << reg1 << ";\n"
" ocol2 = " << reg2 << ";\n"
" ocol3 = " << reg3 << ";\n\n";
}
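// rop_control bits consumed above: bit 0 = alpha test enable, bit 1 = sRGB conversion,
// bit 4 = alpha-to-coverage, bit 9 = polygon stipple (insert_rop_init), bits 16-18 = alpha
// function, and bits 5+ are forwarded to coverage_test_passes as its control word.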
void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
{
OS << "#define _select mix\n";
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
OS << "#define _get_bits(x, off, count) bitfieldExtract(x, off, count)\n";
OS << "#define _set_bits(x, y, off, count) bitfieldInsert(x, y, off, count)\n";
OS << "#define _test_bit(x, y) (_get_bits(x, y, 1) != 0)\n";
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
if (props.domain == glsl::program_domain::glsl_fragment_program)
{
OS << "// Workaround for broken early discard in some drivers\n";
if (props.disable_early_discard)
{
OS << "bool _fragment_discard = false;\n";
OS << "#define _kill() _fragment_discard = true\n\n";
}
else
{
OS << "#define _kill() discard\n\n";
}
if (props.require_texture_ops)
{
OS <<
// Declare special texture control flags
"#define GAMMA_R_MASK (1 << " << rsx::texture_control_bits::GAMMA_R << ")\n"
"#define GAMMA_G_MASK (1 << " << rsx::texture_control_bits::GAMMA_G << ")\n"
"#define GAMMA_B_MASK (1 << " << rsx::texture_control_bits::GAMMA_B << ")\n"
"#define GAMMA_A_MASK (1 << " << rsx::texture_control_bits::GAMMA_A << ")\n"
"#define EXPAND_R_MASK (1 << " << rsx::texture_control_bits::EXPAND_R << ")\n"
"#define EXPAND_G_MASK (1 << " << rsx::texture_control_bits::EXPAND_G << ")\n"
"#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n"
"#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n"
"#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n"
"#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n"
"#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n"
"#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n"
"#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n\n";
}
}
if (props.require_lit_emulation)
{
OS <<
"vec4 lit_legacy(const in vec4 val)"
"{\n"
" vec4 clamped_val = val;\n"
" clamped_val.x = max(val.x, 0.);\n"
" clamped_val.y = max(val.y, 0.);\n"
" vec4 result;\n"
" result.x = 1.;\n"
" result.w = 1.;\n"
" result.y = clamped_val.x;\n"
" result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n"
" return result;\n"
"}\n\n";
}
if (props.domain == glsl::program_domain::glsl_vertex_program && props.emulate_zclip_transform)
{
if (props.emulate_depth_clip_only)
{
// Declare rcp_precise. Requires f64 support in the drivers.
// This is required to handle precision drift during division for extended depth range.
OS <<
"double rcp_precise(double x)\n"
"{\n"
" double scaled = x * 0.0009765625;\n"
" double inv = 1.0 / scaled;\n"
" return inv * 0.0009765625;\n"
"}\n"
"\n"
// Technically the depth value here is the 'final' depth that should be stored in the Z buffer.
// Forward mapping eqn is d' = d * (f - n) + n, where d' is the stored Z value (this) and d is the normalized API value.
"vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n"
"{\n"
" if (far_plane != 0.0)\n"
" {\n"
" double z_range = (far_plane > near_plane)? (far_plane - near_plane) : far_plane;\n"
" double inv_range = rcp_precise(z_range);\n"
" float d = float(pos.z * rcp_precise(pos.w));\n"
" float new_d = (d - near_plane) * float(inv_range);\n"
" return vec4(pos.x, pos.y, (new_d * pos.w), pos.w);\n"
" }\n"
" else\n"
" {\n"
" return pos;\n" // Only values where Z=0 can ever pass this clip
" }\n"
"}\n\n";
}
else
{
OS <<
"vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n"
"{\n"
" float d = float(pos.z / pos.w);\n"
" if (d < 0.f && d >= near_plane)\n"
" {\n"
" // Clamp\n"
" d = 0.f;\n"
" }\n"
" else if (d > 1.f && d <= far_plane)\n"
" {\n"
" // Compress Z and store towards highest end of the range\n"
" d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"
" }\n"
" else\n"
" {\n"
" return pos;\n"
" }\n"
"\n"
" return vec4(pos.x, pos.y, d * pos.w, pos.w);\n"
"}\n\n";
}
return;
}
program_common::insert_compare_op(OS, props.low_precision_tests);
if (props.emulate_coverage_tests)
{
// Purely stochastic
OS <<
"bool coverage_test_passes(const in vec4 _sample, const in uint control)\n"
"{\n"
" if (!_test_bit(control, 0)) return false;\n"
"\n"
" float random = _rand(gl_FragCoord);\n"
" return (_sample.a > random);\n"
"}\n\n";
}
if (!props.fp32_outputs)
{
OS <<
"vec4 linear_to_srgb(const in vec4 cl)\n"
"{\n"
" vec4 low = cl * 12.92;\n"
" vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n"
" bvec4 select = lessThan(cl, vec4(0.0031308));\n"
" return clamp(mix(high, low, select), 0., 1.);\n"
"}\n\n";
}
if (props.require_depth_conversion)
{
ensure(props.require_texture_ops);
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1)
OS <<
"vec4 decode_depth24(const in float depth_value, const in bool depth_float)\n"
"{\n"
" uint value;\n"
" if (!depth_float)\n"
" value = uint(depth_value * 16777215.);\n"
" else\n"
" value = _get_bits(floatBitsToUint(depth_value), 7, 24);\n"
"\n"
" uint b = _get_bits(value, 0, 8);\n"
" uint g = _get_bits(value, 8, 8);\n"
" uint r = _get_bits(value, 16, 8);\n"
" return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n"
"}\n\n"
"vec4 remap_vector(const in vec4 color, const in uint remap)\n"
"{\n"
" vec4 result;\n"
" if (_get_bits(remap, 0, 8) == 0xE4)\n"
" {\n"
" result = color;\n"
" }\n"
" else\n"
" {\n"
" uvec4 remap_channel = uvec4(remap) >> uvec4(2, 4, 6, 0);\n"
" remap_channel &= 3;\n"
" remap_channel = (remap_channel + 3) % 4; // Map A-R-G-B to R-G-B-A\n\n"
" // Generate remapped result\n"
" result.a = color[remap_channel.a];\n"
" result.r = color[remap_channel.r];\n"
" result.g = color[remap_channel.g];\n"
" result.b = color[remap_channel.b];\n"
" }\n\n"
" if (_get_bits(remap, 8, 8) == 0xAA)\n"
" return result;\n\n"
" uvec4 remap_select = uvec4(remap) >> uvec4(10, 12, 14, 8);\n"
" remap_select &= 3;\n"
" bvec4 choice = lessThan(remap_select, uvec4(2));\n"
" return _select(result, vec4(remap_select), choice);\n"
"}\n\n"
"vec4 texture2DReconstruct(sampler2D tex, usampler2D stencil_tex, const in vec2 coord, const in uint remap, const in uint flags)\n"
"{\n"
" vec4 result = decode_depth24(texture(tex, coord.xy).r, _test_bit(flags, DEPTH_FLOAT));\n"
" result.z = float(texture(stencil_tex, coord.xy).x) / 255.f;\n\n"
" if (remap == 0xAAE4)\n"
" return result;\n\n"
" return remap_vector(result, remap);\n"
"}\n\n";
}
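// Example: for the default remap word 0xAAE4 the reconstructed texel is returned as-is;
// 0xE4 (0b11100100) in the low byte is the identity channel order and 0xAA selects the
// sampled value for every component, so texture2DReconstruct skips remap_vector entirely.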
if (props.require_texture_ops)
{
OS <<
#ifdef __APPLE__
"vec4 remap_vector(const in vec4 rgba, const in uint remap_bits)\n"
"{\n"
" uvec4 selector = (uvec4(remap_bits) >> uvec4(3, 6, 9, 0)) & 0x7;\n"
" bvec4 choice = greaterThan(selector, uvec4(1));\n"
"\n"
" vec4 direct = vec4(selector);\n"
" selector = min(selector - 2, selector);\n"
" vec4 indexed = vec4(rgba[selector.r], rgba[selector.g], rgba[selector.b], rgba[selector.a]);\n"
" return mix(direct, indexed, choice);\n"
"}\n\n"
#endif
"vec4 srgb_to_linear(const in vec4 cs)\n"
"{\n"
" vec4 a = cs / 12.92;\n"
" vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4));\n"
" return _select(a, b, greaterThan(cs, vec4(0.04045)));\n"
"}\n\n"
//TODO: Move all the texture read control operations here
"vec4 process_texel(in vec4 rgba, const in uint control_bits)\n"
"{\n"
#ifdef __APPLE__
" uint remap_bits = (control_bits >> 16) & 0xFFFF;\n"
" if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n"
#endif
" if (control_bits == 0)\n"
" {\n"
" return rgba;\n"
" }\n"
"\n"
" if (_test_bit(control_bits, ALPHAKILL))\n"
" {\n"
" // Alphakill\n"
" if (rgba.a < 0.000001)\n"
" {\n"
" _kill();\n"
" return rgba;\n"
" }\n"
" }\n"
"\n"
" if (_test_bit(control_bits, RENORMALIZE))\n"
" {\n"
" // Renormalize to 8-bit (PS3) accuracy\n"
" rgba = floor(rgba * 255.);\n"
" rgba /= 255.;"
" }\n"
"\n"
" uvec4 mask;\n"
" vec4 convert;\n"
" uint op_mask = control_bits & SIGN_EXPAND_MASK;\n"
"\n"
" if (op_mask != 0)\n"
" {\n"
" // Expand to signed normalized\n"
" mask = uvec4(op_mask) & uvec4(EXPAND_R_MASK, EXPAND_G_MASK, EXPAND_B_MASK, EXPAND_A_MASK);\n"
" convert = (rgba * 2.f - 1.f);\n"
" rgba = _select(rgba, convert, notEqual(mask, uvec4(0)));\n"
" }\n"
"\n"
" op_mask = control_bits & GAMMA_CTRL_MASK;\n"
" if (op_mask != 0u)\n"
" {\n"
" // Gamma correction\n"
" mask = uvec4(op_mask) & uvec4(GAMMA_R_MASK, GAMMA_G_MASK, GAMMA_B_MASK, GAMMA_A_MASK);\n"
" convert = srgb_to_linear(rgba);\n"
" return _select(rgba, convert, notEqual(mask, uvec4(0)));\n"
" }\n"
"\n"
" return rgba;\n"
"}\n\n";
if (props.require_texture_expand)
{
OS <<
"uint _texture_flag_override = 0;\n"
"#define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK\n"
"#define _disable_texture_expand() _texture_flag_override = 0\n"
"#define TEX_FLAGS(index) (texture_parameters[index].flags | _texture_flag_override)\n";
}
else
{
OS <<
"#define TEX_FLAGS(index) texture_parameters[index].flags\n";
}
OS <<
"#define TEX_NAME(index) tex##index\n"
"#define TEX_NAME_STENCIL(index) tex##index##_stencil\n\n"
"#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x), TEX_FLAGS(index))\n"
"#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, bias), TEX_FLAGS(index))\n"
"#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, lod), TEX_FLAGS(index))\n"
"#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), coord2 * vec2(texture_parameters[index].scale.x, 1.)), TEX_FLAGS(index))\n"
"#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale), TEX_FLAGS(index))\n"
"#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale, bias), TEX_FLAGS(index))\n"
"#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), coord2 * texture_parameters[index].scale, lod), TEX_FLAGS(index))\n"
"#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].scale, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.)), TEX_FLAGS(index))\n"
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].scale, texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n";
if (props.emulate_shadow_compare)
{
OS <<
"#define SHADOW_COORD(coord3, scale, flags) vec3(coord3.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord3.z : min(coord3.z, 1.0))\n"
"#define SHADOW_COORD_PROJ(coord4, scale, flags) vec4(coord4.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n"
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(coord3, texture_parameters[index].scale, TEX_FLAGS(index)))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n";
}
else
{
OS <<
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].scale, 1.))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.))\n";
}
OS <<
"#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), TEX_FLAGS(index))\n"
"#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), TEX_FLAGS(index))\n"
"#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), TEX_FLAGS(index))\n"
"#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord3, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), TEX_FLAGS(index))\n\n";
}
if (props.require_wpos)
{
OS <<
"vec4 get_wpos()\n"
"{\n"
" float abs_scale = abs(wpos_scale);\n"
" return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n"
"}\n\n";
}
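// get_wpos(): judging by the abs()/bias usage above, the sign of wpos_scale selects the
// window origin (a negative scale flips Y) and wpos_bias supplies the matching offset;
// X always uses the absolute scale.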
}
void insert_fog_declaration(std::ostream& OS)
{
program_common::insert_fog_declaration(OS, "vec4", "fog_c");
}
std::string getFunctionImpl(FUNCTION f)
{
switch (f)
{
default:
abort();
case FUNCTION::FUNCTION_DP2:
return "$Ty(dot($0.xy, $1.xy))";
case FUNCTION::FUNCTION_DP2A:
return "$Ty(dot($0.xy, $1.xy) + $2.x)";
case FUNCTION::FUNCTION_DP3:
return "$Ty(dot($0.xyz, $1.xyz))";
case FUNCTION::FUNCTION_DP4:
return "$Ty(dot($0, $1))";
case FUNCTION::FUNCTION_DPH:
return "$Ty(dot(vec4($0.xyz, 1.0), $1))";
case FUNCTION::FUNCTION_SFL:
return "$Ty(0.)";
case FUNCTION::FUNCTION_STR:
return "$Ty(1.)";
case FUNCTION::FUNCTION_FRACT:
return "fract($0)";
case FUNCTION::FUNCTION_REFL:
return "reflect($0, $1)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D:
return "TEX1D($_i, $0.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_BIAS:
return "TEX1D_BIAS($_i, $0.x, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_PROJ:
return "TEX1D_PROJ($_i, $0.xy)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_LOD:
return "TEX1D_LOD($_i, $0.x, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_GRAD:
return "TEX1D_GRAD($_i, $0.x, $1.x, $2.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D:
return "TEX2D($_i, $0.xy)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_BIAS:
return "TEX2D_BIAS($_i, $0.xy, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ:
return "TEX2D_PROJ($_i, $0)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD:
return "TEX2D_LOD($_i, $0.xy, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_GRAD:
return "TEX2D_GRAD($_i, $0.xy, $1.xy, $2.xy)";
case FUNCTION::FUNCTION_TEXTURE_SHADOW2D:
return "TEX2D_SHADOW($_i, $0.xyz)";
case FUNCTION::FUNCTION_TEXTURE_SHADOW2D_PROJ:
return "TEX2D_SHADOWPROJ($_i, $0)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE:
return "TEX3D($_i, $0.xyz)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_BIAS:
return "TEX3D_BIAS($_i, $0.xyz, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_PROJ:
return "TEX3D($_i, ($0.xyz / $0.w))";
case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_LOD:
return "TEX3D_LOD($_i, $0.xyz, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_GRAD:
return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D:
return "TEX3D($_i, $0.xyz)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_BIAS:
return "TEX3D_BIAS($_i, $0.xyz, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_PROJ:
return "TEX3D_PROJ($_i, $0)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_LOD:
return "TEX3D_LOD($_i, $0.xyz, $1.x)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_GRAD:
return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)";
case FUNCTION::FUNCTION_DFDX:
return "dFdx($0)";
case FUNCTION::FUNCTION_DFDY:
return "dFdy($0)";
case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH1D:
return "textureLod($t, $0.x, 0)";
case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH2D:
return "textureLod($t, $0.xy, 0)";
case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH3D:
case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCHCUBE:
return "textureLod($t, $0.xyz, 0)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA:
return "TEX2D_DEPTH_RGBA8($_i, $0.xy)";
case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA_PROJ:
return "TEX2D_DEPTH_RGBA8($_i, ($0.xy / $0.w))";
}
}
void insert_subheader_block(std::ostream& OS)
{
// Global types and stuff
// Must be compatible with std140 packing rules
OS <<
"struct sampler_info\n"
"{\n"
" vec2 scale;\n"
" uint remap;\n"
" uint flags;\n"
"};\n"
"\n";
}
}

File diff suppressed because it is too large


@@ -4,6 +4,7 @@
#include "Emu/Cell/PPUCallback.h"
#include "Common/BufferUtils.h"
#include "Common/GLSLCommon.h"
#include "Common/texture_cache.h"
#include "Common/surface_store.h"
#include "Capture/rsx_capture.h"
@@ -1853,7 +1854,7 @@ namespace rsx
if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
texture_control |= (1 << 4);
texture_control |= (1 << texture_control_bits::ALPHAKILL);
}
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
@@ -1865,37 +1866,34 @@ namespace rsx
if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
{
switch (format)
switch (sampler_descriptors[i]->format_class)
{
case CELL_GCM_TEXTURE_X16:
{
// A simple way to quickly read DEPTH16 data without shadow comparison
case RSX_FORMAT_CLASS_DEPTH16_FLOAT:
case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32:
texture_control |= (1 << texture_control_bits::DEPTH_FLOAT);
break;
default:
break;
}
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
{
// Reading depth data as XRGB8 is supported with in-shader conversion
// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
u32 control_bits = sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32? (1u << 16) : 0u;
control_bits |= tex.remap() & 0xFFFF;
// Emulate bitcast in shader
current_fragment_program.redirected_textures |= (1 << i);
current_fragment_program.texture_scale[i][2] = std::bit_cast<f32>(control_bits);
const auto float_en = (sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0;
texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT);
break;
}
case CELL_GCM_TEXTURE_X16: // A simple way to quickly read DEPTH16 data without shadow comparison
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
{
const auto compare_mode = tex.zfunc();
if (!tex.alpha_kill_enabled() &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
{
current_fragment_program.shadow_textures |= (1 << i);
texture_control |= u32(tex.zfunc()) << 8;
}
// Supported formats, nothing to do
break;
}
default:
@@ -1912,7 +1910,7 @@ namespace rsx
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_R6G5B5:
texture_control |= (1 << 5);
texture_control |= (1 << texture_control_bits::RENORMALIZE);
break;
default:
break;
@@ -1932,14 +1930,14 @@ namespace rsx
const auto remap_ctrl = (tex.remap() >> 8) & 0xAA;
if (remap_ctrl == 0xAA)
{
argb8_convert |= (sign_convert & 0xFu) << 6;
argb8_convert |= (sign_convert & 0xFu) << texture_control_bits::EXPAND_OFFSET;
}
else
{
if (remap_ctrl & 0x03) argb8_convert |= (sign_convert & 0x1u) << 6;
if (remap_ctrl & 0x0C) argb8_convert |= (sign_convert & 0x2u) << 6;
if (remap_ctrl & 0x30) argb8_convert |= (sign_convert & 0x4u) << 6;
if (remap_ctrl & 0xC0) argb8_convert |= (sign_convert & 0x8u) << 6;
if (remap_ctrl & 0x03) argb8_convert |= (sign_convert & 0x1u) << texture_control_bits::EXPAND_OFFSET;
if (remap_ctrl & 0x0C) argb8_convert |= (sign_convert & 0x2u) << texture_control_bits::EXPAND_OFFSET;
if (remap_ctrl & 0x30) argb8_convert |= (sign_convert & 0x4u) << texture_control_bits::EXPAND_OFFSET;
if (remap_ctrl & 0xC0) argb8_convert |= (sign_convert & 0x8u) << texture_control_bits::EXPAND_OFFSET;
}
}

View File

@@ -345,6 +345,7 @@
<ClCompile Include="Emu\RSX\CgBinaryVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\Common\BufferUtils.cpp" />
<ClCompile Include="Emu\RSX\Common\FragmentProgramDecompiler.cpp" />
<ClCompile Include="Emu\RSX\Common\GLSLCommon.cpp" />
<ClCompile Include="Emu\RSX\Common\ProgramStateCache.cpp" />
<ClCompile Include="Emu\RSX\Common\surface_store.cpp" />
<ClCompile Include="Emu\RSX\Common\TextureUtils.cpp" />


@@ -206,6 +206,9 @@
<ClCompile Include="Emu\RSX\Common\BufferUtils.cpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Common\GLSLCommon.cpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Null\NullGSRender.cpp">
<Filter>Emu\GPU\RSX\Null</Filter>
</ClCompile>