From 82439327fab3af99c6ae880d16bdc81595e5098d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 26 Jun 2022 17:59:09 +0300 Subject: [PATCH] gl: Support loading data from SSBO using compute shaders - Gives better performance than using raw draw calls. - Does not work with all formats. The draw call version is still used when needed. --- rpcs3/Emu/RSX/GL/GLCompute.cpp | 43 ++++ rpcs3/Emu/RSX/GL/GLCompute.h | 7 + rpcs3/Emu/RSX/GL/GLOverlays.cpp | 8 +- rpcs3/Emu/RSX/GL/GLOverlays.h | 4 +- rpcs3/Emu/RSX/GL/GLProcTable.h | 3 + rpcs3/Emu/RSX/GL/GLTexture.cpp | 24 ++- rpcs3/Emu/RSX/GL/glutils/common.h | 1 + rpcs3/Emu/RSX/GL/glutils/image.cpp | 25 ++- rpcs3/Emu/RSX/GL/glutils/image.h | 27 +-- .../GLSLSnippets/CopyBufferToColorImage.glsl | 188 ++++++++++++++++++ ...age.glsl => CopyBufferToGenericImage.glsl} | 78 ++------ rpcs3/emucore.vcxproj | 3 +- rpcs3/emucore.vcxproj.filters | 9 +- 13 files changed, 326 insertions(+), 94 deletions(-) create mode 100644 rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToColorImage.glsl rename rpcs3/Emu/RSX/Program/GLSLSnippets/{CopyBufferToImage.glsl => CopyBufferToGenericImage.glsl} (68%) diff --git a/rpcs3/Emu/RSX/GL/GLCompute.cpp b/rpcs3/Emu/RSX/GL/GLCompute.cpp index eb55642ef4..f0f21194e3 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.cpp +++ b/rpcs3/Emu/RSX/GL/GLCompute.cpp @@ -351,4 +351,47 @@ namespace gl const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size); compute_task::run(cmd, num_invocations); } + + cs_ssbo_to_color_image::cs_ssbo_to_color_image() + { + initialize(); + + const auto raw_data = + #include "../Program/GLSLSnippets/CopyBufferToColorImage.glsl" + ; + + const std::pair repl_list[] = + { + { "%set, ", "" }, + { "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) }, + { "%ws", std::to_string(optimal_group_size) }, + { "%wks", std::to_string(optimal_kernel_size) } + }; + + m_src = fmt::replace_all(raw_data, repl_list); + } + + void cs_ssbo_to_color_image::run(gl::command_context& cmd, 
const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout) + { + const auto row_pitch = static_cast(dst_region.width); + const u32 bpp = dst->image()->pitch() / dst->image()->width(); + + m_program.uniforms["swap_bytes"] = layout.swap_bytes; + m_program.uniforms["src_pitch"] = row_pitch; + m_program.uniforms["format"] = static_cast(dst->image()->get_internal_format()); + m_program.uniforms["region_offset"] = color2i(dst_region.x, dst_region.y); + m_program.uniforms["region_size"] = color2i(dst_region.width, dst_region.height); + + src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_pitch * bpp * dst_region.height); + glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), dst->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, dst->view_format()); + + const int num_invocations = utils::aligned_div(dst_region.width * dst_region.height, optimal_kernel_size); + compute_task::run(cmd, num_invocations); + } + + void cs_ssbo_to_color_image::run(gl::command_context& cmd, const buffer* src, texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout) + { + gl::nil_texture_view view(dst); + run(cmd, src, &view, src_offset, dst_region, layout); + } } diff --git a/rpcs3/Emu/RSX/GL/GLCompute.h b/rpcs3/Emu/RSX/GL/GLCompute.h index 03dec70c49..ff62b01506 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.h +++ b/rpcs3/Emu/RSX/GL/GLCompute.h @@ -360,6 +360,13 @@ namespace gl void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) override; }; + struct cs_ssbo_to_color_image : compute_task + { + cs_ssbo_to_color_image(); + void run(gl::command_context& cmd, const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout); + void run(gl::command_context& cmd, const buffer* src, 
texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout); + }; + // TODO: Replace with a proper manager extern std::unordered_map> g_compute_tasks; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 32f40b25ab..3d2f104574 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -595,14 +595,14 @@ namespace gl overlay_pass::run(cmd, viewport, GL_NONE, gl::image_aspect::color, false); } - rp_ssbo_to_texture::rp_ssbo_to_texture() + rp_ssbo_to_generic_texture::rp_ssbo_to_generic_texture() { vs_src = #include "../Program/GLSLSnippets/GenericVSPassthrough.glsl" ; fs_src = - #include "../Program/GLSLSnippets/CopyBufferToImage.glsl" + #include "../Program/GLSLSnippets/CopyBufferToGenericImage.glsl" ; std::pair repl_list[] = @@ -616,7 +616,7 @@ namespace gl fs_src = fmt::replace_all(fs_src, repl_list); } - void rp_ssbo_to_texture::run(gl::command_context& cmd, + void rp_ssbo_to_generic_texture::run(gl::command_context& cmd, const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout) @@ -634,7 +634,7 @@ namespace gl overlay_pass::run(cmd, dst_region, dst->id(), dst->aspect()); } - void rp_ssbo_to_texture::run(gl::command_context& cmd, + void rp_ssbo_to_generic_texture::run(gl::command_context& cmd, const buffer* src, texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout) diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 92a2f31a6b..8a79b7913e 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -114,9 +114,9 @@ namespace gl void run(gl::command_context& cmd, const areau& viewport, const rsx::simple_array& source, f32 gamma, bool limited_rgb, bool _3d); }; - struct rp_ssbo_to_texture : public overlay_pass + struct rp_ssbo_to_generic_texture : public overlay_pass { - rp_ssbo_to_texture(); + 
rp_ssbo_to_generic_texture(); void run(gl::command_context& cmd, const buffer* src, texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout); void run(gl::command_context& cmd, const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout); }; diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 915f3e9268..28281c9e24 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -210,6 +210,9 @@ OPENGL_PROC(PFNGLNAMEDBUFFERDATAEXTPROC, NamedBufferDataEXT); OPENGL_PROC(PFNGLNAMEDBUFFERSUBDATAPROC, NamedBufferSubData); OPENGL_PROC(PFNGLNAMEDBUFFERSUBDATAEXTPROC, NamedBufferSubDataEXT); +// ARB_shader_image_load_store +OPENGL_PROC(PFNGLBINDIMAGETEXTUREPROC, BindImageTexture); + // Sampler Objects OPENGL_PROC(PFNGLGENSAMPLERSPROC, GenSamplers); OPENGL_PROC(PFNGLDELETESAMPLERSPROC, DeleteSamplers); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 86b7cab844..3a0958f578 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -631,7 +631,29 @@ namespace gl } } - gl::get_overlay_pass()->run(cmd, transfer_buf, scratch_view.get(), out_offset, image_region, unpack_info); + // If possible, decode using a compute transform to potentially have asynchronous scheduling + bool use_compute_transform = (dst->aspect() == gl::image_aspect::color); + switch (dst->get_internal_format()) + { + case texture::internal_format::bgr5a1: + case texture::internal_format::rgb5a1: + case texture::internal_format::rgb565: + case texture::internal_format::rgba4: + // Packed formats are a problem with image_load_store + use_compute_transform = false; + break; + default: + break; + } + + if (use_compute_transform) + { + gl::get_compute_task()->run(cmd, transfer_buf, scratch_view.get(), out_offset, image_region, unpack_info); + } + else + { + gl::get_overlay_pass()->run(cmd, transfer_buf, 
scratch_view.get(), out_offset, image_region, unpack_info); + } if (dst->get_target() == texture::target::texture3D) { diff --git a/rpcs3/Emu/RSX/GL/glutils/common.h b/rpcs3/Emu/RSX/GL/glutils/common.h index aecf24ad63..0819eb404b 100644 --- a/rpcs3/Emu/RSX/GL/glutils/common.h +++ b/rpcs3/Emu/RSX/GL/glutils/common.h @@ -21,6 +21,7 @@ #define GL_INTERPRETER_VERTEX_BLOCK SSBO_SLOT(0) #define GL_INTERPRETER_FRAGMENT_BLOCK SSBO_SLOT(1) #define GL_COMPUTE_BUFFER_SLOT(index) SSBO_SLOT(2 + index) +#define GL_COMPUTE_IMAGE_SLOT(index) UBO_SLOT(index) //Function call wrapped in ARB_DSA vs EXT_DSA compat check #define DSA_CALL(func, object_name, target, ...)\ diff --git a/rpcs3/Emu/RSX/GL/glutils/image.cpp b/rpcs3/Emu/RSX/GL/glutils/image.cpp index 9f268f0b1f..877177b8b7 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/image.cpp @@ -235,14 +235,15 @@ namespace gl void texture_view::create(texture* data, GLenum target, GLenum sized_format, const subresource_range& range, const GLenum* argb_swizzle) { m_target = target; - m_format = sizedfmt_to_ifmt(sized_format); + m_format = sized_format; + m_view_format = sizedfmt_to_ifmt(sized_format); m_image_data = data; m_aspect_flags = range.aspect_mask & data->aspect(); ensure(m_aspect_flags); glGenTextures(1, &m_id); - glTextureView(m_id, target, data->id(), m_format, range.min_level, range.num_levels, range.min_layer, range.num_layers); + glTextureView(m_id, target, data->id(), m_view_format, range.min_level, range.num_levels, range.min_layer, range.num_layers); if (argb_swizzle) { @@ -287,6 +288,26 @@ namespace gl cmd->bind_texture(layer, m_target, m_id); } + nil_texture_view::nil_texture_view(texture* data) + { + m_id = data->id(); + m_target = static_cast(data->get_target()); + m_format = static_cast(data->get_internal_format()); + m_view_format = sizedfmt_to_ifmt(m_format); + m_aspect_flags = data->aspect(); + m_image_data = data; + + component_swizzle[0] = GL_RED; + component_swizzle[1] = GL_GREEN; 
+ component_swizzle[2] = GL_BLUE; + component_swizzle[3] = GL_ALPHA; + } + + nil_texture_view::~nil_texture_view() + { + m_id = GL_NONE; + } + texture_view* viewable_image::get_view(u32 remap_encoding, const std::pair, std::array>& remap_, GLenum aspect_flags) { auto remap = remap_; diff --git a/rpcs3/Emu/RSX/GL/glutils/image.h b/rpcs3/Emu/RSX/GL/glutils/image.h index 9a0f0bfff7..7e45ab7616 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.h +++ b/rpcs3/Emu/RSX/GL/glutils/image.h @@ -346,6 +346,7 @@ namespace gl GLuint m_id = GL_NONE; GLenum m_target = 0; GLenum m_format = 0; + GLenum m_view_format = 0; GLenum m_aspect_flags = 0; texture* m_image_data = nullptr; @@ -406,6 +407,11 @@ namespace gl return m_format; } + GLenum view_format() const + { + return m_view_format; + } + GLenum aspect() const { return m_aspect_flags; @@ -442,25 +448,8 @@ namespace gl class nil_texture_view : public texture_view { public: - nil_texture_view(texture* data) - : texture_view() - { - m_id = data->id(); - m_target = static_cast(data->get_target()); - m_format = static_cast(data->get_internal_format()); - m_aspect_flags = data->aspect(); - m_image_data = data; - - component_swizzle[0] = GL_RED; - component_swizzle[1] = GL_GREEN; - component_swizzle[2] = GL_BLUE; - component_swizzle[3] = GL_ALPHA; - } - - ~nil_texture_view() - { - m_id = GL_NONE; - } + nil_texture_view(texture* data); + ~nil_texture_view(); }; class viewable_image : public texture diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToColorImage.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToColorImage.glsl new file mode 100644 index 0000000000..46b421dbac --- /dev/null +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToColorImage.glsl @@ -0,0 +1,188 @@ +R"( +#version 450 +layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in; + +#define SSBO_LOCATION(x) (x + %loc) +#define IMAGE_LOCATION(x) (x) + +layout(%set, binding=IMAGE_LOCATION(0)) uniform writeonly restrict image2D output2D; + +#define 
FMT_GL_RGBA8 0x8058 +#define FMT_GL_BGRA8 0x80E1 +#define FMT_GL_R8 0x8229 +#define FMT_GL_R16 0x822A +#define FMT_GL_R32F 0x822E +#define FMT_GL_RG8 0x822B +#define FMT_GL_RG8_SNORM 0x8F95 +#define FMT_GL_RG16 0x822C +#define FMT_GL_RG16F 0x822F +#define FMT_GL_RGBA16F 0x881A +#define FMT_GL_RGBA32F 0x8814 + +#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8 +#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24 + +layout(%set, binding=SSBO_LOCATION(0), std430) readonly restrict buffer RawDataBlock +{ + uint data[]; +}; + +#if USE_UBO +layout(%push_block) uniform UnpackConfiguration +{ + uint swap_bytes; + uint src_pitch; + uint format; + uint reserved; + ivec2 region_offset; + ivec2 region_size; +}; +#else + uniform uint swap_bytes; + uniform uint src_pitch; + uniform uint format; + uniform ivec2 region_offset; + uniform ivec2 region_size; +#endif + +uint linear_invocation_id() +{ + uint size_in_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x); + return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x; +} + +ivec2 linear_id_to_output_coord(uint index) +{ + return ivec2(int(index % src_pitch), int(index / src_pitch)); +} + +// Decoders. Beware of multi-wide swapped types (e.g swap(16x2) != swap(32x1)) +uint readUint8(const in uint address) +{ + const uint block = address / 4; + const uint offset = address % 4; + return bitfieldExtract(data[block], int(offset) * 8, 8); +} + +uint readUint16(const in uint address) +{ + const uint block = address / 2; + const uint offset = address % 2; + const uint value = bitfieldExtract(data[block], int(offset) * 16, 16); + + if (swap_bytes != 0) + { + return bswap_u16(value); + } + + return value; +} + +uint readUint32(const in uint address) +{ + const uint value = data[address]; + return (swap_bytes != 0) ? 
bswap_u32(value) : value; +} + +uvec2 readUint8x2(const in uint address) +{ + const uint raw = readUint16(address); + return uvec2(bitfieldExtract(raw, 0, 8), bitfieldExtract(raw, 8, 8)); +} + +ivec2 readInt8x2(const in uint address) +{ + const ivec2 raw = ivec2(readUint8x2(address)); + return raw - (ivec2(greaterThan(raw, ivec2(127))) * 256); +} + +#define readFixed8(address) readUint8(address) / 255.f +#define readFixed8x2(address) readUint8x2(address) / 255.f +#define readFixed8x2Snorm(address) readInt8x2(address) / 127.f + +vec4 readFixed8x4(const in uint address) +{ + const uint raw = readUint32(address); + return uvec4( + bitfieldExtract(raw, 0, 8), + bitfieldExtract(raw, 8, 8), + bitfieldExtract(raw, 16, 8), + bitfieldExtract(raw, 24, 8) + ) / 255.f; +} + +#define readFixed16(address) readUint16(uint(address)) / 65535.f +#define readFixed16x2(address) vec2(readFixed16(address * 2 + 0), readFixed16(address * 2 + 1)) +#define readFixed16x4(address) vec4(readFixed16(address * 4 + 0), readFixed16(address * 4 + 1), readFixed16(address * 4 + 2), readFixed16(address * 4 + 3)) + +#define readFloat16(address) unpackHalf2x16(readUint16(uint(address))).x +#define readFloat16x2(address) vec2(readFloat16(address * 2 + 0), readFloat16(address * 2 + 1)) +#define readFloat16x4(address) vec4(readFloat16(address * 4 + 0), readFloat16(address * 4 + 1), readFloat16(address * 4 + 2), readFloat16(address * 4 + 3)) + +#define readFloat32(address) uintBitsToFloat(readUint32(address)) +#define readFloat32x4(address) uintBitsToFloat(uvec4(readUint32(address * 4 + 0), readUint32(address * 4 + 1), readUint32(address * 4 + 2), readUint32(address * 4 + 3))) + +#define KERNEL_SIZE %wks + +void write_output(const in uint invocation_id) +{ + const ivec2 coord = linear_id_to_output_coord(invocation_id); // Zero-based texel position inside the copy region + if (any(greaterThanEqual(coord, region_size))) // Invocation count is rounded up to the kernel size; coord == size is already outside, so drop the excess before reading the SSBO + { + return; + } + + vec4 outColor = vec4(0.f); // Defined value for components and formats the switch below leaves untouched + uint utmp; + + switch (format) + { + // Simple color + case FMT_GL_RGBA8: + outColor = readFixed8x4(invocation_id); + break; + case FMT_GL_BGRA8: + outColor = readFixed8x4(invocation_id).bgra; + break; + case FMT_GL_R8: + outColor.r = 
readFixed8(invocation_id); + break; + case FMT_GL_R16: + outColor.r = readFixed16(invocation_id); + break; + case FMT_GL_R32F: + outColor.r = readFloat32(invocation_id); + break; + case FMT_GL_RG8: + outColor.rg = readFixed8x2(invocation_id); + break; + case FMT_GL_RG8_SNORM: + outColor.rg = readFixed8x2Snorm(invocation_id); + break; + case FMT_GL_RG16: + outColor.rg = readFixed16x2(invocation_id); + break; + case FMT_GL_RG16F: + outColor.rg = readFloat16x2(invocation_id); + break; + case FMT_GL_RGBA16F: + outColor = readFloat16x4(invocation_id); + break; + case FMT_GL_RGBA32F: + outColor = readFloat32x4(invocation_id); + break; + } + + imageStore(output2D, coord + region_offset, outColor); +} + +void main() +{ + uint index = linear_invocation_id() * KERNEL_SIZE; + + for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index) + { + write_output(index); + } +} +)" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToGenericImage.glsl similarity index 68% rename from rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl rename to rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToGenericImage.glsl index fdea953008..f73a54751d 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToGenericImage.glsl @@ -9,26 +9,15 @@ R"( #define FMT_GL_DEPTH24_STENCIL8 0x88F0 #define FMT_GL_DEPTH32F_STENCIL8 0x8CAD -#define FMT_GL_RGBA8 0x8058 -#define FMT_GL_BGRA8 0x80E1 #define FMT_GL_RGB565 0x8D62 #define FMT_GL_RGB5_A1 0x8057 #define FMT_GL_BGR5_A1 0x99F0 #define FMT_GL_RGBA4 0x8056 -#define FMT_GL_R8 0x8229 -#define FMT_GL_R16 0x822A -#define FMT_GL_R32F 0x822E -#define FMT_GL_RG8 0x822B -#define FMT_GL_RG8_SNORM 0x8F95 -#define FMT_GL_RG16 0x822C -#define FMT_GL_RG16F 0x822F -#define FMT_GL_RGBA16F 0x881A -#define 
FMT_GL_RGBA32F 0x8814 #define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8 #define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24 -layout(location=0) out vec4 fragColor; +layout(location=0) out vec4 outColor; layout(%set, binding=%loc, std430) readonly restrict buffer RawDataBlock { @@ -165,68 +154,33 @@ void main() #endif - // Simple color - case FMT_GL_RGBA8: - fragColor = readFixed8x4(texel_address); - break; - case FMT_GL_BGRA8: - fragColor = readFixed8x4(texel_address).bgra; - break; - case FMT_GL_R8: - fragColor.r = readFixed8(texel_address); - break; - case FMT_GL_R16: - fragColor.r = readFixed16(texel_address); - break; - case FMT_GL_R32F: - fragColor.r = readFloat32(texel_address); - break; - case FMT_GL_RG8: - fragColor.rg = readFixed8x2(texel_address); - break; - case FMT_GL_RG8_SNORM: - fragColor.rg = readFixed8x2Snorm(texel_address); - break; - case FMT_GL_RG16: - fragColor.rg = readFixed16x2(texel_address); - break; - case FMT_GL_RG16F: - fragColor.rg = readFloat16x2(texel_address); - break; - case FMT_GL_RGBA16F: - fragColor = readFloat16x4(texel_address); - break; - case FMT_GL_RGBA32F: - fragColor = readFloat32x4(texel_address); - break; - // Packed color case FMT_GL_RGB565: utmp = readUint16(texel_address); - fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f; - fragColor.g = bitfieldExtract(utmp, 5, 6) / 63.f; - fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; + outColor.b = bitfieldExtract(utmp, 0, 5) / 31.f; + outColor.g = bitfieldExtract(utmp, 5, 6) / 63.f; + outColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; break; case FMT_GL_BGR5_A1: utmp = readUint16(texel_address); - fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f; - fragColor.g = bitfieldExtract(utmp, 5, 5) / 31.f; - fragColor.r = bitfieldExtract(utmp, 10, 5) / 31.f; - fragColor.a = bitfieldExtract(utmp, 15, 1) * 1.f; + outColor.b = bitfieldExtract(utmp, 0, 5) 
/ 31.f; + outColor.g = bitfieldExtract(utmp, 5, 5) / 31.f; + outColor.r = bitfieldExtract(utmp, 10, 5) / 31.f; + outColor.a = bitfieldExtract(utmp, 15, 1) * 1.f; break; case FMT_GL_RGB5_A1: utmp = readUint16(texel_address); - fragColor.a = bitfieldExtract(utmp, 0, 1) * 1.f; - fragColor.b = bitfieldExtract(utmp, 1, 5) / 31.f; - fragColor.g = bitfieldExtract(utmp, 6, 5) / 31.f; - fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; + outColor.a = bitfieldExtract(utmp, 0, 1) * 1.f; + outColor.b = bitfieldExtract(utmp, 1, 5) / 31.f; + outColor.g = bitfieldExtract(utmp, 6, 5) / 31.f; + outColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; break; case FMT_GL_RGBA4: utmp = readUint16(texel_address); - fragColor.b = bitfieldExtract(utmp, 0, 4) / 15.f; - fragColor.g = bitfieldExtract(utmp, 4, 4) / 15.f; - fragColor.r = bitfieldExtract(utmp, 8, 4) / 15.f; - fragColor.a = bitfieldExtract(utmp, 12, 4) / 15.f; + outColor.b = bitfieldExtract(utmp, 0, 4) / 15.f; + outColor.g = bitfieldExtract(utmp, 4, 4) / 15.f; + outColor.r = bitfieldExtract(utmp, 8, 4) / 15.f; + outColor.a = bitfieldExtract(utmp, 12, 4) / 15.f; break; } } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 4155e3d736..475b4236bc 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -831,7 +831,8 @@ - + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index b5afd02822..a640af5b94 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2178,9 +2178,6 @@ Emu\GPU\RSX\Program\Snippets - - Emu\GPU\RSX\Program\Snippets - Emu\GPU\RSX\Program\Snippets @@ -2190,5 +2187,11 @@ Emu\GPU\RSX\Program\Snippets + + Emu\GPU\RSX\Program\Snippets + + + Emu\GPU\RSX\Program\Snippets +