From c2e32371f6ecda152efe5ec579bea9dd77b0a2ff Mon Sep 17 00:00:00 2001 From: Orphis Date: Tue, 22 Jun 2010 00:52:17 +0000 Subject: [PATCH] Refactor and prepare the OpenCL texture decoder for decoding textures to RGBA format required by DX11. Fix the decoder codepath when OpenCL is enabled and the DX11 plugin is used. Added the DX11 plugin to the Dolphin project dependencies. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5764 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Src/OpenCL/OCLTextureDecoder.cpp | 176 +++++++----------- .../Src/OpenCL/OCLTextureDecoder.h | 2 +- .../Core/VideoCommon/Src/TextureDecoder.cpp | 12 +- Source/Dolphin.sln | 1 + 4 files changed, 81 insertions(+), 110 deletions(-) diff --git a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp index 644279211d..73fe94879e 100644 --- a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp @@ -31,24 +31,53 @@ //#define DEBUG_OPENCL -struct sDecoders -{ - const char name[256]; // kernel name - cl_kernel kernel; // compute kernel +cl_program g_program; + +struct sDecoderParameter +{ + char* name; + cl_kernel kernel; + float sizeOfSrc; + float sizeOfDst; + int xSkip; + int ySkip; + PC_TexFormat format; }; -cl_program g_program; -// NULL terminated set of kernels -sDecoders Decoders[] = { -{"DecodeI4", NULL}, -{"DecodeI8", NULL}, -{"DecodeIA4", NULL}, -{"DecodeIA8", NULL}, -{"DecodeRGBA8", NULL}, -{"DecodeRGB565", NULL}, -{"DecodeRGB5A3", NULL}, -{"DecodeCMPR", NULL}, -{"", NULL}, +sDecoderParameter g_DecodeParametersNative[] = { + /* GX_TF_I4 */ { "DecodeI4", NULL, 0.5f, 1, 8, 8, PC_TEX_FMT_I4_AS_I8 }, + /* GX_TF_I8 */ { "DecodeI8", NULL, 1, 1, 8, 4, PC_TEX_FMT_I8 }, + /* GX_TF_IA4 */ { "DecodeIA4", NULL, 1, 2, 8, 4, PC_TEX_FMT_IA4_AS_IA8 }, + /* GX_TF_IA8 */ { "DecodeIA8", NULL, 2, 2, 4, 4, PC_TEX_FMT_IA8 }, + /* GX_TF_RGB565 */ { "DecodeRGB565", NULL, 2, 2, 4, 4, PC_TEX_FMT_RGB565 }, + /* GX_TF_RGB5A3 */ { "DecodeRGB5A3", NULL, 2, 4, 4, 4, PC_TEX_FMT_BGRA32 }, + /* GX_TF_RGBA8 */ { "DecodeRGBA8", NULL, 4, 4, 4, 4, PC_TEX_FMT_BGRA32 }, + /* 7 */ { NULL }, + /* GX_TF_C4 */ { NULL }, + /* GX_TF_C8 */ { NULL }, + /* GX_TF_C14X2 */ { NULL }, + /* B */ { NULL }, + /* C */ { NULL }, + /* D */ { NULL }, + /* GX_TF_CMPR */ { "DecodeCMPR", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_BGRA32 }, +}; + +sDecoderParameter g_DecodeParametersRGBA[] = { + /* GX_TF_I4 */ { "DecodeI4_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 }, + /* GX_TF_I8 */ { "DecodeI8_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 }, + /* GX_TF_IA4 */ { "DecodeIA4_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 }, + /* GX_TF_IA8 */ { "DecodeIA8_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 }, + /* GX_TF_RGB565 */ { "DecodeRGB565_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 }, + /* GX_TF_RGB5A3 */ { "DecodeRGB5A3_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 }, + /* GX_TF_RGBA8 */ { "DecodeRGBA8_RGBA", NULL, 4, 4, 4, 4, PC_TEX_FMT_RGBA32 }, + /* 7 */ { NULL }, + /* GX_TF_C4 */ { NULL }, + /* GX_TF_C8 */ { NULL }, + /* GX_TF_C14X2 */ { NULL }, + /* B */ { NULL }, + /* C */ { NULL }, + /* D */ { NULL }, + /* GX_TF_CMPR */ { "DecodeCMPR_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 }, }; bool g_Inited = false; @@ -73,10 +102,13 @@ void TexDecoder_OpenCL_Initialize() { g_program = OpenCL::CompileProgram(code.c_str()); int i = 0; - while(strlen(Decoders[i].name) > 0) { - Decoders[i].kernel = OpenCL::CompileKernel(g_program, Decoders[i].name); - i++; - } + for(int i = 0; i < GX_TF_CMPR; ++i) { + if(g_DecodeParametersNative[i].name) + g_DecodeParametersNative[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersNative[i].name); + + if(false && g_DecodeParametersRGBA[i].name) + g_DecodeParametersRGBA[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersRGBA[i].name); + } // Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture #ifndef DEBUG_OPENCL @@ -94,11 +126,14 @@ void TexDecoder_OpenCL_Shutdown() { clReleaseProgram(g_program); int i = 0; - while(strlen(Decoders[i].name) > 0) - { - clReleaseKernel(Decoders[i].kernel); - i++; - } + + for(int i = 0; i < GX_TF_CMPR; ++i) { + if(g_DecodeParametersNative[i].kernel) + clReleaseKernel(g_DecodeParametersNative[i].kernel); + + if(g_DecodeParametersRGBA[i].kernel) + clReleaseKernel(g_DecodeParametersRGBA[i].kernel); + } if(g_clsrc) clReleaseMemObject(g_clsrc); @@ -110,93 +145,26 @@ void TexDecoder_OpenCL_Shutdown() { #endif } -PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba) { #if defined(HAVE_OPENCL) && HAVE_OPENCL cl_int err; - cl_kernel kernelToRun = Decoders[0].kernel; - float sizeOfDst = sizeof(u8), sizeOfSrc = sizeof(u8), xSkip, ySkip; - PC_TexFormat formatResult; - - switch(texformat) - { - case GX_TF_I4: - kernelToRun = Decoders[0].kernel; - sizeOfSrc = sizeof(u8) / 2.0f; - sizeOfDst = sizeof(u8); - xSkip = 8; - ySkip = 8; - formatResult = PC_TEX_FMT_I4_AS_I8; - break; - case GX_TF_I8: - kernelToRun = Decoders[1].kernel; - sizeOfSrc = sizeOfDst = sizeof(u8); - xSkip = 8; - ySkip = 4; - formatResult = PC_TEX_FMT_I8; - break; - case GX_TF_IA4: - kernelToRun = Decoders[2].kernel; - sizeOfSrc = sizeof(u8); - sizeOfDst = sizeof(u16); - xSkip = 8; - ySkip = 4; - formatResult = PC_TEX_FMT_IA4_AS_IA8; - break; - case GX_TF_IA8: - kernelToRun = Decoders[3].kernel; - sizeOfSrc = sizeOfDst = sizeof(u16); - xSkip = 4; - ySkip = 4; - formatResult = PC_TEX_FMT_IA8; - break; - case GX_TF_RGBA8: - kernelToRun = Decoders[4].kernel; - sizeOfSrc = sizeOfDst = sizeof(u32); - xSkip = 4; - ySkip = 4; - formatResult = PC_TEX_FMT_BGRA32; - break; - case GX_TF_RGB565: - kernelToRun = Decoders[5].kernel; - sizeOfSrc = sizeOfDst = sizeof(u16); - xSkip = 4; - ySkip = 4; - formatResult = PC_TEX_FMT_RGB565; - break; - case GX_TF_RGB5A3: - // Reported issues with Sonic Adventure 2: Battle opening sequence? - kernelToRun = Decoders[6].kernel; - sizeOfSrc = sizeof(u16); - sizeOfDst = sizeof(u32); - xSkip = 4; - ySkip = 4; - formatResult = PC_TEX_FMT_BGRA32; - break; - case GX_TF_CMPR: - kernelToRun = Decoders[7].kernel; - sizeOfSrc = sizeof(u8) / 2.0f; - sizeOfDst = sizeof(u32); - xSkip = 8; - ySkip = 8; - formatResult = PC_TEX_FMT_BGRA32; - break; - default: - return PC_TEX_FMT_NONE; - } + sDecoderParameter& decoder = rgba ? g_DecodeParametersRGBA[texformat] : g_DecodeParametersNative[texformat]; + if(!decoder.name || !decoder.kernel || decoder.format == PC_TEX_FMT_NONE) + return PC_TEX_FMT_NONE; #ifdef DEBUG_OPENCL g_clsrc = clCreateBuffer(OpenCL::GetContext(), CL_MEM_READ_ONLY , 1024 * 1024 * sizeof(u32), NULL, NULL); g_cldst = clCreateBuffer(OpenCL::GetContext(), CL_MEM_WRITE_ONLY, 1024 * 1024 * sizeof(u32), NULL, NULL); #endif - clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * sizeOfSrc), src, 0, NULL, NULL); + clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfSrc), src, 0, NULL, NULL); - clSetKernelArg(kernelToRun, 0, sizeof(cl_mem), &g_cldst); - clSetKernelArg(kernelToRun, 1, sizeof(cl_mem), &g_clsrc); - clSetKernelArg(kernelToRun, 2, sizeof(cl_int), &width); + clSetKernelArg(decoder.kernel, 0, sizeof(cl_mem), &g_cldst); + clSetKernelArg(decoder.kernel, 1, sizeof(cl_mem), &g_clsrc); + clSetKernelArg(decoder.kernel, 2, sizeof(cl_int), &width); - size_t global[] = { (size_t)(width / xSkip), (size_t)(height / ySkip) }; + size_t global[] = { (size_t)(width / decoder.xSkip), (size_t)(height / decoder.ySkip) }; // No work-groups for now /* @@ -206,20 +174,20 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei PanicAlert("Error obtaining work-group information"); */ - err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), kernelToRun, 2, NULL, global, NULL, 0, NULL, NULL); + err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), decoder.kernel, 2, NULL, global, NULL, 0, NULL, NULL); if(err) OpenCL::HandleCLError(err, "Failed to enqueue kernel"); clFinish(OpenCL::GetCommandQueue()); - clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * sizeOfDst), dst, 0, NULL, NULL); + clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfDst), dst, 0, NULL, NULL); #ifdef DEBUG_OPENCL clReleaseMemObject(g_clsrc); clReleaseMemObject(g_cldst); #endif - return formatResult; + return decoder.format; #else return PC_TEX_FMT_NONE; #endif diff --git a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.h b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.h index 43a59dab8f..c821f3132f 100644 --- a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.h +++ b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.h @@ -23,6 +23,6 @@ void TexDecoder_OpenCL_Initialize(); void TexDecoder_OpenCL_Shutdown(); -PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt); +PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba); #endif diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index 91cf47b7f9..ae41f822c2 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -1266,13 +1266,15 @@ void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center) PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly) { + PC_TexFormat retval = PC_TEX_FMT_NONE; + #if defined(HAVE_OPENCL) && HAVE_OPENCL - PC_TexFormat retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt); - if(retval == PC_TEX_FMT_NONE) - retval = TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt); -#else - PC_TexFormat retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt); + retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt, rgbaOnly); #endif + + if(retval == PC_TEX_FMT_NONE) + retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt); + if ((!TexFmt_Overlay_Enable)|| (retval == PC_TEX_FMT_NONE)) return retval; diff --git a/Source/Dolphin.sln b/Source/Dolphin.sln index da93d658cf..2fb10790a1 100644 --- a/Source/Dolphin.sln +++ b/Source/Dolphin.sln @@ -47,6 +47,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Core\Common\Commo EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Dolphin", "Core\DolphinWX\DolphinWX.vcproj", "{A72606EF-C5C1-4954-90AD-F0F93A8D97D9}" ProjectSection(ProjectDependencies) = postProject + {21DBE606-2958-43AC-A14E-B6B798D56554} = {21DBE606-2958-43AC-A14E-B6B798D56554} {C7E5D50A-2916-464B-86A7-E10B3CC88ADA} = {C7E5D50A-2916-464B-86A7-E10B3CC88ADA} {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} = {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} {D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8} = {D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8}