Refactor and prepare the OpenCL texture decoder for decoding textures to RGBA format required by DX11.

Fix the decoder codepath when OpenCL is enabled and the DX11 plugin is used.
Added the DX11 plugin to the Dolphin project dependencies.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5764 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Orphis 2010-06-22 00:52:17 +00:00
parent 1198ae1c97
commit c2e32371f6
4 changed files with 81 additions and 110 deletions

View File

@ -31,24 +31,53 @@
//#define DEBUG_OPENCL
struct sDecoders
{
const char name[256]; // kernel name
cl_kernel kernel; // compute kernel
cl_program g_program;
struct sDecoderParameter
{
char* name;
cl_kernel kernel;
float sizeOfSrc;
float sizeOfDst;
int xSkip;
int ySkip;
PC_TexFormat format;
};
cl_program g_program;
// NULL terminated set of kernels
sDecoders Decoders[] = {
{"DecodeI4", NULL},
{"DecodeI8", NULL},
{"DecodeIA4", NULL},
{"DecodeIA8", NULL},
{"DecodeRGBA8", NULL},
{"DecodeRGB565", NULL},
{"DecodeRGB5A3", NULL},
{"DecodeCMPR", NULL},
{"", NULL},
sDecoderParameter g_DecodeParametersNative[] = {
/* GX_TF_I4 */ { "DecodeI4", NULL, 0.5f, 1, 8, 8, PC_TEX_FMT_I4_AS_I8 },
/* GX_TF_I8 */ { "DecodeI8", NULL, 1, 1, 8, 4, PC_TEX_FMT_I8 },
/* GX_TF_IA4 */ { "DecodeIA4", NULL, 1, 2, 8, 4, PC_TEX_FMT_IA4_AS_IA8 },
/* GX_TF_IA8 */ { "DecodeIA8", NULL, 2, 2, 4, 4, PC_TEX_FMT_IA8 },
/* GX_TF_RGB565 */ { "DecodeRGB565", NULL, 2, 2, 4, 4, PC_TEX_FMT_RGB565 },
/* GX_TF_RGB5A3 */ { "DecodeRGB5A3", NULL, 2, 4, 4, 4, PC_TEX_FMT_BGRA32 },
/* GX_TF_RGBA8 */ { "DecodeRGBA8", NULL, 4, 4, 4, 4, PC_TEX_FMT_BGRA32 },
/* 7 */ { NULL },
/* GX_TF_C4 */ { NULL },
/* GX_TF_C8 */ { NULL },
/* GX_TF_C14X2 */ { NULL },
/* B */ { NULL },
/* C */ { NULL },
/* D */ { NULL },
/* GX_TF_CMPR */ { "DecodeCMPR", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_BGRA32 },
};
sDecoderParameter g_DecodeParametersRGBA[] = {
/* GX_TF_I4 */ { "DecodeI4_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 },
/* GX_TF_I8 */ { "DecodeI8_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 },
/* GX_TF_IA4 */ { "DecodeIA4_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 },
/* GX_TF_IA8 */ { "DecodeIA8_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
/* GX_TF_RGB565 */ { "DecodeRGB565_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
/* GX_TF_RGB5A3 */ { "DecodeRGB5A3_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
/* GX_TF_RGBA8 */ { "DecodeRGBA8_RGBA", NULL, 4, 4, 4, 4, PC_TEX_FMT_RGBA32 },
/* 7 */ { NULL },
/* GX_TF_C4 */ { NULL },
/* GX_TF_C8 */ { NULL },
/* GX_TF_C14X2 */ { NULL },
/* B */ { NULL },
/* C */ { NULL },
/* D */ { NULL },
/* GX_TF_CMPR */ { "DecodeCMPR_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 },
};
bool g_Inited = false;
@ -73,10 +102,13 @@ void TexDecoder_OpenCL_Initialize() {
g_program = OpenCL::CompileProgram(code.c_str());
int i = 0;
while(strlen(Decoders[i].name) > 0) {
Decoders[i].kernel = OpenCL::CompileKernel(g_program, Decoders[i].name);
i++;
}
for(int i = 0; i < GX_TF_CMPR; ++i) {
if(g_DecodeParametersNative[i].name)
g_DecodeParametersNative[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersNative[i].name);
if(false && g_DecodeParametersRGBA[i].name)
g_DecodeParametersRGBA[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersRGBA[i].name);
}
// Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture
#ifndef DEBUG_OPENCL
@ -94,11 +126,14 @@ void TexDecoder_OpenCL_Shutdown() {
clReleaseProgram(g_program);
int i = 0;
while(strlen(Decoders[i].name) > 0)
{
clReleaseKernel(Decoders[i].kernel);
i++;
}
for(int i = 0; i < GX_TF_CMPR; ++i) {
if(g_DecodeParametersNative[i].kernel)
clReleaseKernel(g_DecodeParametersNative[i].kernel);
if(g_DecodeParametersRGBA[i].kernel)
clReleaseKernel(g_DecodeParametersRGBA[i].kernel);
}
if(g_clsrc)
clReleaseMemObject(g_clsrc);
@ -110,93 +145,26 @@ void TexDecoder_OpenCL_Shutdown() {
#endif
}
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt)
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba)
{
#if defined(HAVE_OPENCL) && HAVE_OPENCL
cl_int err;
cl_kernel kernelToRun = Decoders[0].kernel;
float sizeOfDst = sizeof(u8), sizeOfSrc = sizeof(u8), xSkip, ySkip;
PC_TexFormat formatResult;
switch(texformat)
{
case GX_TF_I4:
kernelToRun = Decoders[0].kernel;
sizeOfSrc = sizeof(u8) / 2.0f;
sizeOfDst = sizeof(u8);
xSkip = 8;
ySkip = 8;
formatResult = PC_TEX_FMT_I4_AS_I8;
break;
case GX_TF_I8:
kernelToRun = Decoders[1].kernel;
sizeOfSrc = sizeOfDst = sizeof(u8);
xSkip = 8;
ySkip = 4;
formatResult = PC_TEX_FMT_I8;
break;
case GX_TF_IA4:
kernelToRun = Decoders[2].kernel;
sizeOfSrc = sizeof(u8);
sizeOfDst = sizeof(u16);
xSkip = 8;
ySkip = 4;
formatResult = PC_TEX_FMT_IA4_AS_IA8;
break;
case GX_TF_IA8:
kernelToRun = Decoders[3].kernel;
sizeOfSrc = sizeOfDst = sizeof(u16);
xSkip = 4;
ySkip = 4;
formatResult = PC_TEX_FMT_IA8;
break;
case GX_TF_RGBA8:
kernelToRun = Decoders[4].kernel;
sizeOfSrc = sizeOfDst = sizeof(u32);
xSkip = 4;
ySkip = 4;
formatResult = PC_TEX_FMT_BGRA32;
break;
case GX_TF_RGB565:
kernelToRun = Decoders[5].kernel;
sizeOfSrc = sizeOfDst = sizeof(u16);
xSkip = 4;
ySkip = 4;
formatResult = PC_TEX_FMT_RGB565;
break;
case GX_TF_RGB5A3:
// Reported issues with Sonic Adventure 2: Battle opening sequence?
kernelToRun = Decoders[6].kernel;
sizeOfSrc = sizeof(u16);
sizeOfDst = sizeof(u32);
xSkip = 4;
ySkip = 4;
formatResult = PC_TEX_FMT_BGRA32;
break;
case GX_TF_CMPR:
kernelToRun = Decoders[7].kernel;
sizeOfSrc = sizeof(u8) / 2.0f;
sizeOfDst = sizeof(u32);
xSkip = 8;
ySkip = 8;
formatResult = PC_TEX_FMT_BGRA32;
break;
default:
return PC_TEX_FMT_NONE;
}
sDecoderParameter& decoder = rgba ? g_DecodeParametersRGBA[texformat] : g_DecodeParametersNative[texformat];
if(!decoder.name || !decoder.kernel || decoder.format == PC_TEX_FMT_NONE)
return PC_TEX_FMT_NONE;
#ifdef DEBUG_OPENCL
g_clsrc = clCreateBuffer(OpenCL::GetContext(), CL_MEM_READ_ONLY , 1024 * 1024 * sizeof(u32), NULL, NULL);
g_cldst = clCreateBuffer(OpenCL::GetContext(), CL_MEM_WRITE_ONLY, 1024 * 1024 * sizeof(u32), NULL, NULL);
#endif
clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * sizeOfSrc), src, 0, NULL, NULL);
clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfSrc), src, 0, NULL, NULL);
clSetKernelArg(kernelToRun, 0, sizeof(cl_mem), &g_cldst);
clSetKernelArg(kernelToRun, 1, sizeof(cl_mem), &g_clsrc);
clSetKernelArg(kernelToRun, 2, sizeof(cl_int), &width);
clSetKernelArg(decoder.kernel, 0, sizeof(cl_mem), &g_cldst);
clSetKernelArg(decoder.kernel, 1, sizeof(cl_mem), &g_clsrc);
clSetKernelArg(decoder.kernel, 2, sizeof(cl_int), &width);
size_t global[] = { (size_t)(width / xSkip), (size_t)(height / ySkip) };
size_t global[] = { (size_t)(width / decoder.xSkip), (size_t)(height / decoder.ySkip) };
// No work-groups for now
/*
@ -206,20 +174,20 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
PanicAlert("Error obtaining work-group information");
*/
err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), kernelToRun, 2, NULL, global, NULL, 0, NULL, NULL);
err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), decoder.kernel, 2, NULL, global, NULL, 0, NULL, NULL);
if(err)
OpenCL::HandleCLError(err, "Failed to enqueue kernel");
clFinish(OpenCL::GetCommandQueue());
clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * sizeOfDst), dst, 0, NULL, NULL);
clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfDst), dst, 0, NULL, NULL);
#ifdef DEBUG_OPENCL
clReleaseMemObject(g_clsrc);
clReleaseMemObject(g_cldst);
#endif
return formatResult;
return decoder.format;
#else
return PC_TEX_FMT_NONE;
#endif

View File

@ -23,6 +23,6 @@
void TexDecoder_OpenCL_Initialize();
void TexDecoder_OpenCL_Shutdown();
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt);
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba);
#endif

View File

@ -1266,13 +1266,15 @@ void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center)
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly)
{
PC_TexFormat retval = PC_TEX_FMT_NONE;
#if defined(HAVE_OPENCL) && HAVE_OPENCL
PC_TexFormat retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt);
if(retval == PC_TEX_FMT_NONE)
retval = TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
#else
PC_TexFormat retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt, rgbaOnly);
#endif
if(retval == PC_TEX_FMT_NONE)
retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
if ((!TexFmt_Overlay_Enable)|| (retval == PC_TEX_FMT_NONE))
return retval;

View File

@ -47,6 +47,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Core\Common\Commo
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Dolphin", "Core\DolphinWX\DolphinWX.vcproj", "{A72606EF-C5C1-4954-90AD-F0F93A8D97D9}"
ProjectSection(ProjectDependencies) = postProject
{21DBE606-2958-43AC-A14E-B6B798D56554} = {21DBE606-2958-43AC-A14E-B6B798D56554}
{C7E5D50A-2916-464B-86A7-E10B3CC88ADA} = {C7E5D50A-2916-464B-86A7-E10B3CC88ADA}
{CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} = {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160}
{D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8} = {D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8}