Refactor and prepare the OpenCL texture decoder for decoding textures to RGBA format required by DX11.
Fix the decoder codepath when OpenCL is enabled and the DX11 plugin is used. Added the DX11 plugin to the Dolphin project dependencies. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5764 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
1198ae1c97
commit
c2e32371f6
|
@ -31,24 +31,53 @@
|
|||
|
||||
//#define DEBUG_OPENCL
|
||||
|
||||
struct sDecoders
|
||||
{
|
||||
const char name[256]; // kernel name
|
||||
cl_kernel kernel; // compute kernel
|
||||
cl_program g_program;
|
||||
|
||||
struct sDecoderParameter
|
||||
{
|
||||
char* name;
|
||||
cl_kernel kernel;
|
||||
float sizeOfSrc;
|
||||
float sizeOfDst;
|
||||
int xSkip;
|
||||
int ySkip;
|
||||
PC_TexFormat format;
|
||||
};
|
||||
|
||||
cl_program g_program;
|
||||
// NULL terminated set of kernels
|
||||
sDecoders Decoders[] = {
|
||||
{"DecodeI4", NULL},
|
||||
{"DecodeI8", NULL},
|
||||
{"DecodeIA4", NULL},
|
||||
{"DecodeIA8", NULL},
|
||||
{"DecodeRGBA8", NULL},
|
||||
{"DecodeRGB565", NULL},
|
||||
{"DecodeRGB5A3", NULL},
|
||||
{"DecodeCMPR", NULL},
|
||||
{"", NULL},
|
||||
sDecoderParameter g_DecodeParametersNative[] = {
|
||||
/* GX_TF_I4 */ { "DecodeI4", NULL, 0.5f, 1, 8, 8, PC_TEX_FMT_I4_AS_I8 },
|
||||
/* GX_TF_I8 */ { "DecodeI8", NULL, 1, 1, 8, 4, PC_TEX_FMT_I8 },
|
||||
/* GX_TF_IA4 */ { "DecodeIA4", NULL, 1, 2, 8, 4, PC_TEX_FMT_IA4_AS_IA8 },
|
||||
/* GX_TF_IA8 */ { "DecodeIA8", NULL, 2, 2, 4, 4, PC_TEX_FMT_IA8 },
|
||||
/* GX_TF_RGB565 */ { "DecodeRGB565", NULL, 2, 2, 4, 4, PC_TEX_FMT_RGB565 },
|
||||
/* GX_TF_RGB5A3 */ { "DecodeRGB5A3", NULL, 2, 4, 4, 4, PC_TEX_FMT_BGRA32 },
|
||||
/* GX_TF_RGBA8 */ { "DecodeRGBA8", NULL, 4, 4, 4, 4, PC_TEX_FMT_BGRA32 },
|
||||
/* 7 */ { NULL },
|
||||
/* GX_TF_C4 */ { NULL },
|
||||
/* GX_TF_C8 */ { NULL },
|
||||
/* GX_TF_C14X2 */ { NULL },
|
||||
/* B */ { NULL },
|
||||
/* C */ { NULL },
|
||||
/* D */ { NULL },
|
||||
/* GX_TF_CMPR */ { "DecodeCMPR", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_BGRA32 },
|
||||
};
|
||||
|
||||
sDecoderParameter g_DecodeParametersRGBA[] = {
|
||||
/* GX_TF_I4 */ { "DecodeI4_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_I8 */ { "DecodeI8_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_IA4 */ { "DecodeIA4_RGBA", NULL, 1, 4, 8, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_IA8 */ { "DecodeIA8_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_RGB565 */ { "DecodeRGB565_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_RGB5A3 */ { "DecodeRGB5A3_RGBA", NULL, 2, 4, 4, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* GX_TF_RGBA8 */ { "DecodeRGBA8_RGBA", NULL, 4, 4, 4, 4, PC_TEX_FMT_RGBA32 },
|
||||
/* 7 */ { NULL },
|
||||
/* GX_TF_C4 */ { NULL },
|
||||
/* GX_TF_C8 */ { NULL },
|
||||
/* GX_TF_C14X2 */ { NULL },
|
||||
/* B */ { NULL },
|
||||
/* C */ { NULL },
|
||||
/* D */ { NULL },
|
||||
/* GX_TF_CMPR */ { "DecodeCMPR_RGBA", NULL, 0.5f, 4, 8, 8, PC_TEX_FMT_RGBA32 },
|
||||
};
|
||||
|
||||
bool g_Inited = false;
|
||||
|
@ -73,10 +102,13 @@ void TexDecoder_OpenCL_Initialize() {
|
|||
g_program = OpenCL::CompileProgram(code.c_str());
|
||||
|
||||
int i = 0;
|
||||
while(strlen(Decoders[i].name) > 0) {
|
||||
Decoders[i].kernel = OpenCL::CompileKernel(g_program, Decoders[i].name);
|
||||
i++;
|
||||
}
|
||||
for(int i = 0; i < GX_TF_CMPR; ++i) {
|
||||
if(g_DecodeParametersNative[i].name)
|
||||
g_DecodeParametersNative[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersNative[i].name);
|
||||
|
||||
if(false && g_DecodeParametersRGBA[i].name)
|
||||
g_DecodeParametersRGBA[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersRGBA[i].name);
|
||||
}
|
||||
|
||||
// Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture
|
||||
#ifndef DEBUG_OPENCL
|
||||
|
@ -94,11 +126,14 @@ void TexDecoder_OpenCL_Shutdown() {
|
|||
|
||||
clReleaseProgram(g_program);
|
||||
int i = 0;
|
||||
while(strlen(Decoders[i].name) > 0)
|
||||
{
|
||||
clReleaseKernel(Decoders[i].kernel);
|
||||
i++;
|
||||
}
|
||||
|
||||
for(int i = 0; i < GX_TF_CMPR; ++i) {
|
||||
if(g_DecodeParametersNative[i].kernel)
|
||||
clReleaseKernel(g_DecodeParametersNative[i].kernel);
|
||||
|
||||
if(g_DecodeParametersRGBA[i].kernel)
|
||||
clReleaseKernel(g_DecodeParametersRGBA[i].kernel);
|
||||
}
|
||||
|
||||
if(g_clsrc)
|
||||
clReleaseMemObject(g_clsrc);
|
||||
|
@ -110,93 +145,26 @@ void TexDecoder_OpenCL_Shutdown() {
|
|||
#endif
|
||||
}
|
||||
|
||||
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt)
|
||||
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba)
|
||||
{
|
||||
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
||||
cl_int err;
|
||||
cl_kernel kernelToRun = Decoders[0].kernel;
|
||||
float sizeOfDst = sizeof(u8), sizeOfSrc = sizeof(u8), xSkip, ySkip;
|
||||
PC_TexFormat formatResult;
|
||||
|
||||
switch(texformat)
|
||||
{
|
||||
case GX_TF_I4:
|
||||
kernelToRun = Decoders[0].kernel;
|
||||
sizeOfSrc = sizeof(u8) / 2.0f;
|
||||
sizeOfDst = sizeof(u8);
|
||||
xSkip = 8;
|
||||
ySkip = 8;
|
||||
formatResult = PC_TEX_FMT_I4_AS_I8;
|
||||
break;
|
||||
case GX_TF_I8:
|
||||
kernelToRun = Decoders[1].kernel;
|
||||
sizeOfSrc = sizeOfDst = sizeof(u8);
|
||||
xSkip = 8;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_I8;
|
||||
break;
|
||||
case GX_TF_IA4:
|
||||
kernelToRun = Decoders[2].kernel;
|
||||
sizeOfSrc = sizeof(u8);
|
||||
sizeOfDst = sizeof(u16);
|
||||
xSkip = 8;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_IA4_AS_IA8;
|
||||
break;
|
||||
case GX_TF_IA8:
|
||||
kernelToRun = Decoders[3].kernel;
|
||||
sizeOfSrc = sizeOfDst = sizeof(u16);
|
||||
xSkip = 4;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_IA8;
|
||||
break;
|
||||
case GX_TF_RGBA8:
|
||||
kernelToRun = Decoders[4].kernel;
|
||||
sizeOfSrc = sizeOfDst = sizeof(u32);
|
||||
xSkip = 4;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_BGRA32;
|
||||
break;
|
||||
case GX_TF_RGB565:
|
||||
kernelToRun = Decoders[5].kernel;
|
||||
sizeOfSrc = sizeOfDst = sizeof(u16);
|
||||
xSkip = 4;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_RGB565;
|
||||
break;
|
||||
case GX_TF_RGB5A3:
|
||||
// Reported issues with Sonic Adventure 2: Battle opening sequence?
|
||||
kernelToRun = Decoders[6].kernel;
|
||||
sizeOfSrc = sizeof(u16);
|
||||
sizeOfDst = sizeof(u32);
|
||||
xSkip = 4;
|
||||
ySkip = 4;
|
||||
formatResult = PC_TEX_FMT_BGRA32;
|
||||
break;
|
||||
case GX_TF_CMPR:
|
||||
kernelToRun = Decoders[7].kernel;
|
||||
sizeOfSrc = sizeof(u8) / 2.0f;
|
||||
sizeOfDst = sizeof(u32);
|
||||
xSkip = 8;
|
||||
ySkip = 8;
|
||||
formatResult = PC_TEX_FMT_BGRA32;
|
||||
break;
|
||||
default:
|
||||
return PC_TEX_FMT_NONE;
|
||||
}
|
||||
sDecoderParameter& decoder = rgba ? g_DecodeParametersRGBA[texformat] : g_DecodeParametersNative[texformat];
|
||||
if(!decoder.name || !decoder.kernel || decoder.format == PC_TEX_FMT_NONE)
|
||||
return PC_TEX_FMT_NONE;
|
||||
|
||||
#ifdef DEBUG_OPENCL
|
||||
g_clsrc = clCreateBuffer(OpenCL::GetContext(), CL_MEM_READ_ONLY , 1024 * 1024 * sizeof(u32), NULL, NULL);
|
||||
g_cldst = clCreateBuffer(OpenCL::GetContext(), CL_MEM_WRITE_ONLY, 1024 * 1024 * sizeof(u32), NULL, NULL);
|
||||
#endif
|
||||
|
||||
clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * sizeOfSrc), src, 0, NULL, NULL);
|
||||
clEnqueueWriteBuffer(OpenCL::GetCommandQueue(), g_clsrc, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfSrc), src, 0, NULL, NULL);
|
||||
|
||||
clSetKernelArg(kernelToRun, 0, sizeof(cl_mem), &g_cldst);
|
||||
clSetKernelArg(kernelToRun, 1, sizeof(cl_mem), &g_clsrc);
|
||||
clSetKernelArg(kernelToRun, 2, sizeof(cl_int), &width);
|
||||
clSetKernelArg(decoder.kernel, 0, sizeof(cl_mem), &g_cldst);
|
||||
clSetKernelArg(decoder.kernel, 1, sizeof(cl_mem), &g_clsrc);
|
||||
clSetKernelArg(decoder.kernel, 2, sizeof(cl_int), &width);
|
||||
|
||||
size_t global[] = { (size_t)(width / xSkip), (size_t)(height / ySkip) };
|
||||
size_t global[] = { (size_t)(width / decoder.xSkip), (size_t)(height / decoder.ySkip) };
|
||||
|
||||
// No work-groups for now
|
||||
/*
|
||||
|
@ -206,20 +174,20 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
|||
PanicAlert("Error obtaining work-group information");
|
||||
*/
|
||||
|
||||
err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), kernelToRun, 2, NULL, global, NULL, 0, NULL, NULL);
|
||||
err = clEnqueueNDRangeKernel(OpenCL::GetCommandQueue(), decoder.kernel, 2, NULL, global, NULL, 0, NULL, NULL);
|
||||
if(err)
|
||||
OpenCL::HandleCLError(err, "Failed to enqueue kernel");
|
||||
|
||||
clFinish(OpenCL::GetCommandQueue());
|
||||
|
||||
clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * sizeOfDst), dst, 0, NULL, NULL);
|
||||
clEnqueueReadBuffer(OpenCL::GetCommandQueue(), g_cldst, CL_TRUE, 0, (size_t)(width * height * decoder.sizeOfDst), dst, 0, NULL, NULL);
|
||||
|
||||
#ifdef DEBUG_OPENCL
|
||||
clReleaseMemObject(g_clsrc);
|
||||
clReleaseMemObject(g_cldst);
|
||||
#endif
|
||||
|
||||
return formatResult;
|
||||
return decoder.format;
|
||||
#else
|
||||
return PC_TEX_FMT_NONE;
|
||||
#endif
|
||||
|
|
|
@ -23,6 +23,6 @@
|
|||
void TexDecoder_OpenCL_Initialize();
|
||||
void TexDecoder_OpenCL_Shutdown();
|
||||
|
||||
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt);
|
||||
PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt, bool rgba);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1266,13 +1266,15 @@ void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center)
|
|||
|
||||
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly)
|
||||
{
|
||||
PC_TexFormat retval = PC_TEX_FMT_NONE;
|
||||
|
||||
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
||||
PC_TexFormat retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt);
|
||||
if(retval == PC_TEX_FMT_NONE)
|
||||
retval = TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
|
||||
#else
|
||||
PC_TexFormat retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
|
||||
retval = TexDecoder_Decode_OpenCL(dst, src, width, height, texformat, tlutaddr, tlutfmt, rgbaOnly);
|
||||
#endif
|
||||
|
||||
if(retval == PC_TEX_FMT_NONE)
|
||||
retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst,src,width,height,texformat,tlutaddr,tlutfmt) : TexDecoder_Decode_real(dst,src,width,height,texformat,tlutaddr,tlutfmt);
|
||||
|
||||
if ((!TexFmt_Overlay_Enable)|| (retval == PC_TEX_FMT_NONE))
|
||||
return retval;
|
||||
|
||||
|
|
|
@ -47,6 +47,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Core\Common\Commo
|
|||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Dolphin", "Core\DolphinWX\DolphinWX.vcproj", "{A72606EF-C5C1-4954-90AD-F0F93A8D97D9}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{21DBE606-2958-43AC-A14E-B6B798D56554} = {21DBE606-2958-43AC-A14E-B6B798D56554}
|
||||
{C7E5D50A-2916-464B-86A7-E10B3CC88ADA} = {C7E5D50A-2916-464B-86A7-E10B3CC88ADA}
|
||||
{CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} = {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160}
|
||||
{D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8} = {D6E56527-BBB9-4EAD-A6EC-49D4BF6AFCD8}
|
||||
|
|
Loading…
Reference in New Issue