Fix a few things I missed
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4370 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
0146f3f58f
commit
dde693afb8
|
@ -18,7 +18,7 @@
|
||||||
#ifndef __OPENCL_H__
|
#ifndef __OPENCL_H__
|
||||||
#define __OPENCL_H__
|
#define __OPENCL_H__
|
||||||
|
|
||||||
#include "Config.h"
|
#include "Common.h"
|
||||||
// Change to #if 1 if you want to test OpenCL (and you have it) on Windows
|
// Change to #if 1 if you want to test OpenCL (and you have it) on Windows
|
||||||
#if 0
|
#if 0
|
||||||
#pragma comment(lib, "OpenCL.lib")
|
#pragma comment(lib, "OpenCL.lib")
|
||||||
|
|
|
@ -43,27 +43,13 @@ __m128i _b1[256];
|
||||||
__m128i _b2[256];
|
__m128i _b2[256];
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
||||||
|
bool Inited = false;
|
||||||
|
|
||||||
void InitXFBConvTables()
|
cl_kernel To_kernel;
|
||||||
{
|
cl_program To_program;
|
||||||
for (int i = 0; i < 256; i++)
|
cl_kernel From_kernel;
|
||||||
{
|
cl_program From_program;
|
||||||
_y[i] = _mm_set_epi32(0xFFFFFFF, 76283*(i - 16), 76283*(i - 16), 76283*(i - 16));
|
|
||||||
_u[i] = _mm_set_epi32( 0, 0, -25624 * (i - 128), 132252 * (i - 128));
|
|
||||||
_v[i] = _mm_set_epi32( 0, 104595 * (i - 128), -53281 * (i - 128), 0);
|
|
||||||
|
|
||||||
_r1[i] = _mm_add_epi32(_mm_set_epi32( 28770 * i / 2, 0, -9699 * i / 2, 16843 * i),
|
|
||||||
_bias1);
|
|
||||||
_g1[i] = _mm_set_epi32(-24117 * i / 2, 0, -19071 * i / 2, 33030 * i);
|
|
||||||
_b1[i] = _mm_set_epi32( -4653 * i / 2, 0, 28770 * i / 2, 6423 * i);
|
|
||||||
|
|
||||||
_r2[i] = _mm_add_epi32(_mm_set_epi32( 28770 * i / 2, 16843 * i, -9699 * i / 2, 0),
|
|
||||||
_bias2);
|
|
||||||
_g2[i] = _mm_set_epi32(-24117 * i / 2, 33030 * i, -19071 * i / 2, 0);
|
|
||||||
_b2[i] = _mm_set_epi32( -4653 * i / 2, 6423 * i, 28770 * i / 2, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
const char *__ConvertFromXFB = "int bound(int i) \n \
|
const char *__ConvertFromXFB = "int bound(int i) \n \
|
||||||
{ \n \
|
{ \n \
|
||||||
|
@ -97,6 +83,55 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB) \n \
|
||||||
dst[dstOffset + 1] = 0xFF000000 | (r<<16) | (g<<8) | (b); \n \
|
dst[dstOffset + 1] = 0xFF000000 | (r<<16) | (g<<8) | (b); \n \
|
||||||
} \n";
|
} \n";
|
||||||
|
|
||||||
|
const char *__ConvertToXFB = "__kernel void ConvertToXFB(__global unsigned int *dst, __global const unsigned char* _pEFB) \n \
|
||||||
|
{ \n \
|
||||||
|
const unsigned char *src = _pEFB;\n \
|
||||||
|
int id = get_global_id(0);\n \
|
||||||
|
src += id * 8; \n \
|
||||||
|
\n \
|
||||||
|
int y1 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16; \n \
|
||||||
|
int u1 = ((-(9699 * src[0]) - (19071 * src[1]) + (28770 * src[2])) >> 16) + 128;\n \
|
||||||
|
src += 4;\n \
|
||||||
|
\n \
|
||||||
|
int y2 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16;\n \
|
||||||
|
int v2 = (((28770 * src[0]) - (24117 * src[1]) - (4653 * src[2])) >> 16) + 128;\n \
|
||||||
|
src += 4;\n \
|
||||||
|
\n \
|
||||||
|
dst[id] = (v2 << 24) | (y2 << 16) | (u1 << 8) | (y1); \n \
|
||||||
|
} \n ";
|
||||||
|
|
||||||
|
void InitKernels()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
From_program = OpenCL::CompileProgram(__ConvertFromXFB);
|
||||||
|
From_kernel = OpenCL::CompileKernel(From_program, "ConvertFromXFB");
|
||||||
|
|
||||||
|
To_program = OpenCL::CompileProgram(__ConvertToXFB);
|
||||||
|
To_kernel = OpenCL::CompileKernel(To_program, "ConvertToXFB");
|
||||||
|
Inited = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
void InitXFBConvTables()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
_y[i] = _mm_set_epi32(0xFFFFFFF, 76283*(i - 16), 76283*(i - 16), 76283*(i - 16));
|
||||||
|
_u[i] = _mm_set_epi32( 0, 0, -25624 * (i - 128), 132252 * (i - 128));
|
||||||
|
_v[i] = _mm_set_epi32( 0, 104595 * (i - 128), -53281 * (i - 128), 0);
|
||||||
|
|
||||||
|
_r1[i] = _mm_add_epi32(_mm_set_epi32( 28770 * i / 2, 0, -9699 * i / 2, 16843 * i),
|
||||||
|
_bias1);
|
||||||
|
_g1[i] = _mm_set_epi32(-24117 * i / 2, 0, -19071 * i / 2, 33030 * i);
|
||||||
|
_b1[i] = _mm_set_epi32( -4653 * i / 2, 0, 28770 * i / 2, 6423 * i);
|
||||||
|
|
||||||
|
_r2[i] = _mm_add_epi32(_mm_set_epi32( 28770 * i / 2, 16843 * i, -9699 * i / 2, 0),
|
||||||
|
_bias2);
|
||||||
|
_g2[i] = _mm_set_epi32(-24117 * i / 2, 33030 * i, -19071 * i / 2, 0);
|
||||||
|
_b2[i] = _mm_set_epi32( -4653 * i / 2, 6423 * i, 28770 * i / 2, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
{
|
{
|
||||||
if (((size_t)dst & 0xF) != 0) {
|
if (((size_t)dst & 0xF) != 0) {
|
||||||
|
@ -105,10 +140,8 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
const unsigned char *src = _pXFB;
|
const unsigned char *src = _pXFB;
|
||||||
u32 numBlocks = ((width * height) / 2) / 2;
|
u32 numBlocks = ((width * height) / 2) / 2;
|
||||||
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
||||||
cl_kernel kernel;
|
if(!Inited)
|
||||||
cl_program program;
|
InitKernels();
|
||||||
program = OpenCL::CompileProgram(__ConvertFromXFB);
|
|
||||||
kernel = OpenCL::CompileKernel(program, "ConvertFromXFB");
|
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
size_t global = 0; // global domain size for our calculation
|
size_t global = 0; // global domain size for our calculation
|
||||||
|
@ -131,8 +164,8 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
// Set the arguments to our compute kernel
|
// Set the arguments to our compute kernel
|
||||||
//
|
//
|
||||||
err = 0;
|
err = 0;
|
||||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &_dst);
|
err = clSetKernelArg(From_kernel, 0, sizeof(cl_mem), &_dst);
|
||||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &_src);
|
err |= clSetKernelArg(From_kernel, 1, sizeof(cl_mem), &_src);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to set kernel arguments! %d\n", err);
|
printf("Error: Failed to set kernel arguments! %d\n", err);
|
||||||
|
@ -141,7 +174,7 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
|
|
||||||
// Get the maximum work group size for executing the kernel on the device
|
// Get the maximum work group size for executing the kernel on the device
|
||||||
//
|
//
|
||||||
err = clGetKernelWorkGroupInfo(Decoders[0].kernel, OpenCL::device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL);
|
err = clGetKernelWorkGroupInfo(From_kernel, OpenCL::device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
||||||
|
@ -155,7 +188,7 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
{
|
{
|
||||||
// Global can't be less than local
|
// Global can't be less than local
|
||||||
}
|
}
|
||||||
err = clEnqueueNDRangeKernel(OpenCL::g_cmdq, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
err = clEnqueueNDRangeKernel(OpenCL::g_cmdq, From_kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to execute kernel! %d\n", err);
|
printf("Error: Failed to execute kernel! %d\n", err);
|
||||||
|
@ -201,23 +234,6 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *__ConvertToXFB = "__kernel void ConvertToXFB(__global unsigned int *dst, __global const unsigned char* _pEFB) \n \
|
|
||||||
{ \n \
|
|
||||||
const unsigned char *src = _pEFB;\n \
|
|
||||||
int id = get_global_id(0);\n \
|
|
||||||
src += id * 8; \n \
|
|
||||||
\n \
|
|
||||||
int y1 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16; \n \
|
|
||||||
int u1 = ((-(9699 * src[0]) - (19071 * src[1]) + (28770 * src[2])) >> 16) + 128;\n \
|
|
||||||
src += 4;\n \
|
|
||||||
\n \
|
|
||||||
int y2 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16;\n \
|
|
||||||
int v2 = (((28770 * src[0]) - (24117 * src[1]) - (4653 * src[2])) >> 16) + 128;\n \
|
|
||||||
src += 4;\n \
|
|
||||||
\n \
|
|
||||||
dst[id] = (v2 << 24) | (y2 << 16) | (u1 << 8) | (y1); \n \
|
|
||||||
} \n ";
|
|
||||||
|
|
||||||
|
|
||||||
void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||||
{
|
{
|
||||||
|
@ -228,10 +244,9 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||||
PanicAlert("ConvertToXFB - unaligned XFB");
|
PanicAlert("ConvertToXFB - unaligned XFB");
|
||||||
}
|
}
|
||||||
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
#if defined(HAVE_OPENCL) && HAVE_OPENCL
|
||||||
cl_kernel kernel;
|
if(!Inited)
|
||||||
cl_program program;
|
InitKernels();
|
||||||
program = OpenCL::CompileProgram(__ConvertToXFB);
|
|
||||||
kernel = OpenCL::CompileKernel(program, "ConvertToXFB");
|
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
size_t global = 0; // global domain size for our calculation
|
size_t global = 0; // global domain size for our calculation
|
||||||
|
@ -254,8 +269,8 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||||
// Set the arguments to our compute kernel
|
// Set the arguments to our compute kernel
|
||||||
//
|
//
|
||||||
err = 0;
|
err = 0;
|
||||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &_dst);
|
err = clSetKernelArg(To_kernel, 0, sizeof(cl_mem), &_dst);
|
||||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &_src);
|
err |= clSetKernelArg(To_kernel, 1, sizeof(cl_mem), &_src);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to set kernel arguments! %d\n", err);
|
printf("Error: Failed to set kernel arguments! %d\n", err);
|
||||||
|
@ -264,7 +279,7 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||||
|
|
||||||
// Get the maximum work group size for executing the kernel on the device
|
// Get the maximum work group size for executing the kernel on the device
|
||||||
//
|
//
|
||||||
err = clGetKernelWorkGroupInfo(Decoders[0].kernel, OpenCL::device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL);
|
err = clGetKernelWorkGroupInfo(To_kernel, OpenCL::device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
||||||
|
@ -278,7 +293,7 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||||
{
|
{
|
||||||
// Global can't be less than local
|
// Global can't be less than local
|
||||||
}
|
}
|
||||||
err = clEnqueueNDRangeKernel(OpenCL::g_cmdq, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
err = clEnqueueNDRangeKernel(OpenCL::g_cmdq, To_kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
printf("Error: Failed to execute kernel! %d\n", err);
|
printf("Error: Failed to execute kernel! %d\n", err);
|
||||||
|
|
Loading…
Reference in New Issue