Cached the compiled OpenCL kernels to disk (User/OpenCL/kernel.bin). This reduces the start up delay in OpenCL mode. Added a 10 byte header to the file which contains the current svn rev string. This will recompile the OpenCL kernels with every new revision.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6608 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-12-19 11:03:09 +00:00
parent ef0762c6a5
commit 9114c96c78
2 changed files with 135 additions and 14 deletions

View File

@ -150,10 +150,10 @@ cl_program CompileProgram(const char *Kernel)
cl_program program;
program = clCreateProgramWithSource(OpenCL::g_context, 1,
(const char **) & Kernel, NULL, &err);
if (!program)
{
HandleCLError(err, "Error: Failed to create compute program!");
return NULL;
}
// Build the program executable
@ -181,7 +181,7 @@ cl_program CompileProgram(const char *Kernel)
return NULL;
}
NOTICE_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds",
INFO_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds",
(float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0);
return program;
}
@ -200,7 +200,7 @@ cl_kernel CompileKernel(cl_program program, const char *Function)
HandleCLError(err, buffer);
return NULL;
}
NOTICE_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds",
INFO_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds",
(float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0);
return kernel;
}

View File

@ -20,6 +20,8 @@
#include "OpenCL.h"
#include "FileUtil.h"
#include "svnrev.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@ -90,27 +92,147 @@ void TexDecoder_OpenCL_Initialize()
if(!OpenCL::Initialize())
return;
std::string code;
cl_int err = 1;
u64 binary_size = 0;
char *binary = NULL;
char *header = NULL;
u32 nDevices = 0;
cl_device_id *devices = NULL;
cl_program program = NULL;
u64 *binary_sizes = NULL;
char **binaries = NULL;
char filename[1024];
sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX));
if (!File::ReadFileToString(true, filename, code))
sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX));
FILE *input = NULL;
input = fopen(filename, "rb");
if (input == NULL)
{
ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename);
return;
binary_size = 0;
}
else
{
binary_size = File::GetSize(input);
header = new char[10];
binary = new char[binary_size];
fread(header, sizeof(char), 10, input);
binary_size = fread(binary, sizeof(char), binary_size - 10, input);
fclose(input);
}
g_program = OpenCL::CompileProgram(code.c_str());
if (binary_size > 0)
{
if (binary_size > 10)
{
header[9] = '\0';
std::string current_rev = SVN_REV_STR;
std::string file_rev = header;
if (!current_rev.compare(0, 9, file_rev))
{
g_program = clCreateProgramWithBinary(OpenCL::GetContext(), 1, &OpenCL::device_id, &binary_size, (const unsigned char**)&binary, NULL, &err);
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clCreateProgramWithBinary");
}
if (!err)
{
err = clBuildProgram(g_program, 1, &OpenCL::device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clBuildProgram");
}
}
}
}
delete header;
delete binary;
}
if (err)
{
std::string code;
sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX));
if (!File::ReadFileToString(true, filename, code))
{
ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename);
return;
}
g_program = OpenCL::CompileProgram(code.c_str());
err = clGetProgramInfo(g_program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL);
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clGetProgramInfo");
}
devices = (cl_device_id *)malloc( sizeof(cl_device_id) *nDevices);
err = clGetProgramInfo(g_program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)*nDevices, devices, NULL);
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clGetProgramInfo");
}
binary_sizes = (size_t *)malloc(sizeof(size_t)*nDevices);
err = clGetProgramInfo(g_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL);
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clGetProgramInfo");
}
binaries = (char **)malloc(sizeof(char *)*nDevices);
for (u32 i = 0; i < nDevices; ++i)
{
if( binary_sizes[i] != 0 )
{
binaries[i] = (char *)malloc(sizeof(char)*binary_sizes[i] + 10);
}
else
{
binaries[i] = NULL;
}
}
err = clGetProgramInfo( g_program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
if (err != CL_SUCCESS)
{
OpenCL::HandleCLError(err, "clGetProgramInfo");
}
if (!err)
{
sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX));
const char *current_rev = SVN_REV_STR + '\0';
FILE *output = NULL;
output = fopen(filename, "wb");
if (output == NULL)
{
binary_size = 0;
}
else
{
// Supporting one OpenCL device for now
fwrite(current_rev, sizeof(char), 10, output);
fwrite(binaries[0], sizeof(char), binary_sizes[0], output);
fclose(output);
}
}
}
for (int i = 0; i <= GX_TF_CMPR; ++i) {
if (g_DecodeParametersNative[i].name)
g_DecodeParametersNative[i].kernel =
OpenCL::CompileKernel(g_program,
g_DecodeParametersNative[i].name);
OpenCL::CompileKernel(g_program,
g_DecodeParametersNative[i].name);
if (g_DecodeParametersRGBA[i].name)
g_DecodeParametersRGBA[i].kernel =
OpenCL::CompileKernel(g_program,
g_DecodeParametersRGBA[i].name);
OpenCL::CompileKernel(g_program,
g_DecodeParametersRGBA[i].name);
}
// Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture
@ -195,4 +317,3 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
return PC_TEX_FMT_NONE;
#endif
}