Cached the compiled OpenCL kernels to disk (User/OpenCL/kernel.bin). This reduces the start up delay in OpenCL mode. Added a 10 byte header to the file which contains the current svn rev string. This will recompile the OpenCL kernels with every new revision.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6608 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
ef0762c6a5
commit
9114c96c78
|
@ -150,10 +150,10 @@ cl_program CompileProgram(const char *Kernel)
|
|||
cl_program program;
|
||||
program = clCreateProgramWithSource(OpenCL::g_context, 1,
|
||||
(const char **) & Kernel, NULL, &err);
|
||||
|
||||
if (!program)
|
||||
{
|
||||
HandleCLError(err, "Error: Failed to create compute program!");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Build the program executable
|
||||
|
@ -181,7 +181,7 @@ cl_program CompileProgram(const char *Kernel)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
NOTICE_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds",
|
||||
INFO_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds",
|
||||
(float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0);
|
||||
return program;
|
||||
}
|
||||
|
@ -200,7 +200,7 @@ cl_kernel CompileKernel(cl_program program, const char *Function)
|
|||
HandleCLError(err, buffer);
|
||||
return NULL;
|
||||
}
|
||||
NOTICE_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds",
|
||||
INFO_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds",
|
||||
(float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0);
|
||||
return kernel;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "OpenCL.h"
|
||||
#include "FileUtil.h"
|
||||
|
||||
#include "svnrev.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -90,27 +92,147 @@ void TexDecoder_OpenCL_Initialize()
|
|||
if(!OpenCL::Initialize())
|
||||
return;
|
||||
|
||||
std::string code;
|
||||
cl_int err = 1;
|
||||
u64 binary_size = 0;
|
||||
char *binary = NULL;
|
||||
char *header = NULL;
|
||||
u32 nDevices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_program program = NULL;
|
||||
u64 *binary_sizes = NULL;
|
||||
char **binaries = NULL;
|
||||
char filename[1024];
|
||||
sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX));
|
||||
if (!File::ReadFileToString(true, filename, code))
|
||||
|
||||
sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX));
|
||||
|
||||
FILE *input = NULL;
|
||||
|
||||
input = fopen(filename, "rb");
|
||||
if (input == NULL)
|
||||
{
|
||||
ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename);
|
||||
return;
|
||||
binary_size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
binary_size = File::GetSize(input);
|
||||
header = new char[10];
|
||||
binary = new char[binary_size];
|
||||
fread(header, sizeof(char), 10, input);
|
||||
binary_size = fread(binary, sizeof(char), binary_size - 10, input);
|
||||
fclose(input);
|
||||
}
|
||||
|
||||
g_program = OpenCL::CompileProgram(code.c_str());
|
||||
if (binary_size > 0)
|
||||
{
|
||||
if (binary_size > 10)
|
||||
{
|
||||
header[9] = '\0';
|
||||
std::string current_rev = SVN_REV_STR;
|
||||
std::string file_rev = header;
|
||||
|
||||
if (!current_rev.compare(0, 9, file_rev))
|
||||
{
|
||||
g_program = clCreateProgramWithBinary(OpenCL::GetContext(), 1, &OpenCL::device_id, &binary_size, (const unsigned char**)&binary, NULL, &err);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clCreateProgramWithBinary");
|
||||
}
|
||||
|
||||
if (!err)
|
||||
{
|
||||
err = clBuildProgram(g_program, 1, &OpenCL::device_id, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clBuildProgram");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete header;
|
||||
delete binary;
|
||||
}
|
||||
|
||||
if (err)
|
||||
{
|
||||
std::string code;
|
||||
sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX));
|
||||
if (!File::ReadFileToString(true, filename, code))
|
||||
{
|
||||
ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename);
|
||||
return;
|
||||
}
|
||||
|
||||
g_program = OpenCL::CompileProgram(code.c_str());
|
||||
|
||||
err = clGetProgramInfo(g_program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clGetProgramInfo");
|
||||
}
|
||||
devices = (cl_device_id *)malloc( sizeof(cl_device_id) *nDevices);
|
||||
|
||||
err = clGetProgramInfo(g_program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)*nDevices, devices, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clGetProgramInfo");
|
||||
}
|
||||
|
||||
binary_sizes = (size_t *)malloc(sizeof(size_t)*nDevices);
|
||||
err = clGetProgramInfo(g_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clGetProgramInfo");
|
||||
}
|
||||
|
||||
binaries = (char **)malloc(sizeof(char *)*nDevices);
|
||||
for (u32 i = 0; i < nDevices; ++i)
|
||||
{
|
||||
if( binary_sizes[i] != 0 )
|
||||
{
|
||||
binaries[i] = (char *)malloc(sizeof(char)*binary_sizes[i] + 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
binaries[i] = NULL;
|
||||
}
|
||||
}
|
||||
err = clGetProgramInfo( g_program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
OpenCL::HandleCLError(err, "clGetProgramInfo");
|
||||
}
|
||||
|
||||
if (!err)
|
||||
{
|
||||
sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX));
|
||||
const char *current_rev = SVN_REV_STR + '\0';
|
||||
|
||||
FILE *output = NULL;
|
||||
output = fopen(filename, "wb");
|
||||
if (output == NULL)
|
||||
{
|
||||
binary_size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Supporting one OpenCL device for now
|
||||
fwrite(current_rev, sizeof(char), 10, output);
|
||||
fwrite(binaries[0], sizeof(char), binary_sizes[0], output);
|
||||
fclose(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i <= GX_TF_CMPR; ++i) {
|
||||
if (g_DecodeParametersNative[i].name)
|
||||
g_DecodeParametersNative[i].kernel =
|
||||
OpenCL::CompileKernel(g_program,
|
||||
g_DecodeParametersNative[i].name);
|
||||
OpenCL::CompileKernel(g_program,
|
||||
g_DecodeParametersNative[i].name);
|
||||
|
||||
if (g_DecodeParametersRGBA[i].name)
|
||||
g_DecodeParametersRGBA[i].kernel =
|
||||
OpenCL::CompileKernel(g_program,
|
||||
g_DecodeParametersRGBA[i].name);
|
||||
OpenCL::CompileKernel(g_program,
|
||||
g_DecodeParametersRGBA[i].name);
|
||||
}
|
||||
|
||||
// Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture
|
||||
|
@ -195,4 +317,3 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
|||
return PC_TEX_FMT_NONE;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue