From 9114c96c784ae16e0c66d6016d489564be4319b4 Mon Sep 17 00:00:00 2001 From: skidau Date: Sun, 19 Dec 2010 11:03:09 +0000 Subject: [PATCH] Cached the compiled OpenCL kernels to disk (User/OpenCL/kernel.bin). This reduces the start up delay in OpenCL mode. Added a 10 byte header to the file which contains the current svn rev string. This will recompile the OpenCL kernels with every new revision. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6608 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/OpenCL.cpp | 6 +- .../Src/OpenCL/OCLTextureDecoder.cpp | 143 ++++++++++++++++-- 2 files changed, 135 insertions(+), 14 deletions(-) diff --git a/Source/Core/Common/Src/OpenCL.cpp b/Source/Core/Common/Src/OpenCL.cpp index 5c36e16b2c..c3d3f7e796 100644 --- a/Source/Core/Common/Src/OpenCL.cpp +++ b/Source/Core/Common/Src/OpenCL.cpp @@ -150,10 +150,10 @@ cl_program CompileProgram(const char *Kernel) cl_program program; program = clCreateProgramWithSource(OpenCL::g_context, 1, (const char **) & Kernel, NULL, &err); + if (!program) { HandleCLError(err, "Error: Failed to create compute program!"); - return NULL; } // Build the program executable @@ -181,7 +181,7 @@ cl_program CompileProgram(const char *Kernel) return NULL; } - NOTICE_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds", + INFO_LOG(COMMON, "OpenCL CompileProgram took %.3f seconds", (float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0); return program; } @@ -200,7 +200,7 @@ cl_kernel CompileKernel(cl_program program, const char *Function) HandleCLError(err, buffer); return NULL; } - NOTICE_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds", + INFO_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds", (float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0); return kernel; } diff --git a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp index 38ee283433..aec7106757 100644 --- a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp @@ -20,6 +20,8 @@ #include "OpenCL.h" #include "FileUtil.h" +#include "svnrev.h" + #include #include #include @@ -90,27 +92,147 @@ void TexDecoder_OpenCL_Initialize() if(!OpenCL::Initialize()) return; - std::string code; + cl_int err = 1; + u64 binary_size = 0; + char *binary = NULL; + char *header = NULL; + u32 nDevices = 0; + cl_device_id *devices = NULL; + cl_program program = NULL; + u64 *binary_sizes = NULL; + char **binaries = NULL; char filename[1024]; - sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX)); - if (!File::ReadFileToString(true, filename, code)) + + sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX)); + + FILE *input = NULL; + + input = fopen(filename, "rb"); + if (input == NULL) { - ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename); - return; + binary_size = 0; + } + else + { + binary_size = File::GetSize(input); + header = new char[10]; + binary = new char[binary_size]; + fread(header, sizeof(char), 10, input); + binary_size = fread(binary, sizeof(char), binary_size - 10, input); + fclose(input); } - g_program = OpenCL::CompileProgram(code.c_str()); + if (binary_size > 0) + { + if (binary_size > 10) + { + header[9] = '\0'; + std::string current_rev = SVN_REV_STR; + std::string file_rev = header; + + if (!current_rev.compare(0, 9, file_rev)) + { + g_program = clCreateProgramWithBinary(OpenCL::GetContext(), 1, &OpenCL::device_id, &binary_size, (const unsigned char**)&binary, NULL, &err); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clCreateProgramWithBinary"); + } + + if (!err) + { + err = clBuildProgram(g_program, 1, &OpenCL::device_id, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clBuildProgram"); + } + } + } + } + delete header; + delete binary; + } + + if (err) + { + std::string code; + sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX)); + if (!File::ReadFileToString(true, filename, code)) + { + ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename); + return; + } + + g_program = OpenCL::CompileProgram(code.c_str()); + + err = clGetProgramInfo(g_program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clGetProgramInfo"); + } + devices = (cl_device_id *)malloc( sizeof(cl_device_id) *nDevices); + + err = clGetProgramInfo(g_program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)*nDevices, devices, NULL); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clGetProgramInfo"); + } + + binary_sizes = (size_t *)malloc(sizeof(size_t)*nDevices); + err = clGetProgramInfo(g_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clGetProgramInfo"); + } + + binaries = (char **)malloc(sizeof(char *)*nDevices); + for (u32 i = 0; i < nDevices; ++i) + { + if( binary_sizes[i] != 0 ) + { + binaries[i] = (char *)malloc(sizeof(char)*binary_sizes[i] + 10); + } + else + { + binaries[i] = NULL; + } + } + err = clGetProgramInfo( g_program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); + if (err != CL_SUCCESS) + { + OpenCL::HandleCLError(err, "clGetProgramInfo"); + } + + if (!err) + { + sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX)); + const char *current_rev = SVN_REV_STR + '\0'; + + FILE *output = NULL; + output = fopen(filename, "wb"); + if (output == NULL) + { + binary_size = 0; + } + else + { + // Supporting one OpenCL device for now + fwrite(current_rev, sizeof(char), 10, output); + fwrite(binaries[0], sizeof(char), binary_sizes[0], output); + fclose(output); + } + } + } for (int i = 0; i <= GX_TF_CMPR; ++i) { if (g_DecodeParametersNative[i].name) g_DecodeParametersNative[i].kernel = - OpenCL::CompileKernel(g_program, - g_DecodeParametersNative[i].name); + OpenCL::CompileKernel(g_program, + g_DecodeParametersNative[i].name); if (g_DecodeParametersRGBA[i].name) g_DecodeParametersRGBA[i].kernel = - OpenCL::CompileKernel(g_program, - g_DecodeParametersRGBA[i].name); + OpenCL::CompileKernel(g_program, + g_DecodeParametersRGBA[i].name); } // Allocating maximal Wii texture size in advance, so that we don't have to allocate/deallocate per texture @@ -195,4 +317,3 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei return PC_TEX_FMT_NONE; #endif } -