mirror of https://github.com/PCSX2/pcsx2.git
implemented opencl program caching on disk under the system default temp folder, needs some additional work on linux
This commit is contained in:
parent
ba59036a97
commit
b9b02cf749
|
@ -37,6 +37,7 @@ static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
|||
#define MAX_BIN_PER_BATCH ((MAX_FRAME_SIZE / BIN_SIZE) * (MAX_FRAME_SIZE / BIN_SIZE))
|
||||
#define MAX_BIN_COUNT (MAX_BIN_PER_BATCH * MAX_BATCH_COUNT)
|
||||
#define TFX_PARAM_SIZE 2048
|
||||
#define TFX_PROGRAM_VERSION 1
|
||||
|
||||
#if MAX_PRIM_PER_BATCH == 64u
|
||||
#define BIN_TYPE cl_ulong
|
||||
|
@ -1622,6 +1623,7 @@ GSVector4i* GSRendererCL::TFXJob::GetDstPages()
|
|||
GSRendererCL::CL::CL()
|
||||
{
|
||||
WIs = INT_MAX;
|
||||
version = INT_MAX;
|
||||
|
||||
std::string ocldev = theApp.GetConfig("ocldev", "");
|
||||
|
||||
|
@ -1629,37 +1631,43 @@ GSRendererCL::CL::CL()
|
|||
ocldev = "Intel(R) Corporation Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz OpenCL C 1.2 CPU";
|
||||
#endif
|
||||
|
||||
list<OCLDevice> ocldevs;
|
||||
list<OCLDeviceDesc> dl;
|
||||
|
||||
GSUtil::GetOCLDevices(ocldevs);
|
||||
GSUtil::GetDeviceDescs(dl);
|
||||
|
||||
for(auto dev : ocldevs)
|
||||
for(auto d : dl)
|
||||
{
|
||||
if(dev.name == ocldev)
|
||||
if(d.name == ocldev)
|
||||
{
|
||||
devices.push_back(dev.device);
|
||||
devs.push_back(d);
|
||||
|
||||
WIs = std::min(WIs, (uint32)dev.device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
|
||||
WIs = std::min(WIs, (uint32)d.device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
|
||||
version = std::min(version, d.version);
|
||||
|
||||
break; // TODO: multiple devices?
|
||||
}
|
||||
}
|
||||
|
||||
if(devices.empty() && !ocldevs.empty())
|
||||
if(devs.empty() && !dl.empty())
|
||||
{
|
||||
auto dev = ocldevs.front();
|
||||
auto d = dl.front();
|
||||
|
||||
devices.push_back(dev.device);
|
||||
devs.push_back(d);
|
||||
|
||||
WIs = std::min(WIs, (uint32)dev.device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
|
||||
WIs = std::min(WIs, (uint32)d.device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
|
||||
version = std::min(version, d.version);
|
||||
}
|
||||
|
||||
if(devices.empty())
|
||||
if(devs.empty())
|
||||
{
|
||||
throw new std::exception("OpenCL device not found");
|
||||
}
|
||||
|
||||
context = cl::Context(devices);
|
||||
vector<cl::Device> tmp;
|
||||
|
||||
for(auto d : devs) tmp.push_back(d.device);
|
||||
|
||||
context = cl::Context(tmp);
|
||||
|
||||
queue[0] = cl::CommandQueue(context);
|
||||
queue[1] = cl::CommandQueue(context);
|
||||
|
@ -1699,24 +1707,24 @@ void GSRendererCL::CL::Map()
|
|||
{
|
||||
Unmap();
|
||||
|
||||
// TODO: CL_MAP_WRITE_INVALIDATE_REGION if 1.2+
|
||||
cl_map_flags flags = version >= 120 ? CL_MAP_WRITE_INVALIDATE_REGION : CL_MAP_WRITE;
|
||||
|
||||
if(vb.head < vb.size)
|
||||
{
|
||||
vb.mapped_ptr = wq->enqueueMapBuffer(vb.buff[wqidx], CL_TRUE, CL_MAP_WRITE, vb.head, vb.size - vb.head);
|
||||
vb.mapped_ptr = wq->enqueueMapBuffer(vb.buff[wqidx], CL_TRUE, flags, vb.head, vb.size - vb.head);
|
||||
vb.ptr = (unsigned char*)vb.mapped_ptr - vb.head;
|
||||
ASSERT(((size_t)vb.ptr & 15) == 0);
|
||||
}
|
||||
|
||||
if(ib.head < ib.size)
|
||||
{
|
||||
ib.mapped_ptr = wq->enqueueMapBuffer(ib.buff[wqidx], CL_TRUE, CL_MAP_WRITE, ib.head, ib.size - ib.head);
|
||||
ib.mapped_ptr = wq->enqueueMapBuffer(ib.buff[wqidx], CL_TRUE, flags, ib.head, ib.size - ib.head);
|
||||
ib.ptr = (unsigned char*)ib.mapped_ptr - ib.head;
|
||||
}
|
||||
|
||||
if(pb.head < pb.size)
|
||||
{
|
||||
pb.mapped_ptr = wq->enqueueMapBuffer(pb.buff[wqidx], CL_TRUE, CL_MAP_WRITE, pb.head, pb.size - pb.head);
|
||||
pb.mapped_ptr = wq->enqueueMapBuffer(pb.buff[wqidx], CL_TRUE, flags, pb.head, pb.size - pb.head);
|
||||
pb.ptr = (unsigned char*)pb.mapped_ptr - pb.head;
|
||||
ASSERT(((size_t)pb.ptr & 15) == 0);
|
||||
}
|
||||
|
@ -1733,34 +1741,75 @@ void GSRendererCL::CL::Unmap()
|
|||
pb.mapped_ptr = pb.ptr = NULL;
|
||||
}
|
||||
|
||||
static void AddDefs(ostringstream& opt)
|
||||
{
|
||||
opt << "-cl-std=CL1.1 ";
|
||||
opt << "-D MAX_FRAME_SIZE=" << MAX_FRAME_SIZE << "u ";
|
||||
opt << "-D MAX_PRIM_COUNT=" << MAX_PRIM_COUNT << "u ";
|
||||
opt << "-D MAX_PRIM_PER_BATCH_BITS=" << MAX_PRIM_PER_BATCH_BITS << "u ";
|
||||
opt << "-D MAX_PRIM_PER_BATCH=" << MAX_PRIM_PER_BATCH << "u ";
|
||||
opt << "-D MAX_BATCH_COUNT=" << MAX_BATCH_COUNT << "u ";
|
||||
opt << "-D BIN_SIZE_BITS=" << BIN_SIZE_BITS << " ";
|
||||
opt << "-D BIN_SIZE=" << BIN_SIZE << "u ";
|
||||
opt << "-D MAX_BIN_PER_BATCH=" << MAX_BIN_PER_BATCH << "u ";
|
||||
opt << "-D MAX_BIN_COUNT=" << MAX_BIN_COUNT << "u ";
|
||||
opt << "-D TFX_PARAM_SIZE=" << TFX_PARAM_SIZE << "u ";
|
||||
#ifdef IOCL_DEBUG
|
||||
opt << "-g -s \"E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\tfx.cl\" ";
|
||||
#endif
|
||||
}
|
||||
|
||||
cl::Kernel GSRendererCL::CL::Build(const char* entry, ostringstream& opt)
|
||||
{
|
||||
// TODO: cache binary on disk
|
||||
|
||||
printf("building kernel (%s)\n", entry);
|
||||
cl::Program program;
|
||||
|
||||
cl::Program program = cl::Program(context, kernel_str);
|
||||
if(version >= 120)
|
||||
{
|
||||
cl::Program::Binaries binaries;
|
||||
|
||||
try
|
||||
{
|
||||
for(auto d : devs)
|
||||
{
|
||||
string path = d.tmppath + "/" + entry;
|
||||
|
||||
FILE* f = fopen(path.c_str(), "rb");
|
||||
|
||||
if(f != NULL)
|
||||
{
|
||||
fseek(f, 0, SEEK_END);
|
||||
long size = ftell(f);
|
||||
pair<void*, size_t> b(new char[size], size);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
fread(b.first, b.second, 1, f);
|
||||
fclose(f);
|
||||
|
||||
binaries.push_back(b);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(binaries.size() == devs.size())
|
||||
{
|
||||
vector<cl::Device> tmp;
|
||||
|
||||
for(auto d : devs) tmp.push_back(d.device);
|
||||
|
||||
program = cl::Program(context, tmp, binaries);
|
||||
|
||||
AddDefs(opt);
|
||||
|
||||
program.build(opt.str().c_str());
|
||||
|
||||
cl::Kernel kernel = cl::Kernel(program, entry);
|
||||
|
||||
return kernel;
|
||||
}
|
||||
}
|
||||
catch(cl::Error err)
|
||||
{
|
||||
printf("%s (%d)\n", err.what(), err.err());
|
||||
}
|
||||
|
||||
for(auto b : binaries)
|
||||
{
|
||||
delete [] b.first;
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
printf("building kernel (%s)\n", entry);
|
||||
|
||||
program = cl::Program(context, kernel_str);
|
||||
|
||||
AddDefs(opt);
|
||||
|
||||
program.build(opt.str().c_str());
|
||||
|
@ -1769,9 +1818,9 @@ cl::Kernel GSRendererCL::CL::Build(const char* entry, ostringstream& opt)
|
|||
{
|
||||
if(err.err() == CL_BUILD_PROGRAM_FAILURE)
|
||||
{
|
||||
for(auto device : devices)
|
||||
for(auto d : devs)
|
||||
{
|
||||
auto s = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
|
||||
auto s = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(d.device);
|
||||
|
||||
printf("kernel (%s) build error: %s\n", entry, s.c_str());
|
||||
}
|
||||
|
@ -1780,9 +1829,56 @@ cl::Kernel GSRendererCL::CL::Build(const char* entry, ostringstream& opt)
|
|||
throw err;
|
||||
}
|
||||
|
||||
if(version >= 120)
|
||||
{
|
||||
try
|
||||
{
|
||||
vector<size_t> sizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
|
||||
vector<char*> binaries = program.getInfo<CL_PROGRAM_BINARIES>();
|
||||
|
||||
for(int i = 0; i < binaries.size(); i++)
|
||||
{
|
||||
string path = devs[i].tmppath + "/" + entry;
|
||||
|
||||
FILE* f = fopen(path.c_str(), "wb");
|
||||
|
||||
if(f != NULL)
|
||||
{
|
||||
fwrite(binaries[i], sizes[i], 1, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
delete[] binaries[i];
|
||||
}
|
||||
}
|
||||
catch(cl::Error err)
|
||||
{
|
||||
printf("%s (%d)\n", err.what(), err.err());
|
||||
}
|
||||
}
|
||||
|
||||
return cl::Kernel(program, entry);
|
||||
}
|
||||
|
||||
void GSRendererCL::CL::AddDefs(ostringstream& opt)
|
||||
{
|
||||
if(version == 110) opt << "-cl-std=CL1.1 ";
|
||||
else opt << "-cl-std=CL1.2 ";
|
||||
opt << "-D MAX_FRAME_SIZE=" << MAX_FRAME_SIZE << "u ";
|
||||
opt << "-D MAX_PRIM_COUNT=" << MAX_PRIM_COUNT << "u ";
|
||||
opt << "-D MAX_PRIM_PER_BATCH_BITS=" << MAX_PRIM_PER_BATCH_BITS << "u ";
|
||||
opt << "-D MAX_PRIM_PER_BATCH=" << MAX_PRIM_PER_BATCH << "u ";
|
||||
opt << "-D MAX_BATCH_COUNT=" << MAX_BATCH_COUNT << "u ";
|
||||
opt << "-D BIN_SIZE_BITS=" << BIN_SIZE_BITS << " ";
|
||||
opt << "-D BIN_SIZE=" << BIN_SIZE << "u ";
|
||||
opt << "-D MAX_BIN_PER_BATCH=" << MAX_BIN_PER_BATCH << "u ";
|
||||
opt << "-D MAX_BIN_COUNT=" << MAX_BIN_COUNT << "u ";
|
||||
opt << "-D TFX_PARAM_SIZE=" << TFX_PARAM_SIZE << "u ";
|
||||
#ifdef IOCL_DEBUG
|
||||
opt << "-g -s \"E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\tfx.cl\" ";
|
||||
#endif
|
||||
}
|
||||
|
||||
cl::Kernel& GSRendererCL::CL::GetPrimKernel(const PrimSelector& sel)
|
||||
{
|
||||
auto i = prim_map.find(sel);
|
||||
|
|
|
@ -188,9 +188,10 @@ class GSRendererCL : public GSRenderer
|
|||
std::map<uint64, cl::Kernel> tfx_map;
|
||||
|
||||
cl::Kernel Build(const char* entry, ostringstream& opt);
|
||||
void AddDefs(ostringstream& opt);
|
||||
|
||||
public:
|
||||
std::vector<cl::Device> devices;
|
||||
std::vector<OCLDeviceDesc> devs;
|
||||
cl::Context context;
|
||||
cl::CommandQueue queue[3];
|
||||
cl::Buffer vm;
|
||||
|
@ -200,6 +201,7 @@ class GSRendererCL : public GSRenderer
|
|||
cl::CommandQueue* wq;
|
||||
int wqidx;
|
||||
uint32 WIs;
|
||||
int version;
|
||||
|
||||
public:
|
||||
CL();
|
||||
|
|
|
@ -31,9 +31,9 @@ GSSettingsDlg::GSSettingsDlg(bool isOpen2)
|
|||
: GSDialog(isOpen2 ? IDD_CONFIG2 : IDD_CONFIG)
|
||||
, m_IsOpen2(isOpen2)
|
||||
{
|
||||
list<OCLDevice> ocldevs;
|
||||
list<OCLDeviceDesc> ocldevs;
|
||||
|
||||
GSUtil::GetOCLDevices(ocldevs);
|
||||
GSUtil::GetDeviceDescs(ocldevs);
|
||||
|
||||
int index = 0;
|
||||
|
||||
|
|
|
@ -226,9 +226,11 @@ bool GSUtil::CheckSSE()
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSUtil::GetOCLDevices(list<OCLDevice>& devs)
|
||||
#define OCL_PROGRAM_VERSION 1
|
||||
|
||||
void GSUtil::GetDeviceDescs(list<OCLDeviceDesc>& dl)
|
||||
{
|
||||
devs.clear();
|
||||
dl.clear();
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -246,10 +248,6 @@ void GSUtil::GetOCLDevices(list<OCLDevice>& devs)
|
|||
|
||||
for(auto& device : ds)
|
||||
{
|
||||
std::string vendor = device.getInfo<CL_DEVICE_VENDOR>();
|
||||
std::string name = device.getInfo<CL_DEVICE_NAME>();
|
||||
std::string version = device.getInfo<CL_DEVICE_OPENCL_C_VERSION>();
|
||||
|
||||
string type;
|
||||
|
||||
switch(device.getInfo<CL_DEVICE_TYPE>())
|
||||
|
@ -258,19 +256,41 @@ void GSUtil::GetOCLDevices(list<OCLDevice>& devs)
|
|||
case CL_DEVICE_TYPE_CPU: type = "CPU"; break;
|
||||
}
|
||||
|
||||
if(type.empty()) continue;
|
||||
|
||||
std::string version = device.getInfo<CL_DEVICE_OPENCL_C_VERSION>();
|
||||
|
||||
int major = 0;
|
||||
int minor = 0;
|
||||
|
||||
if(!type.empty() && sscanf(version.c_str(), "OpenCL C %d.%d", &major, &minor) == 2 && major == 1 && minor >= 1 || major > 1)
|
||||
{
|
||||
name = vendor + " " + name + " " + version + type;
|
||||
OCLDeviceDesc desc;
|
||||
|
||||
OCLDevice dev;
|
||||
desc.device = device;
|
||||
desc.name = GetDeviceUniqueName(device);
|
||||
desc.version = major * 100 + minor * 10;
|
||||
|
||||
dev.device = device;
|
||||
dev.name = name;
|
||||
// TODO: linux
|
||||
|
||||
devs.push_back(dev);
|
||||
char* buff = new char[MAX_PATH + 1];
|
||||
GetTempPath(MAX_PATH, buff);
|
||||
desc.tmppath = string(buff) + "/" + desc.name;
|
||||
|
||||
WIN32_FIND_DATA FindFileData;
|
||||
HANDLE hFind = FindFirstFile(desc.tmppath.c_str(), &FindFileData);
|
||||
if(hFind != INVALID_HANDLE_VALUE) FindClose(hFind);
|
||||
else CreateDirectory(desc.tmppath.c_str(), NULL);
|
||||
|
||||
sprintf(buff, "/%d", OCL_PROGRAM_VERSION);
|
||||
desc.tmppath += buff;
|
||||
delete[] buff;
|
||||
|
||||
hFind = FindFirstFile(desc.tmppath.c_str(), &FindFileData);
|
||||
if(hFind != INVALID_HANDLE_VALUE) FindClose(hFind);
|
||||
else CreateDirectory(desc.tmppath.c_str(), NULL);
|
||||
|
||||
dl.push_back(desc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -281,6 +301,25 @@ void GSUtil::GetOCLDevices(list<OCLDevice>& devs)
|
|||
}
|
||||
}
|
||||
|
||||
string GSUtil::GetDeviceUniqueName(cl::Device& device)
|
||||
{
|
||||
std::string vendor = device.getInfo<CL_DEVICE_VENDOR>();
|
||||
std::string name = device.getInfo<CL_DEVICE_NAME>();
|
||||
std::string version = device.getInfo<CL_DEVICE_OPENCL_C_VERSION>();
|
||||
|
||||
string type;
|
||||
|
||||
switch(device.getInfo<CL_DEVICE_TYPE>())
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: type = "GPU"; break;
|
||||
case CL_DEVICE_TYPE_CPU: type = "CPU"; break;
|
||||
}
|
||||
|
||||
version.erase(version.find_last_not_of(' ') + 1);
|
||||
|
||||
return vendor + " " + name + " " + version + " " + type;
|
||||
}
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
bool GSUtil::CheckDirectX()
|
||||
|
|
|
@ -23,10 +23,12 @@
|
|||
|
||||
#include "GS.h"
|
||||
|
||||
struct OCLDevice
|
||||
struct OCLDeviceDesc
|
||||
{
|
||||
cl::Device device;
|
||||
string name;
|
||||
int version;
|
||||
string tmppath;
|
||||
};
|
||||
|
||||
class GSUtil
|
||||
|
@ -45,7 +47,9 @@ public:
|
|||
static bool HasCompatibleBits(uint32 spsm, uint32 dpsm);
|
||||
|
||||
static bool CheckSSE();
|
||||
static void GetOCLDevices(list<OCLDevice>& devs);
|
||||
|
||||
static void GetDeviceDescs(list<OCLDeviceDesc>& dl);
|
||||
static string GetDeviceUniqueName(cl::Device& device);
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
#ifdef cl_amd_printf
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#else
|
||||
#define printf(x)
|
||||
#endif
|
||||
|
||||
#ifdef cl_amd_media_ops
|
||||
|
@ -639,9 +637,9 @@ __kernel void KERNEL_PRIM(
|
|||
dp1.xy = dp1.xy * sign(cp);
|
||||
dp2.xy = dp2.xy * sign(cp);
|
||||
|
||||
b.zero.x = select(0.0f, CL_FLT_EPSILON, (dp1.y < 0) | (dp1.y == 0) & (dp1.x > 0));
|
||||
b.zero.y = select(0.0f, CL_FLT_EPSILON, (dp0.y < 0) | (dp0.y == 0) & (dp0.x > 0));
|
||||
b.zero.z = select(0.0f, CL_FLT_EPSILON, (dp2.y < 0) | (dp2.y == 0) & (dp2.x > 0));
|
||||
b.zero.x = select(0.0f, CL_FLT_EPSILON, (dp1.y < 0) | ((dp1.y == 0) & (dp1.x > 0)));
|
||||
b.zero.y = select(0.0f, CL_FLT_EPSILON, (dp0.y < 0) | ((dp0.y == 0) & (dp0.x > 0)));
|
||||
b.zero.z = select(0.0f, CL_FLT_EPSILON, (dp2.y < 0) | ((dp2.y == 0) & (dp2.x > 0)));
|
||||
|
||||
// any barycentric(reject_corner) < 0, tile outside the triangle
|
||||
|
||||
|
|
|
@ -44,8 +44,6 @@
|
|||
#include <comutil.h>
|
||||
#include "../../common/include/comptr.h"
|
||||
|
||||
#include <CL/cl.h>
|
||||
#undef CL_VERSION_1_2
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#define __CL_ENABLE_EXCEPTIONS
|
||||
#include <CL/cl.hpp>
|
||||
|
|
Loading…
Reference in New Issue