mirror of https://github.com/PCSX2/pcsx2.git
269 lines
6.1 KiB
C++
269 lines
6.1 KiB
C++
/*
|
|
* Copyright (C) 2007-2009 Gabest
|
|
* http://www.gabest.org
|
|
*
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This Program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with GNU Make; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "GSRenderer.h"
|
|
|
|
#ifdef ENABLE_OPENCL
|
|
|
|
__aligned(struct, 32) GSVertexCL
|
|
{
|
|
GSVector4 p, t;
|
|
};
|
|
|
|
class GSRendererCL : public GSRenderer
|
|
{
|
|
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
|
|
|
ConvertVertexBufferPtr m_cvb[4][2][2];
|
|
|
|
template<uint32 primclass, uint32 tme, uint32 fst>
|
|
void ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
|
|
|
union PrimSelector
|
|
{
|
|
struct
|
|
{
|
|
uint32 prim:2; // 0
|
|
};
|
|
|
|
uint32 key;
|
|
|
|
operator uint32() const { return key; }
|
|
};
|
|
|
|
union TileSelector
|
|
{
|
|
struct
|
|
{
|
|
uint32 prim:2; // 0
|
|
uint32 mode:2; // 2
|
|
uint32 clear:1; // 4
|
|
};
|
|
|
|
uint32 key;
|
|
|
|
operator uint32() const { return key; }
|
|
};
|
|
|
|
union TFXSelector
|
|
{
|
|
struct
|
|
{
|
|
uint32 fpsm:3; // 0
|
|
uint32 zpsm:3; // 3
|
|
uint32 ztst:2; // 6 (0: off, 1: write, 2: test (ge), 3: test (g))
|
|
uint32 atst:3; // 8
|
|
uint32 afail:2; // 11
|
|
uint32 iip:1; // 13
|
|
uint32 tfx:3; // 14
|
|
uint32 tcc:1; // 17
|
|
uint32 fst:1; // 18
|
|
uint32 ltf:1; // 19
|
|
uint32 tlu:1; // 20
|
|
uint32 fge:1; // 21
|
|
uint32 date:1; // 22
|
|
uint32 abe:1; // 23
|
|
uint32 aba:2; // 24
|
|
uint32 abb:2; // 26
|
|
uint32 abc:2; // 28
|
|
uint32 abd:2; // 30
|
|
|
|
uint32 pabe:1; // 32
|
|
uint32 aa1:1; // 33
|
|
uint32 fwrite:1; // 34
|
|
uint32 ftest:1; // 35
|
|
uint32 rfb:1; // 36
|
|
uint32 zwrite:1; // 37
|
|
uint32 ztest:1; // 38
|
|
uint32 rzb:1; // 39
|
|
uint32 wms:2; // 40
|
|
uint32 wmt:2; // 42
|
|
uint32 datm:1; // 44
|
|
uint32 colclamp:1; // 45
|
|
uint32 fba:1; // 46
|
|
uint32 dthe:1; // 47
|
|
uint32 prim:2; // 48
|
|
uint32 lcm:1; // 50
|
|
uint32 mmin:2; // 51
|
|
uint32 noscissor:1; // 53
|
|
uint32 tpsm:4; // 54
|
|
uint32 aem:1; // 58
|
|
uint32 merged:1; // 59
|
|
// TODO
|
|
};
|
|
|
|
struct
|
|
{
|
|
uint32 _pad1:24;
|
|
uint32 ababcd:8;
|
|
uint32 _pad2:2;
|
|
uint32 fb:2;
|
|
uint32 _pad3:1;
|
|
uint32 zb:2;
|
|
};
|
|
|
|
struct
|
|
{
|
|
uint32 lo;
|
|
uint32 hi;
|
|
};
|
|
|
|
uint64 key;
|
|
|
|
operator uint64() const { return key; }
|
|
|
|
bool IsSolidRect() const
|
|
{
|
|
return prim == GS_SPRITE_CLASS
|
|
&& iip == 0
|
|
&& tfx == TFX_NONE
|
|
&& abe == 0
|
|
&& ztst <= 1
|
|
&& atst <= 1
|
|
&& date == 0
|
|
&& fge == 0;
|
|
}
|
|
};
|
|
|
|
__aligned(struct, 32) TFXParameter
|
|
{
|
|
GSVector4i scissor;
|
|
GSVector4i dimx; // 4x4 signed char
|
|
TFXSelector sel;
|
|
uint32 fbp, zbp, bw;
|
|
uint32 fm, zm;
|
|
uint32 fog; // rgb
|
|
uint8 aref, afix;
|
|
uint8 ta0, ta1;
|
|
uint32 tbp[7], tbw[7];
|
|
int minu, maxu, minv, maxv; // umsk, ufix, vmsk, vfix
|
|
int lod; // lcm == 1
|
|
int mxl;
|
|
float l; // TEX1.L * -0x10000
|
|
float k; // TEX1.K * 0x10000
|
|
uint32 clut[256];
|
|
};
|
|
|
|
class TFXJob
|
|
{
|
|
public:
|
|
struct { int x, y, z, w; } rect;
|
|
TFXSelector sel;
|
|
uint32 ib_start;
|
|
uint32 prim_count;
|
|
GSVector4i* src_pages; // read by any texture level
|
|
GSVector4i* dst_pages; // f/z writes to it
|
|
uint32 fbp, zbp, bw;
|
|
uint32 fpsm, zpsm, tpsm;
|
|
#ifdef DEBUG
|
|
TFXParameter* pb;
|
|
#endif
|
|
TFXJob();
|
|
virtual ~TFXJob();
|
|
|
|
GSVector4i* GetSrcPages();
|
|
GSVector4i* GetDstPages();
|
|
};
|
|
|
|
class CL
|
|
{
|
|
std::string kernel_str;
|
|
std::map<uint32, cl::Kernel> prim_map;
|
|
std::map<uint32, cl::Kernel> tile_map;
|
|
std::map<uint64, cl::Kernel> tfx_map;
|
|
|
|
cl::Kernel Build(const char* entry, ostringstream& opt);
|
|
void AddDefs(ostringstream& opt);
|
|
|
|
public:
|
|
std::vector<OCLDeviceDesc> devs;
|
|
cl::Context context;
|
|
cl::CommandQueue queue[3];
|
|
cl::Buffer vm;
|
|
cl::Buffer tex;
|
|
struct { cl::Buffer buff[2]; size_t head, tail, size; unsigned char* ptr; void* mapped_ptr; } vb, ib, pb;
|
|
cl::Buffer env;
|
|
cl::CommandQueue* wq;
|
|
int wqidx;
|
|
uint32 WIs;
|
|
int version;
|
|
|
|
public:
|
|
CL();
|
|
virtual ~CL();
|
|
|
|
cl::Kernel& GetPrimKernel(const PrimSelector& sel);
|
|
cl::Kernel& GetTileKernel(const TileSelector& sel);
|
|
cl::Kernel& GetTFXKernel(const TFXSelector& sel);
|
|
|
|
void Map();
|
|
void Unmap();
|
|
};
|
|
|
|
CL m_cl;
|
|
std::list<shared_ptr<TFXJob>> m_jobs;
|
|
uint32 m_vb_start;
|
|
uint32 m_vb_count;
|
|
uint32 m_pb_start;
|
|
uint32 m_pb_count;
|
|
bool m_synced;
|
|
|
|
void Enqueue();
|
|
void EnqueueTFX(std::list<shared_ptr<TFXJob>>& jobs, uint32 bin_count, const cl_uchar4& bin_dim);
|
|
void JoinTFX(std::list<shared_ptr<TFXJob>>& jobs);
|
|
bool UpdateTextureCache(TFXJob* job);
|
|
void InvalidateTextureCache(TFXJob* job);
|
|
void UsePages(uint32* pages);
|
|
void ReleasePages(uint32* pages);
|
|
|
|
static void CL_CALLBACK ReleasePageEvent(cl_event event, cl_int event_command_exec_status, void* user_data);
|
|
|
|
protected:
|
|
GSTexture* m_texture[2];
|
|
uint8* m_output;
|
|
|
|
GSVector4i m_rw_pages[2][4]; // pages that may be read or modified by the rendering queue, f/z rw, tex r
|
|
GSVector4i m_tc_pages[4]; // invalidated texture cache pages (split this into 8:24?) // TODO: this should be block level, too many overlaps inside pages with render targets
|
|
GSVector4i m_tmp_pages[4];
|
|
uint32 m_tmp_pages2[MAX_PAGES + 1];
|
|
uint32 m_rw_pages_rendering[512]; // pages that are currently in-use
|
|
|
|
void Reset();
|
|
void VSync(int field);
|
|
void ResetDevice();
|
|
GSTexture* GetOutput(int i, int& y_offset);
|
|
|
|
void Draw();
|
|
void Sync(int reason);
|
|
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
|
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
|
|
|
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
|
|
|
|
public:
|
|
GSRendererCL();
|
|
virtual ~GSRendererCL();
|
|
};
|
|
|
|
#endif
|