/*
 *	Copyright (C) 2007-2009 Gabest
 *	http://www.gabest.org
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING. If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#pragma once

#include "GSRenderer.h"

// Everything below is compiled only when ENABLE_OPENCL is defined.
#ifdef ENABLE_OPENCL

// Vertex record holding two GSVector4 members, declared with 32-byte alignment
// through the project's __aligned macro.
// NOTE(review): p/t are presumably position and texture coordinate - confirm
// against ConvertVertexBuffer's implementation.
__aligned(struct, 32) GSVertexCL
{
	GSVector4 p, t;
};

// Renderer derived from GSRenderer. This header only declares the class; all
// member functions except the small inline selector helpers are defined
// elsewhere.
class GSRendererCL : public GSRenderer
{
	// Pointer-to-member type for routines that convert `count` GSVertex
	// entries from `src` into GSVertexCL entries at `dst`.
	typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);

	// 4 x 2 x 2 table of conversion routines.
	// NOTE(review): the meaning of the three indices is not visible here;
	// presumably they mirror ConvertVertexBuffer's template parameters.
	ConvertVertexBufferPtr m_cvb[4][2][2];

	// NOTE(review): "template" is followed directly by "void" - the template
	// parameter list appears to have been lost and must be restored from the
	// upstream source before this header can compile.
	template void ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);

	// Selector for the primitive kernel: a 2-bit `prim` field overlaid on a
	// 32-bit `key`. Converts implicitly to uint32 by returning `key`.
	// Trailing numbers on bit-field lines are the field's starting bit.
	union PrimSelector
	{
		struct
		{
			uint32 prim:2; // 0
		};

		uint32 key;

		operator uint32() const { return key; }
	};

	// Selector for the tile kernel: `prim`, `mode` and `clear` bit-fields
	// overlaid on a 32-bit `key`. Converts implicitly to uint32 by returning
	// `key`.
	union TileSelector
	{
		struct
		{
			uint32 prim:2; // 0
			uint32 mode:2; // 2
			uint32 clear:1; // 4
		};

		uint32 key;

		operator uint32() const { return key; }
	};

	// Selector for the TFX kernel. Three views share the same storage:
	//  - the individual bit-fields (60 bits are assigned, per the offsets in
	//    the trailing comments),
	//  - a second bit-field struct that groups some of them (see below),
	//  - the lo/hi 32-bit halves and the full 64-bit `key`.
	// NOTE(review): the documented bit offsets and the aliasing between the
	// two bit-field structs rely on the compiler packing bit-fields
	// contiguously from bit 0, which is implementation-defined.
	union TFXSelector
	{
		struct
		{
			uint32 fpsm:3; // 0
			uint32 zpsm:3; // 3
			uint32 ztst:2; // 6 (0: off, 1: write, 2: test (ge), 3: test (g))
			uint32 atst:3; // 8
			uint32 afail:2; // 11
			uint32 iip:1; // 13
			uint32 tfx:3; // 14
			uint32 tcc:1; // 17
			uint32 fst:1; // 18
			uint32 ltf:1; // 19
			uint32 tlu:1; // 20
			uint32 fge:1; // 21
			uint32 date:1; // 22
			uint32 abe:1; // 23
			uint32 aba:2; // 24
			uint32 abb:2; // 26
			uint32 abc:2; // 28
			uint32 abd:2; // 30
			uint32 pabe:1; // 32
			uint32 aa1:1; // 33
			uint32 fwrite:1; // 34
			uint32 ftest:1; // 35
			uint32 rfb:1; // 36
			uint32 zwrite:1; // 37
			uint32 ztest:1; // 38
			uint32 rzb:1; // 39
			uint32 wms:2; // 40
			uint32 wmt:2; // 42
			uint32 datm:1; // 44
			uint32 colclamp:1; // 45
			uint32 fba:1; // 46
			uint32 dthe:1; // 47
			uint32 prim:2; // 48
			uint32 lcm:1; // 50
			uint32 mmin:2; // 51
			uint32 noscissor:1; // 53
			uint32 tpsm:4; // 54
			uint32 aem:1; // 58
			uint32 merged:1; // 59
			// TODO
		};

		// Grouped view over the fields above. By the widths declared here:
		//   ababcd = bits 24..31 (aba, abb, abc, abd)
		//   fb     = bits 34..35 (fwrite, ftest)
		//   zb     = bits 37..38 (zwrite, ztest)
		// The _pad* members only skip the bits in between.
		struct
		{
			uint32 _pad1:24;
			uint32 ababcd:8;
			uint32 _pad2:2;
			uint32 fb:2;
			uint32 _pad3:1;
			uint32 zb:2;
		};

		// 32-bit halves of the key.
		struct
		{
			uint32 lo;
			uint32 hi;
		};

		uint64 key;

		// Implicit conversion returns the full 64-bit key.
		operator uint64() const { return key; }

		// True only when every one of these holds: prim is GS_SPRITE_CLASS,
		// iip == 0, tfx is TFX_NONE, abe == 0, ztst <= 1 (off or write per
		// the ztst comment above), atst <= 1, date == 0 and fge == 0.
		bool IsSolidRect() const
		{
			return prim == GS_SPRITE_CLASS
				&& iip == 0
				&& tfx == TFX_NONE
				&& abe == 0
				&& ztst <= 1
				&& atst <= 1
				&& date == 0
				&& fge == 0;
		}
	};

	// Per-draw parameter block, declared with 32-byte alignment.
	// NOTE(review): field order and sizes presumably have to match a
	// device-side struct in the OpenCL kernel source - confirm before
	// reordering or resizing anything here.
	__aligned(struct, 32) TFXParameter
	{
		GSVector4i scissor;
		GSVector4i dimx; // 4x4 signed char
		TFXSelector sel;
		uint32 fbp, zbp, bw;
		uint32 fm, zm;
		uint32 fog; // rgb
		uint8 aref, afix;
		uint8 ta0, ta1;
		uint32 tbp[7], tbw[7];
		int minu, maxu, minv, maxv; // umsk, ufix, vmsk, vfix
		int lod; // lcm == 1
		int mxl;
		float l; // TEX1.L * -0x10000
		float k; // TEX1.K * 0x10000
		uint32 clut[256];
	};

	// Description of one queued TFX job: a rectangle, the selector, an index
	// buffer start and primitive count, page-list pointers and buffer/format
	// values. Has a virtual destructor.
	class TFXJob
	{
	public:
		struct { int x, y, z, w; } rect;
		TFXSelector sel;
		uint32 ib_start;
		uint32 prim_count;
		GSVector4i* src_pages; // read by any texture level
		GSVector4i* dst_pages; // f/z writes to it
		uint32 fbp, zbp, bw;
		uint32 fpsm, zpsm, tpsm;

		// Only present in DEBUG builds.
		#ifdef DEBUG
		TFXParameter* pb;
		#endif

		TFXJob();
		virtual ~TFXJob();

		// NOTE(review): ownership and lifetime of the returned page lists
		// are not visible in this header - check the definitions.
		GSVector4i* GetSrcPages();
		GSVector4i* GetDstPages();
	};

	// Groups the OpenCL objects used by the renderer (context, command
	// queues, buffers) together with the kernel lookup maps.
	class CL
	{
		std::string kernel_str;

		// NOTE(review): the three maps below have no template arguments -
		// they appear to have been stripped. Presumably keyed by the
		// selector key with cl::Kernel values; restore from upstream.
		std::map prim_map;
		std::map tile_map;
		std::map tfx_map;

		cl::Kernel Build(const char* entry, ostringstream& opt);
		void AddDefs(ostringstream& opt);

	public:
		// NOTE(review): element type is missing (template arguments
		// stripped); restore from upstream.
		std::vector devs;
		cl::Context context;
		cl::CommandQueue queue[3];
		cl::Buffer vm;
		cl::Buffer tex;

		// Three instances of the same bookkeeping struct: two cl::Buffer
		// objects, head/tail/size counters and two raw pointers.
		// NOTE(review): vb/ib/pb are presumably the vertex, index and
		// parameter buffers - confirm in the implementation.
		struct { cl::Buffer buff[2]; size_t head, tail, size; unsigned char* ptr; void* mapped_ptr; } vb, ib, pb;
		cl::Buffer env;

		// NOTE(review): presumably the currently selected entry of `queue`
		// and its index - confirm.
		cl::CommandQueue* wq;
		int wqidx;
		uint32 WIs;
		int version;

	public:
		CL();
		virtual ~CL();

		// Kernel accessors, one per selector type; each returns a reference
		// to a cl::Kernel.
		cl::Kernel& GetPrimKernel(const PrimSelector& sel);
		cl::Kernel& GetTileKernel(const TileSelector& sel);
		cl::Kernel& GetTFXKernel(const TFXSelector& sel);

		void Map();
		void Unmap();
	};

	CL m_cl;

	// NOTE(review): "std::list>" is missing its template arguments here and
	// in EnqueueTFX/JoinTFX below. The stray '>' suggests a nested template
	// such as a smart pointer to TFXJob; restore from upstream.
	std::list> m_jobs;
	uint32 m_vb_start;
	uint32 m_vb_count;
	uint32 m_pb_start;
	uint32 m_pb_count;
	bool m_synced;

	void Enqueue();
	void EnqueueTFX(std::list>& jobs, uint32 bin_count, const cl_uchar4& bin_dim);
	void JoinTFX(std::list>& jobs);
	bool UpdateTextureCache(TFXJob* job);
	void InvalidateTextureCache(TFXJob* job);
	void UsePages(uint32* pages);
	void ReleasePages(uint32* pages);

	// Static callback taking (cl_event, cl_int status, void* user_data).
	// NOTE(review): the parameter list matches the OpenCL event-callback
	// form; presumably registered via clSetEventCallback - confirm.
	static void CL_CALLBACK ReleasePageEvent(cl_event event, cl_int event_command_exec_status, void* user_data);

protected:
	GSTexture* m_texture[2];
	uint8* m_output;

	GSVector4i m_rw_pages[2][4]; // pages that may be read or modified by the rendering queue, f/z rw, tex r
	GSVector4i m_tc_pages[4]; // invalidated texture cache pages (split this into 8:24?)
	// TODO: this should be block level, too many overlaps inside pages with render targets
	GSVector4i m_tmp_pages[4];
	uint32 m_tmp_pages2[MAX_PAGES + 1];
	uint32 m_rw_pages_rendering[512]; // pages that are currently in-use

	// NOTE(review): the functions below are presumably overrides of
	// GSRenderer virtuals, but no `override`/`virtual` marker is present
	// here - verify against GSRenderer.h.
	void Reset();
	void VSync(int field);
	void ResetDevice();
	GSTexture* GetOutput(int i, int& y_offset);
	void Draw();
	void Sync(int reason);
	void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
	void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);

	// Takes the job, the parameter block to fill and the vertex/index arrays
	// with their counts; returns bool.
	// NOTE(review): the meaning of the return value is not visible here.
	bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);

public:
	GSRendererCL();
	virtual ~GSRendererCL();
};

#endif