From 610df4ab78eefa79aca731768dd6f11d4564b34c Mon Sep 17 00:00:00 2001 From: Soren Jorvang Date: Tue, 8 Mar 2011 23:25:37 +0000 Subject: [PATCH] Fix OS X nowx. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7321 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/DolphinWX/Src/MainNoGUI.cpp | 13 +- .../Plugin_VideoDX11/Src/PSTextureEncoder.cpp | 2872 ++++++++--------- .../Plugin_VideoDX11/Src/PSTextureEncoder.h | 238 +- .../Plugin_VideoDX11/Src/TextureEncoder.h | 270 +- 4 files changed, 1697 insertions(+), 1696 deletions(-) diff --git a/Source/Core/DolphinWX/Src/MainNoGUI.cpp b/Source/Core/DolphinWX/Src/MainNoGUI.cpp index 37dc0ff515..e0afea9ac7 100644 --- a/Source/Core/DolphinWX/Src/MainNoGUI.cpp +++ b/Source/Core/DolphinWX/Src/MainNoGUI.cpp @@ -274,6 +274,13 @@ void X11_MainLoop() int main(int argc, char* argv[]) { +#ifdef __APPLE__ + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + NSEvent *event = [[NSEvent alloc] init]; + [NSApplication sharedApplication]; + [NSApp activateIgnoringOtherApps: YES]; + [NSApp finishLaunching]; +#endif int ch, help = 0; struct option longopts[] = { { "exec", no_argument, NULL, 'e' }, @@ -317,12 +324,6 @@ int main(int argc, char* argv[]) if (BootManager::BootCore(argv[optind])) { #ifdef __APPLE__ - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - NSEvent *event = [[NSEvent alloc] init]; - [NSApplication sharedApplication]; - [NSApp activateIgnoringOtherApps: YES]; - [NSApp finishLaunching]; - while (running) { event = [NSApp nextEventMatchingMask: NSAnyEventMask diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp index b66640326a..b0e1bd7d75 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp @@ -1,1436 +1,1436 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include "PSTextureEncoder.h" - -#include "D3DBase.h" -#include "D3DShader.h" -#include "GfxState.h" -#include "BPMemory.h" -#include "FramebufferManager.h" -#include "Render.h" -#include "HW/Memmap.h" -#include "TextureCache.h" - -// "Static mode" will compile a new EFB encoder shader for every combination of -// encoding configurations. It's compatible with Shader Model 4. - -// "Dynamic mode" will use the dynamic-linking feature of Shader Model 5. Only -// one shader needs to be compiled. - -// Unfortunately, the June 2010 DirectX SDK includes a broken HLSL compiler -// which cripples dynamic linking for us. -// See . -// Dynamic mode is disabled for now. To enable it, uncomment the line below. - -//#define USE_DYNAMIC_MODE - -// FIXME: When Microsoft fixes their HLSL compiler, make Dolphin enable dynamic -// mode on Shader Model 5-compatible cards. - -namespace DX11 -{ - -union EFBEncodeParams -{ - struct - { - FLOAT NumHalfCacheLinesX; - FLOAT NumBlocksY; - FLOAT PosX; - FLOAT PosY; - FLOAT TexLeft; - FLOAT TexTop; - FLOAT TexRight; - FLOAT TexBottom; - }; - // Constant buffers must be a multiple of 16 bytes in size. - u8 pad[32]; // Pad to the next multiple of 16 bytes -}; - -static const char EFB_ENCODE_VS[] = -"// dolphin-emu EFB encoder vertex shader\n" - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"struct Output\n" -"{\n" - "float4 Pos : SV_Position;\n" - "float2 Coord : ENCODECOORD;\n" -"};\n" - -"Output main(in float2 Pos : POSITION)\n" -"{\n" - "Output result;\n" - "result.Pos = float4(2*Pos.x-1, -2*Pos.y+1, 0.0, 1.0);\n" - "result.Coord = Pos * float2(Params.NumHalfCacheLinesX, Params.NumBlocksY);\n" - "return result;\n" -"}\n" -; - -static const char EFB_ENCODE_PS[] = -"// dolphin-emu EFB encoder pixel shader\n" - -// Input - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"Texture2D EFBTexture : register(t0);\n" -"sampler EFBSampler : register(s0);\n" - -// Constants - -"static const float2 INV_EFB_DIMS = float2(1.0/640.0, 1.0/528.0);\n" - -// FIXME: Is this correct? -"static const float3 INTENSITY_COEFFS = float3(0.257, 0.504, 0.098);\n" -"static const float INTENSITY_ADD = 16.0/255.0;\n" - -// Utility functions - -"uint4 Swap4_32(uint4 v) {\n" - "return (((v >> 24) & 0xFF) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | ((v << 24) & 0xFF000000));\n" -"}\n" - -"uint4 UINT4_8888_BE(uint4 a, uint4 b, uint4 c, uint4 d) {\n" - "return (d << 24) | (c << 16) | (b << 8) | a;\n" -"}\n" - -"uint UINT_44444444_BE(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n" - "return (g << 28) | (h << 24) | (e << 20) | (f << 16) | (c << 12) | (d << 8) | (a << 4) | b;\n" -"}\n" - -"uint UINT_1555(uint a, uint b, uint c, uint d) {\n" - "return (a << 15) | (b << 10) | (c << 5) | d;\n" -"}\n" - -"uint UINT_3444(uint a, uint b, uint c, uint d) {\n" - "return (a << 12) | (b << 8) | (c << 4) | d;\n" -"}\n" - -"uint UINT_565(uint a, uint b, uint c) {\n" - "return (a << 11) | (b << 5) | c;\n" -"}\n" - -"uint UINT_1616(uint a, uint b) {\n" - "return (a << 16) | b;\n" -"}\n" - -"uint EncodeRGB5A3(float4 pixel) {\n" - "if (pixel.a >= 224.0/255.0) {\n" - // Encode to ARGB1555 - "return UINT_1555(1, pixel.r*31, pixel.g*31, pixel.b*31);\n" - "} else {\n" - // Encode to ARGB3444 - "return UINT_3444(pixel.a*7, pixel.r*15, pixel.g*15, pixel.b*15);\n" - "}\n" -"}\n" - -"uint EncodeRGB565(float4 pixel) {\n" - "return UINT_565(pixel.r*31, pixel.g*63, pixel.b*31);\n" -"}\n" - -"float2 CalcTexCoord(float2 coord)\n" -"{\n" - // Add 0.5,0.5 to sample from the center of the EFB pixel - "float2 efbCoord = coord + float2(0.5,0.5);\n" - "return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n" -"}\n" - -// Interface and classes for different source formats - -"float4 Fetch_0(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_1(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "return EFBTexture.Sample(EFBSampler, texCoord);\n" -"}\n" - -"float4 Fetch_2(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_3(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - - "uint depth24 = 0xFFFFFF * EFBTexture.Sample(EFBSampler, texCoord).r;\n" - "uint4 bytes = uint4(\n" - "(depth24 >> 16) & 0xFF,\n" // r - "(depth24 >> 8) & 0xFF,\n" // g - "depth24 & 0xFF,\n" // b - "255);\n" // a - "return bytes / 255.0;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iFetch\n" -"{\n" - "float4 Fetch(float2 coord);\n" -"};\n" - -// Source format 0 -"class cFetch_0 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_0(coord); }\n" -"};\n" - - -// Source format 1 -"class cFetch_1 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_1(coord); }\n" -"};\n" - -// Source format 2 -"class cFetch_2 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_2(coord); }\n" -"};\n" - -// Source format 3 -"class cFetch_3 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_3(coord); }\n" -"};\n" - -// Declare fetch interface; must be set by application -"iFetch g_fetch;\n" -"#define IMP_FETCH g_fetch.Fetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_FETCH\n" -"#error No Fetch specified\n" -"#endif\n" - -// Interface and classes for different intensity settings (on or off) - -"float4 Intensity_0(float4 sample)\n" -"{\n" - "return sample;\n" -"}\n" - -"float4 Intensity_1(float4 sample)\n" -"{\n" - "sample.r = dot(INTENSITY_COEFFS, sample.rgb) + INTENSITY_ADD;\n" - // FIXME: Is this correct? What happens if you use one of the non-R - // formats with intensity on? - "sample = sample.rrrr;\n" - "return sample;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iIntensity\n" -"{\n" - "float4 Intensity(float4 sample);\n" -"};\n" - -// Intensity off -"class cIntensity_0 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_0(sample); }\n" -"};\n" - -// Intensity on -"class cIntensity_1 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_1(sample); }\n" -"};\n" - -// Declare intensity interface; must be set by application -"iIntensity g_intensity;\n" -"#define IMP_INTENSITY g_intensity.Intensity\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_INTENSITY\n" -"#error No Intensity specified\n" -"#endif\n" - - -// Interface and classes for different scale/filter settings (on or off) - -"float4 ScaledFetch_0(float2 coord)\n" -"{\n" - "return IMP_FETCH(float2(Params.PosX,Params.PosY) + coord);\n" -"}\n" - -"float4 ScaledFetch_1(float2 coord)\n" -"{\n" - "float2 ul = float2(Params.PosX,Params.PosY) + 2*coord;\n" - "float4 sample0 = IMP_FETCH(ul+float2(0,0));\n" - "float4 sample1 = IMP_FETCH(ul+float2(1,0));\n" - "float4 sample2 = IMP_FETCH(ul+float2(0,1));\n" - "float4 sample3 = IMP_FETCH(ul+float2(1,1));\n" - // Average all four samples together - // FIXME: Is this correct? - "return 0.25 * (sample0+sample1+sample2+sample3);\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord);\n" -"};\n" - -// Scale off -"class cScaledFetch_0 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_0(coord); }\n" -"};\n" - -// Scale on -"class cScaledFetch_1 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_1(coord); }\n" -"};\n" - -// Declare scaled fetch interface; must be set by application code -"iScaledFetch g_scaledFetch;\n" -"#define IMP_SCALEDFETCH g_scaledFetch.ScaledFetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_SCALEDFETCH\n" -"#error No ScaledFetch specified\n" -"#endif\n" - -// Main EFB-sampling function: performs all steps of fetching pixels, scaling, -// applying intensity function - -"float4 SampleEFB(float2 coord)\n" -"{\n" - // FIXME: Does intensity happen before or after scaling? Or does - // it matter? - "float4 sample = IMP_SCALEDFETCH(coord);\n" - "return IMP_INTENSITY(sample);\n" -"}\n" - -// Interfaces and classes for different destination formats - -"uint4 Generate_0(float2 cacheCoord)\n" // R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,8);\n" - "float2 subBlockUL = blockUL + float2(0, 4*(cacheCoord.x%2));\n" - - "float4 sample[32];\n" - "for (uint y = 0; y < 4; ++y) {\n" - "for (uint x = 0; x < 8; ++x) {\n" - "sample[y*8+x] = SampleEFB(subBlockUL+float2(x,y));\n" - "}\n" - "}\n" - - "uint dw[4];\n" - "for (uint i = 0; i < 4; ++i) {\n" - "dw[i] = UINT_44444444_BE(\n" - "15*sample[8*i+0].r,\n" - "15*sample[8*i+1].r,\n" - "15*sample[8*i+2].r,\n" - "15*sample[8*i+3].r,\n" - "15*sample[8*i+4].r,\n" - "15*sample[8*i+5].r,\n" - "15*sample[8*i+6].r,\n" - "15*sample[8*i+7].r\n" - ");\n" - "}\n" - - "return uint4(dw[0], dw[1], dw[2], dw[3]);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_1(float2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?) -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n" - "255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n" - "255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n" - "255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_2(float2 cacheCoord)\n" // A4 R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint dw0 = UINT_44444444_BE(\n" - "15*sample0.a, 15*sample0.r,\n" - "15*sample1.a, 15*sample1.r,\n" - "15*sample2.a, 15*sample2.r,\n" - "15*sample3.a, 15*sample3.r\n" - ");\n" - "uint dw1 = UINT_44444444_BE(\n" - "15*sample4.a, 15*sample4.r,\n" - "15*sample5.a, 15*sample5.r,\n" - "15*sample6.a, 15*sample6.r,\n" - "15*sample7.a, 15*sample7.r\n" - ");\n" - "uint dw2 = UINT_44444444_BE(\n" - "15*sample8.a, 15*sample8.r,\n" - "15*sample9.a, 15*sample9.r,\n" - "15*sampleA.a, 15*sampleA.r,\n" - "15*sampleB.a, 15*sampleB.r\n" - ");\n" - "uint dw3 = UINT_44444444_BE(\n" - "15*sampleC.a, 15*sampleC.r,\n" - "15*sampleD.a, 15*sampleD.r,\n" - "15*sampleE.a, 15*sampleE.r,\n" - "15*sampleF.a, 15*sampleF.r\n" - ");\n" - - "return uint4(dw0, dw1, dw2, dw3);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_3(float2 cacheCoord)\n" // A8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n" - "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" - "255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n" - "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_4(float2 cacheCoord)\n" // R5 G6 B5 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB565(sample4), EncodeRGB565(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB565(sample6), EncodeRGB565(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_5(float2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB5A3(sample4), EncodeRGB5A3(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB5A3(sample6), EncodeRGB5A3(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_6(float2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(4,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4;\n" - "if (cacheCoord.x % 4 < 2)\n" - "{\n" - // First cache line gets AR - "dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n" - "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" - "255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n" - "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" - ");\n" - "}\n" - "else\n" - "{\n" - // Second cache line gets GB - "dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" - "255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n" - "255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n" - "255*float4(sample1.b, sample3.b, sample5.b, sample7.b)\n" - ");\n" - "}\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_7(float2 cacheCoord)\n" // A8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.a, sample4.a, sample8.a, sampleC.a),\n" - "255*float4(sample1.a, sample5.a, sample9.a, sampleD.a),\n" - "255*float4(sample2.a, sample6.a, sampleA.a, sampleE.a),\n" - "255*float4(sample3.a, sample7.a, sampleB.a, sampleF.a)\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_8(float2 cacheCoord)\n" // R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n" - "255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n" - "255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n" - "255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_9(float2 cacheCoord)\n" // G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.g, sample4.g, sample8.g, sampleC.g),\n" - "255*float4(sample1.g, sample5.g, sample9.g, sampleD.g),\n" - "255*float4(sample2.g, sample6.g, sampleA.g, sampleE.g),\n" - "255*float4(sample3.g, sample7.g, sampleB.g, sampleF.g)\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_A(float2 cacheCoord)\n" // B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.b, sample4.b, sample8.b, sampleC.b),\n" - "255*float4(sample1.b, sample5.b, sample9.b, sampleD.b),\n" - "255*float4(sample2.b, sample6.b, sampleA.b, sampleE.b),\n" - "255*float4(sample3.b, sample7.b, sampleB.b, sampleF.b)\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_B(float2 cacheCoord)\n" // G8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" - "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" - "255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n" - "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_C(float2 cacheCoord)\n" // B8 G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n" - "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" - "255*float4(sample1.b, sample3.b, sample5.b, sample7.b),\n" - "255*float4(sample1.g, sample3.g, sample5.g, sample7.g)\n" - ");\n" - - "return dw4;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord);\n" -"};\n" - -"class cGenerator_4 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_4(cacheCoord); }\n" -"};\n" - -"class cGenerator_5 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_5(cacheCoord); }\n" -"};\n" - -"class cGenerator_6 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_6(cacheCoord); }\n" -"};\n" - -"class cGenerator_8 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_8(cacheCoord); }\n" -"};\n" - -"class cGenerator_B : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_B(cacheCoord); }\n" -"};\n" - -// Declare generator interface; must be set by application -"iGenerator g_generator;\n" -"#define IMP_GENERATOR g_generator.Generate\n" - -"#endif\n" - -"#ifndef IMP_GENERATOR\n" -"#error No generator specified\n" -"#endif\n" - -"void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n" -"{\n" - "float2 cacheCoord = floor(fCacheCoord);\n" - "ocol0 = IMP_GENERATOR(cacheCoord);\n" -"}\n" -; - -PSTextureEncoder::PSTextureEncoder() - : m_ready(false), m_out(NULL), m_outRTV(NULL), m_outStage(NULL), - m_encodeParams(NULL), - m_quad(NULL), m_vShader(NULL), m_quadLayout(NULL), - m_efbEncodeBlendState(NULL), m_efbEncodeDepthState(NULL), - m_efbEncodeRastState(NULL), m_efbSampler(NULL), - m_dynamicShader(NULL), m_classLinkage(NULL) -{ - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = NULL; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = NULL; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = NULL; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = NULL; -} - -static const D3D11_INPUT_ELEMENT_DESC QUAD_LAYOUT_DESC[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 } -}; - -static const struct QuadVertex -{ - float posX; - float posY; -} QUAD_VERTS[4] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; - -void PSTextureEncoder::Init() -{ - m_ready = false; - - HRESULT hr; - - // Create output texture RGBA format - - // This format allows us to generate one cache line in two pixels. - D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( - DXGI_FORMAT_R32G32B32A32_UINT, - EFB_WIDTH, EFB_HEIGHT/4, 1, 1, D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_out); - CHECK(SUCCEEDED(hr), "create efb encode output texture"); - D3D::SetDebugObjectName(m_out, "efb encoder output texture"); - - // Create output render target view - - D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, - D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R32G32B32A32_UINT); - hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); - CHECK(SUCCEEDED(hr), "create efb encode output render target view"); - D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); - - // Create output staging buffer - - t2dd.Usage = D3D11_USAGE_STAGING; - t2dd.BindFlags = 0; - t2dd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_outStage); - CHECK(SUCCEEDED(hr), "create efb encode output staging buffer"); - D3D::SetDebugObjectName(m_outStage, "efb encoder output staging buffer"); - - // Create constant buffer for uploading data to shaders - - D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), - D3D11_BIND_CONSTANT_BUFFER); - hr = D3D::device->CreateBuffer(&bd, NULL, &m_encodeParams); - CHECK(SUCCEEDED(hr), "create efb encode params buffer"); - D3D::SetDebugObjectName(m_encodeParams, "efb encoder params buffer"); - - // Create vertex quad - - bd = CD3D11_BUFFER_DESC(sizeof(QUAD_VERTS), D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_IMMUTABLE); - D3D11_SUBRESOURCE_DATA srd = { QUAD_VERTS, 0, 0 }; - - hr = D3D::device->CreateBuffer(&bd, &srd, &m_quad); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex buffer"); - D3D::SetDebugObjectName(m_quad, "efb encoder quad vertex buffer"); - - // Create vertex shader - - D3DBlob* bytecode = NULL; - if (!D3D::CompileVertexShader(EFB_ENCODE_VS, sizeof(EFB_ENCODE_VS), &bytecode)) - { - ERROR_LOG(VIDEO, "EFB encode vertex shader failed to compile"); - return; - } - - hr = D3D::device->CreateVertexShader(bytecode->Data(), bytecode->Size(), NULL, &m_vShader); - CHECK(SUCCEEDED(hr), "create efb encode vertex shader"); - D3D::SetDebugObjectName(m_vShader, "efb encoder vertex shader"); - - // Create input layout for vertex quad using bytecode from vertex shader - - hr = D3D::device->CreateInputLayout(QUAD_LAYOUT_DESC, - sizeof(QUAD_LAYOUT_DESC)/sizeof(D3D11_INPUT_ELEMENT_DESC), - bytecode->Data(), bytecode->Size(), &m_quadLayout); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex layout"); - D3D::SetDebugObjectName(m_quadLayout, "efb encoder quad layout"); - - bytecode->Release(); - - // Create pixel shader - -#ifdef USE_DYNAMIC_MODE - if (!InitDynamicMode()) -#else - if (!InitStaticMode()) -#endif - return; - - // Create blend state - - D3D11_BLEND_DESC bld = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = D3D::device->CreateBlendState(&bld, &m_efbEncodeBlendState); - CHECK(SUCCEEDED(hr), "create efb encode blend state"); - D3D::SetDebugObjectName(m_efbEncodeBlendState, "efb encoder blend state"); - - // Create depth state - - D3D11_DEPTH_STENCIL_DESC dsd = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - dsd.DepthEnable = FALSE; - hr = D3D::device->CreateDepthStencilState(&dsd, &m_efbEncodeDepthState); - CHECK(SUCCEEDED(hr), "create efb encode depth state"); - D3D::SetDebugObjectName(m_efbEncodeDepthState, "efb encoder depth state"); - - // Create rasterizer state - - D3D11_RASTERIZER_DESC rd = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rd.CullMode = D3D11_CULL_NONE; - rd.DepthClipEnable = FALSE; - hr = D3D::device->CreateRasterizerState(&rd, &m_efbEncodeRastState); - CHECK(SUCCEEDED(hr), "create efb encode rast state"); - D3D::SetDebugObjectName(m_efbEncodeRastState, "efb encoder rast state"); - - // Create efb texture sampler - - D3D11_SAMPLER_DESC sd = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); - CHECK(SUCCEEDED(hr), "create efb encode texture sampler"); - D3D::SetDebugObjectName(m_efbSampler, "efb encoder texture sampler"); - - m_ready = true; -} - -void PSTextureEncoder::Shutdown() -{ - m_ready = false; - - for (size_t i = 0; i < 4; ++i) - SAFE_RELEASE(m_fetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_scaledFetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_intensityClass[i]); - for (size_t i = 0; i < 16; ++i) - SAFE_RELEASE(m_generatorClass[i]); - m_linkageArray.clear(); - - SAFE_RELEASE(m_classLinkage); - SAFE_RELEASE(m_dynamicShader); - - for (ComboMap::iterator it = m_staticShaders.begin(); - it != m_staticShaders.end(); ++it) - { - SAFE_RELEASE(it->second); - } - m_staticShaders.clear(); - - SAFE_RELEASE(m_efbSampler); - SAFE_RELEASE(m_efbEncodeRastState); - SAFE_RELEASE(m_efbEncodeDepthState); - SAFE_RELEASE(m_efbEncodeBlendState); - SAFE_RELEASE(m_quadLayout); - SAFE_RELEASE(m_vShader); - SAFE_RELEASE(m_quad); - SAFE_RELEASE(m_encodeParams); - SAFE_RELEASE(m_outStage); - SAFE_RELEASE(m_outRTV); - SAFE_RELEASE(m_out); -} - -size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, - unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf) -{ - if (!m_ready) // Make sure we initialized OK - return 0; - - HRESULT hr; - - unsigned int blockW = BLOCK_WIDTHS[dstFormat]; - unsigned int blockH = BLOCK_HEIGHTS[dstFormat]; - - // Round up source dims to multiple of block size - unsigned int actualWidth = srcRect.GetWidth() / (scaleByHalf ? 2 : 1); - actualWidth = (actualWidth + blockW-1) & ~(blockW-1); - unsigned int actualHeight = srcRect.GetHeight() / (scaleByHalf ? 2 : 1); - actualHeight = (actualHeight + blockH-1) & ~(blockH-1); - - unsigned int numBlocksX = actualWidth/blockW; - unsigned int numBlocksY = actualHeight/blockH; - - unsigned int cacheLinesPerRow; - if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one - cacheLinesPerRow = numBlocksX*2; - else - cacheLinesPerRow = numBlocksX; - CHECK(cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check"); - - unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY; - CHECK(totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check"); - - size_t encodeSize = 0; - - // Reset API - - g_renderer->ResetAPIState(); - - // Set up all the state for EFB encoding - -#ifdef USE_DYNAMIC_MODE - if (SetDynamicShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#else - if (SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#endif - { - D3D::context->VSSetShader(m_vShader, NULL, 0); - - D3D::stateman->PushBlendState(m_efbEncodeBlendState); - D3D::stateman->PushDepthState(m_efbEncodeDepthState); - D3D::stateman->PushRasterizerState(m_efbEncodeRastState); - D3D::stateman->Apply(); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2), FLOAT(numBlocksY)); - D3D::context->RSSetViewports(1, &vp); - - D3D::context->IASetInputLayout(m_quadLayout); - D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - UINT stride = sizeof(QuadVertex); - UINT offset = 0; - D3D::context->IASetVertexBuffers(0, 1, &m_quad, &stride, &offset); - - EFBRectangle fullSrcRect; - fullSrcRect.left = 0; - fullSrcRect.top = 0; - fullSrcRect.right = EFB_WIDTH; - fullSrcRect.bottom = EFB_HEIGHT; - TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); - - EFBEncodeParams params = { 0 }; - params.NumHalfCacheLinesX = FLOAT(cacheLinesPerRow*2); - params.NumBlocksY = FLOAT(numBlocksY); - params.PosX = FLOAT(srcRect.left); - params.PosY = FLOAT(srcRect.top); - params.TexLeft = float(targetRect.left) / g_renderer->GetFullTargetWidth(); - params.TexTop = float(targetRect.top) / g_renderer->GetFullTargetHeight(); - params.TexRight = float(targetRect.right) / g_renderer->GetFullTargetWidth(); - params.TexBottom = float(targetRect.bottom) / g_renderer->GetFullTargetHeight(); - D3D::context->UpdateSubresource(m_encodeParams, 0, NULL, ¶ms, 0, 0); - - D3D::context->VSSetConstantBuffers(0, 1, &m_encodeParams); - - D3D::context->OMSetRenderTargets(1, &m_outRTV, NULL); - - ID3D11ShaderResourceView* pEFB = (srcFormat == PIXELFMT_Z24) ? - FramebufferManager::GetEFBDepthTexture()->GetSRV() : - FramebufferManager::GetEFBColorTexture()->GetSRV(); - - D3D::context->PSSetConstantBuffers(0, 1, &m_encodeParams); - D3D::context->PSSetShaderResources(0, 1, &pEFB); - D3D::context->PSSetSamplers(0, 1, &m_efbSampler); - - // Encode! - - D3D::context->Draw(4, 0); - - // Copy to staging buffer - - D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2, numBlocksY, 1); - D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); - - // Clean up state - - IUnknown* nullDummy = NULL; - - D3D::context->PSSetSamplers(0, 1, (ID3D11SamplerState**)&nullDummy); - D3D::context->PSSetShaderResources(0, 1, (ID3D11ShaderResourceView**)&nullDummy); - D3D::context->PSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); - - D3D::context->OMSetRenderTargets(0, NULL, NULL); - - D3D::context->VSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); - - D3D::stateman->PopRasterizerState(); - D3D::stateman->PopDepthState(); - D3D::stateman->PopBlendState(); - - D3D::context->PSSetShader(NULL, NULL, 0); - D3D::context->VSSetShader(NULL, NULL, 0); - - // Transfer staging buffer to GameCube/Wii RAM - - D3D11_MAPPED_SUBRESOURCE map = { 0 }; - hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); - CHECK(SUCCEEDED(hr), "map staging buffer"); - - u8* src = (u8*)map.pData; - for (unsigned int y = 0; y < numBlocksY; ++y) - { - memcpy(dst, src, cacheLinesPerRow*32); - dst += bpmem.copyMipMapStrideChannels*32; - src += map.RowPitch; - } - - D3D::context->Unmap(m_outStage, 0); - - encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY; - } - - // Restore API - - g_renderer->RestoreAPIState(); - D3D::context->OMSetRenderTargets(1, - &FramebufferManager::GetEFBColorTexture()->GetRTV(), - FramebufferManager::GetEFBDepthTexture()->GetDSV()); - - return encodeSize; -} - -bool PSTextureEncoder::InitStaticMode() -{ - // Nothing to really do. - return true; -} - -static const char* FETCH_FUNC_NAMES[4] = { - "Fetch_0", "Fetch_1", "Fetch_2", "Fetch_3" -}; - -static const char* SCALEDFETCH_FUNC_NAMES[2] = { - "ScaledFetch_0", "ScaledFetch_1" -}; - -static const char* INTENSITY_FUNC_NAMES[2] = { - "Intensity_0", "Intensity_1" -}; - -bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, - bool isIntensity, bool scaleByHalf) -{ - size_t fetchNum = srcFormat; - size_t scaledFetchNum = scaleByHalf ? 1 : 0; - size_t intensityNum = isIntensity ? 1 : 0; - size_t generatorNum = dstFormat; - - ComboKey key = MakeComboKey(dstFormat, srcFormat, isIntensity, scaleByHalf); - - ComboMap::iterator it = m_staticShaders.find(key); - if (it == m_staticShaders.end()) - { - const char* generatorFuncName = NULL; - switch (generatorNum) - { - case 0x0: generatorFuncName = "Generate_0"; break; - case 0x1: generatorFuncName = "Generate_1"; break; - case 0x2: generatorFuncName = "Generate_2"; break; - case 0x3: generatorFuncName = "Generate_3"; break; - case 0x4: generatorFuncName = "Generate_4"; break; - case 0x5: generatorFuncName = "Generate_5"; break; - case 0x6: generatorFuncName = "Generate_6"; break; - case 0x7: generatorFuncName = "Generate_7"; break; - case 0x8: generatorFuncName = "Generate_8"; break; - case 0x9: generatorFuncName = "Generate_9"; break; - case 0xA: generatorFuncName = "Generate_A"; break; - case 0xB: generatorFuncName = "Generate_B"; break; - case 0xC: generatorFuncName = "Generate_C"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - m_staticShaders[key] = NULL; - return false; - } - - INFO_LOG(VIDEO, "Compiling efb encoding shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d", - dstFormat, srcFormat, isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - - // Shader permutation not found, so compile it - D3DBlob* bytecode = NULL; - D3D_SHADER_MACRO macros[] = { - { "IMP_FETCH", FETCH_FUNC_NAMES[fetchNum] }, - { "IMP_SCALEDFETCH", SCALEDFETCH_FUNC_NAMES[scaledFetchNum] }, - { "IMP_INTENSITY", INTENSITY_FUNC_NAMES[intensityNum] }, - { "IMP_GENERATOR", generatorFuncName }, - { NULL, NULL } - }; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) - { - WARN_LOG(VIDEO, "EFB encoder shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d failed to compile", - dstFormat, srcFormat, isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - // Add dummy shader to map to prevent trying to compile over and - // over again - m_staticShaders[key] = NULL; - return false; - } - - ID3D11PixelShader* newShader; - HRESULT hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), NULL, &newShader); - CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); - - m_staticShaders[key] = newShader; - bytecode->Release(); - - it = m_staticShaders.find(key); - } - - if (it != m_staticShaders.end()) - { - if (it->second) - { - D3D::context->PSSetShader(it->second, NULL, 0); - return true; - } - else - return false; - } - else - return false; -} - -bool PSTextureEncoder::InitDynamicMode() -{ - HRESULT hr; - - D3D_SHADER_MACRO macros[] = { - { "DYNAMIC_MODE", NULL }, - { NULL, NULL } - }; - - D3DBlob* bytecode = NULL; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) - { - ERROR_LOG(VIDEO, "EFB encode pixel shader failed to compile"); - return false; - } - - hr = D3D::device->CreateClassLinkage(&m_classLinkage); - CHECK(SUCCEEDED(hr), "create efb encode class linkage"); - D3D::SetDebugObjectName(m_classLinkage, "efb encoder class linkage"); - - hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), m_classLinkage, &m_dynamicShader); - CHECK(SUCCEEDED(hr), "create efb encode pixel shader"); - D3D::SetDebugObjectName(m_dynamicShader, "efb encoder pixel shader"); - - // Use D3DReflect - - ID3D11ShaderReflection* reflect = NULL; - hr = PD3DReflect(bytecode->Data(), bytecode->Size(), IID_ID3D11ShaderReflection, (void**)&reflect); - CHECK(SUCCEEDED(hr), "reflect on efb encoder shader"); - - // Get number of slots and create dynamic linkage array - - UINT numSlots = reflect->GetNumInterfaceSlots(); - m_linkageArray.resize(numSlots, NULL); - - // Get interface slots - - ID3D11ShaderReflectionVariable* var = reflect->GetVariableByName("g_fetch"); - m_fetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_scaledFetch"); - m_scaledFetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_intensity"); - m_intensitySlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_generator"); - m_generatorSlot = var->GetInterfaceSlot(0); - - INFO_LOG(VIDEO, "fetch slot %d, scaledFetch slot %d, intensity slot %d, generator slot %d", - m_fetchSlot, m_scaledFetchSlot, m_intensitySlot, m_generatorSlot); - - // Class instances will be created at the time they are used - - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = NULL; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = NULL; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = NULL; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = NULL; - - reflect->Release(); - bytecode->Release(); - - return true; -} - -static const char* FETCH_CLASS_NAMES[4] = { - "cFetch_0", "cFetch_1", "cFetch_2", "cFetch_3" -}; - -static const char* SCALEDFETCH_CLASS_NAMES[2] = { - "cScaledFetch_0", "cScaledFetch_1" -}; - -static const char* INTENSITY_CLASS_NAMES[2] = { - "cIntensity_0", "cIntensity_1" -}; - -bool PSTextureEncoder::SetDynamicShader(unsigned int dstFormat, - unsigned int srcFormat, bool isIntensity, bool scaleByHalf) -{ - size_t fetchNum = srcFormat; - size_t scaledFetchNum = scaleByHalf ? 1 : 0; - size_t intensityNum = isIntensity ? 1 : 0; - size_t generatorNum = dstFormat; - - // FIXME: Not all the possible generators are available as classes yet. - // When dynamic mode is usable, implement them. - const char* generatorName = NULL; - switch (generatorNum) - { - case 0x4: generatorName = "cGenerator_4"; break; - case 0x5: generatorName = "cGenerator_5"; break; - case 0x6: generatorName = "cGenerator_6"; break; - case 0x8: generatorName = "cGenerator_8"; break; - case 0xB: generatorName = "cGenerator_B"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - return false; - } - - // Make sure class instances are available - if (!m_fetchClass[fetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - FETCH_CLASS_NAMES[fetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - FETCH_CLASS_NAMES[fetchNum], 0, 0, 0, 0, &m_fetchClass[fetchNum]); - CHECK(SUCCEEDED(hr), "create fetch class instance"); - } - if (!m_scaledFetchClass[scaledFetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], 0, 0, 0, 0, - &m_scaledFetchClass[scaledFetchNum]); - CHECK(SUCCEEDED(hr), "create scaled fetch class instance"); - } - if (!m_intensityClass[intensityNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - INTENSITY_CLASS_NAMES[intensityNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - INTENSITY_CLASS_NAMES[intensityNum], 0, 0, 0, 0, - &m_intensityClass[intensityNum]); - CHECK(SUCCEEDED(hr), "create intensity class instance"); - } - if (!m_generatorClass[generatorNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - generatorName, dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - generatorName, 0, 0, 0, 0, &m_generatorClass[generatorNum]); - CHECK(SUCCEEDED(hr), "create generator class instance"); - } - - // Assemble dynamic linkage array - if (m_fetchSlot != UINT(-1)) - m_linkageArray[m_fetchSlot] = m_fetchClass[fetchNum]; - if (m_scaledFetchSlot != UINT(-1)) - m_linkageArray[m_scaledFetchSlot] = m_scaledFetchClass[scaledFetchNum]; - if (m_intensitySlot != UINT(-1)) - m_linkageArray[m_intensitySlot] = m_intensityClass[intensityNum]; - if (m_generatorSlot != UINT(-1)) - m_linkageArray[m_generatorSlot] = m_generatorClass[generatorNum]; - - D3D::context->PSSetShader(m_dynamicShader, - m_linkageArray.empty() ? NULL : &m_linkageArray[0], - (UINT)m_linkageArray.size()); - - return true; -} - -} +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "PSTextureEncoder.h" + +#include "D3DBase.h" +#include "D3DShader.h" +#include "GfxState.h" +#include "BPMemory.h" +#include "FramebufferManager.h" +#include "Render.h" +#include "HW/Memmap.h" +#include "TextureCache.h" + +// "Static mode" will compile a new EFB encoder shader for every combination of +// encoding configurations. It's compatible with Shader Model 4. + +// "Dynamic mode" will use the dynamic-linking feature of Shader Model 5. Only +// one shader needs to be compiled. + +// Unfortunately, the June 2010 DirectX SDK includes a broken HLSL compiler +// which cripples dynamic linking for us. +// See . +// Dynamic mode is disabled for now. To enable it, uncomment the line below. + +//#define USE_DYNAMIC_MODE + +// FIXME: When Microsoft fixes their HLSL compiler, make Dolphin enable dynamic +// mode on Shader Model 5-compatible cards. + +namespace DX11 +{ + +union EFBEncodeParams +{ + struct + { + FLOAT NumHalfCacheLinesX; + FLOAT NumBlocksY; + FLOAT PosX; + FLOAT PosY; + FLOAT TexLeft; + FLOAT TexTop; + FLOAT TexRight; + FLOAT TexBottom; + }; + // Constant buffers must be a multiple of 16 bytes in size. + u8 pad[32]; // Pad to the next multiple of 16 bytes +}; + +static const char EFB_ENCODE_VS[] = +"// dolphin-emu EFB encoder vertex shader\n" + +"cbuffer cbParams : register(b0)\n" +"{\n" + "struct\n" // Should match EFBEncodeParams above + "{\n" + "float NumHalfCacheLinesX;\n" + "float NumBlocksY;\n" + "float PosX;\n" // Upper-left corner of source + "float PosY;\n" + "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) + "float TexTop;\n" + "float TexRight;\n" + "float TexBottom;\n" + "} Params;\n" +"}\n" + +"struct Output\n" +"{\n" + "float4 Pos : SV_Position;\n" + "float2 Coord : ENCODECOORD;\n" +"};\n" + +"Output main(in float2 Pos : POSITION)\n" +"{\n" + "Output result;\n" + "result.Pos = float4(2*Pos.x-1, -2*Pos.y+1, 0.0, 1.0);\n" + "result.Coord = Pos * float2(Params.NumHalfCacheLinesX, Params.NumBlocksY);\n" + "return result;\n" +"}\n" +; + +static const char EFB_ENCODE_PS[] = +"// dolphin-emu EFB encoder pixel shader\n" + +// Input + +"cbuffer cbParams : register(b0)\n" +"{\n" + "struct\n" // Should match EFBEncodeParams above + "{\n" + "float NumHalfCacheLinesX;\n" + "float NumBlocksY;\n" + "float PosX;\n" // Upper-left corner of source + "float PosY;\n" + "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) + "float TexTop;\n" + "float TexRight;\n" + "float TexBottom;\n" + "} Params;\n" +"}\n" + +"Texture2D EFBTexture : register(t0);\n" +"sampler EFBSampler : register(s0);\n" + +// Constants + +"static const float2 INV_EFB_DIMS = float2(1.0/640.0, 1.0/528.0);\n" + +// FIXME: Is this correct? +"static const float3 INTENSITY_COEFFS = float3(0.257, 0.504, 0.098);\n" +"static const float INTENSITY_ADD = 16.0/255.0;\n" + +// Utility functions + +"uint4 Swap4_32(uint4 v) {\n" + "return (((v >> 24) & 0xFF) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | ((v << 24) & 0xFF000000));\n" +"}\n" + +"uint4 UINT4_8888_BE(uint4 a, uint4 b, uint4 c, uint4 d) {\n" + "return (d << 24) | (c << 16) | (b << 8) | a;\n" +"}\n" + +"uint UINT_44444444_BE(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n" + "return (g << 28) | (h << 24) | (e << 20) | (f << 16) | (c << 12) | (d << 8) | (a << 4) | b;\n" +"}\n" + +"uint UINT_1555(uint a, uint b, uint c, uint d) {\n" + "return (a << 15) | (b << 10) | (c << 5) | d;\n" +"}\n" + +"uint UINT_3444(uint a, uint b, uint c, uint d) {\n" + "return (a << 12) | (b << 8) | (c << 4) | d;\n" +"}\n" + +"uint UINT_565(uint a, uint b, uint c) {\n" + "return (a << 11) | (b << 5) | c;\n" +"}\n" + +"uint UINT_1616(uint a, uint b) {\n" + "return (a << 16) | b;\n" +"}\n" + +"uint EncodeRGB5A3(float4 pixel) {\n" + "if (pixel.a >= 224.0/255.0) {\n" + // Encode to ARGB1555 + "return UINT_1555(1, pixel.r*31, pixel.g*31, pixel.b*31);\n" + "} else {\n" + // Encode to ARGB3444 + "return UINT_3444(pixel.a*7, pixel.r*15, pixel.g*15, pixel.b*15);\n" + "}\n" +"}\n" + +"uint EncodeRGB565(float4 pixel) {\n" + "return UINT_565(pixel.r*31, pixel.g*63, pixel.b*31);\n" +"}\n" + +"float2 CalcTexCoord(float2 coord)\n" +"{\n" + // Add 0.5,0.5 to sample from the center of the EFB pixel + "float2 efbCoord = coord + float2(0.5,0.5);\n" + "return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n" +"}\n" + +// Interface and classes for different source formats + +"float4 Fetch_0(float2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" + "result.a = 1.0;\n" + "return result;\n" +"}\n" + +"float4 Fetch_1(float2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "return EFBTexture.Sample(EFBSampler, texCoord);\n" +"}\n" + +"float4 Fetch_2(float2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" + "result.a = 1.0;\n" + "return result;\n" +"}\n" + +"float4 Fetch_3(float2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + + "uint depth24 = 0xFFFFFF * EFBTexture.Sample(EFBSampler, texCoord).r;\n" + "uint4 bytes = uint4(\n" + "(depth24 >> 16) & 0xFF,\n" // r + "(depth24 >> 8) & 0xFF,\n" // g + "depth24 & 0xFF,\n" // b + "255);\n" // a + "return bytes / 255.0;\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iFetch\n" +"{\n" + "float4 Fetch(float2 coord);\n" +"};\n" + +// Source format 0 +"class cFetch_0 : iFetch\n" +"{\n" + "float4 Fetch(float2 coord)\n" + "{ return Fetch_0(coord); }\n" +"};\n" + + +// Source format 1 +"class cFetch_1 : iFetch\n" +"{\n" + "float4 Fetch(float2 coord)\n" + "{ return Fetch_1(coord); }\n" +"};\n" + +// Source format 2 +"class cFetch_2 : iFetch\n" +"{\n" + "float4 Fetch(float2 coord)\n" + "{ return Fetch_2(coord); }\n" +"};\n" + +// Source format 3 +"class cFetch_3 : iFetch\n" +"{\n" + "float4 Fetch(float2 coord)\n" + "{ return Fetch_3(coord); }\n" +"};\n" + +// Declare fetch interface; must be set by application +"iFetch g_fetch;\n" +"#define IMP_FETCH g_fetch.Fetch\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_FETCH\n" +"#error No Fetch specified\n" +"#endif\n" + +// Interface and classes for different intensity settings (on or off) + +"float4 Intensity_0(float4 sample)\n" +"{\n" + "return sample;\n" +"}\n" + +"float4 Intensity_1(float4 sample)\n" +"{\n" + "sample.r = dot(INTENSITY_COEFFS, sample.rgb) + INTENSITY_ADD;\n" + // FIXME: Is this correct? What happens if you use one of the non-R + // formats with intensity on? + "sample = sample.rrrr;\n" + "return sample;\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iIntensity\n" +"{\n" + "float4 Intensity(float4 sample);\n" +"};\n" + +// Intensity off +"class cIntensity_0 : iIntensity\n" +"{\n" + "float4 Intensity(float4 sample)\n" + "{ return Intensity_0(sample); }\n" +"};\n" + +// Intensity on +"class cIntensity_1 : iIntensity\n" +"{\n" + "float4 Intensity(float4 sample)\n" + "{ return Intensity_1(sample); }\n" +"};\n" + +// Declare intensity interface; must be set by application +"iIntensity g_intensity;\n" +"#define IMP_INTENSITY g_intensity.Intensity\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_INTENSITY\n" +"#error No Intensity specified\n" +"#endif\n" + + +// Interface and classes for different scale/filter settings (on or off) + +"float4 ScaledFetch_0(float2 coord)\n" +"{\n" + "return IMP_FETCH(float2(Params.PosX,Params.PosY) + coord);\n" +"}\n" + +"float4 ScaledFetch_1(float2 coord)\n" +"{\n" + "float2 ul = float2(Params.PosX,Params.PosY) + 2*coord;\n" + "float4 sample0 = IMP_FETCH(ul+float2(0,0));\n" + "float4 sample1 = IMP_FETCH(ul+float2(1,0));\n" + "float4 sample2 = IMP_FETCH(ul+float2(0,1));\n" + "float4 sample3 = IMP_FETCH(ul+float2(1,1));\n" + // Average all four samples together + // FIXME: Is this correct? + "return 0.25 * (sample0+sample1+sample2+sample3);\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iScaledFetch\n" +"{\n" + "float4 ScaledFetch(float2 coord);\n" +"};\n" + +// Scale off +"class cScaledFetch_0 : iScaledFetch\n" +"{\n" + "float4 ScaledFetch(float2 coord)\n" + "{ return ScaledFetch_0(coord); }\n" +"};\n" + +// Scale on +"class cScaledFetch_1 : iScaledFetch\n" +"{\n" + "float4 ScaledFetch(float2 coord)\n" + "{ return ScaledFetch_1(coord); }\n" +"};\n" + +// Declare scaled fetch interface; must be set by application code +"iScaledFetch g_scaledFetch;\n" +"#define IMP_SCALEDFETCH g_scaledFetch.ScaledFetch\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_SCALEDFETCH\n" +"#error No ScaledFetch specified\n" +"#endif\n" + +// Main EFB-sampling function: performs all steps of fetching pixels, scaling, +// applying intensity function + +"float4 SampleEFB(float2 coord)\n" +"{\n" + // FIXME: Does intensity happen before or after scaling? Or does + // it matter? + "float4 sample = IMP_SCALEDFETCH(coord);\n" + "return IMP_INTENSITY(sample);\n" +"}\n" + +// Interfaces and classes for different destination formats + +"uint4 Generate_0(float2 cacheCoord)\n" // R4 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,8);\n" + "float2 subBlockUL = blockUL + float2(0, 4*(cacheCoord.x%2));\n" + + "float4 sample[32];\n" + "for (uint y = 0; y < 4; ++y) {\n" + "for (uint x = 0; x < 8; ++x) {\n" + "sample[y*8+x] = SampleEFB(subBlockUL+float2(x,y));\n" + "}\n" + "}\n" + + "uint dw[4];\n" + "for (uint i = 0; i < 4; ++i) {\n" + "dw[i] = UINT_44444444_BE(\n" + "15*sample[8*i+0].r,\n" + "15*sample[8*i+1].r,\n" + "15*sample[8*i+2].r,\n" + "15*sample[8*i+3].r,\n" + "15*sample[8*i+4].r,\n" + "15*sample[8*i+5].r,\n" + "15*sample[8*i+6].r,\n" + "15*sample[8*i+7].r\n" + ");\n" + "}\n" + + "return uint4(dw[0], dw[1], dw[2], dw[3]);\n" +"}\n" + +// FIXME: Untested +"uint4 Generate_1(float2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?) +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n" + "255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n" + "255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n" + "255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n" + ");\n" + + "return dw4;\n" +"}\n" + +// FIXME: Untested +"uint4 Generate_2(float2 cacheCoord)\n" // A4 R4 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint dw0 = UINT_44444444_BE(\n" + "15*sample0.a, 15*sample0.r,\n" + "15*sample1.a, 15*sample1.r,\n" + "15*sample2.a, 15*sample2.r,\n" + "15*sample3.a, 15*sample3.r\n" + ");\n" + "uint dw1 = UINT_44444444_BE(\n" + "15*sample4.a, 15*sample4.r,\n" + "15*sample5.a, 15*sample5.r,\n" + "15*sample6.a, 15*sample6.r,\n" + "15*sample7.a, 15*sample7.r\n" + ");\n" + "uint dw2 = UINT_44444444_BE(\n" + "15*sample8.a, 15*sample8.r,\n" + "15*sample9.a, 15*sample9.r,\n" + "15*sampleA.a, 15*sampleA.r,\n" + "15*sampleB.a, 15*sampleB.r\n" + ");\n" + "uint dw3 = UINT_44444444_BE(\n" + "15*sampleC.a, 15*sampleC.r,\n" + "15*sampleD.a, 15*sampleD.r,\n" + "15*sampleE.a, 15*sampleE.r,\n" + "15*sampleF.a, 15*sampleF.r\n" + ");\n" + + "return uint4(dw0, dw1, dw2, dw3);\n" +"}\n" + +// FIXME: Untested +"uint4 Generate_3(float2 cacheCoord)\n" // A8 R8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n" + "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" + "255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n" + "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" + ");\n" + + "return dw4;\n" +"}\n" + +"uint4 Generate_4(float2 cacheCoord)\n" // R5 G6 B5 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n" + "uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n" + "uint dw2 = UINT_1616(EncodeRGB565(sample4), EncodeRGB565(sample5));\n" + "uint dw3 = UINT_1616(EncodeRGB565(sample6), EncodeRGB565(sample7));\n" + + "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" +"}\n" + +"uint4 Generate_5(float2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n" + "uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n" + "uint dw2 = UINT_1616(EncodeRGB5A3(sample4), EncodeRGB5A3(sample5));\n" + "uint dw3 = UINT_1616(EncodeRGB5A3(sample6), EncodeRGB5A3(sample7));\n" + + "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" +"}\n" + +"uint4 Generate_6(float2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(4,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint4 dw4;\n" + "if (cacheCoord.x % 4 < 2)\n" + "{\n" + // First cache line gets AR + "dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n" + "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" + "255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n" + "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" + ");\n" + "}\n" + "else\n" + "{\n" + // Second cache line gets GB + "dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" + "255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n" + "255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n" + "255*float4(sample1.b, sample3.b, sample5.b, sample7.b)\n" + ");\n" + "}\n" + + "return dw4;\n" +"}\n" + +"uint4 Generate_7(float2 cacheCoord)\n" // A8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.a, sample4.a, sample8.a, sampleC.a),\n" + "255*float4(sample1.a, sample5.a, sample9.a, sampleD.a),\n" + "255*float4(sample2.a, sample6.a, sampleA.a, sampleE.a),\n" + "255*float4(sample3.a, sample7.a, sampleB.a, sampleF.a)\n" + ");\n" + + "return dw4;\n" +"}\n" + +"uint4 Generate_8(float2 cacheCoord)\n" // R8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n" + "255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n" + "255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n" + "255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n" + ");\n" + + "return dw4;\n" +"}\n" + +// FIXME: Untested +"uint4 Generate_9(float2 cacheCoord)\n" // G8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.g, sample4.g, sample8.g, sampleC.g),\n" + "255*float4(sample1.g, sample5.g, sample9.g, sampleD.g),\n" + "255*float4(sample2.g, sample6.g, sampleA.g, sampleE.g),\n" + "255*float4(sample3.g, sample7.g, sampleB.g, sampleF.g)\n" + ");\n" + + "return dw4;\n" +"}\n" + +"uint4 Generate_A(float2 cacheCoord)\n" // B8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(8,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" + "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" + "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" + "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" + "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" + "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.b, sample4.b, sample8.b, sampleC.b),\n" + "255*float4(sample1.b, sample5.b, sample9.b, sampleD.b),\n" + "255*float4(sample2.b, sample6.b, sampleA.b, sampleE.b),\n" + "255*float4(sample3.b, sample7.b, sampleB.b, sampleF.b)\n" + ");\n" + + "return dw4;\n" +"}\n" + +"uint4 Generate_B(float2 cacheCoord)\n" // G8 R8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" + "255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n" + "255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n" + "255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n" + ");\n" + + "return dw4;\n" +"}\n" + +// FIXME: Untested +"uint4 Generate_C(float2 cacheCoord)\n" // B8 G8 +"{\n" + "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" + + "float2 blockUL = blockCoord * float2(4,4);\n" + "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" + + "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" + "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" + "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" + "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" + "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" + "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" + "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" + "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" + + "uint4 dw4 = UINT4_8888_BE(\n" + "255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n" + "255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n" + "255*float4(sample1.b, sample3.b, sample5.b, sample7.b),\n" + "255*float4(sample1.g, sample3.g, sample5.g, sample7.g)\n" + ");\n" + + "return dw4;\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord);\n" +"};\n" + +"class cGenerator_4 : iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord)\n" + "{ return Generate_4(cacheCoord); }\n" +"};\n" + +"class cGenerator_5 : iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord)\n" + "{ return Generate_5(cacheCoord); }\n" +"};\n" + +"class cGenerator_6 : iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord)\n" + "{ return Generate_6(cacheCoord); }\n" +"};\n" + +"class cGenerator_8 : iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord)\n" + "{ return Generate_8(cacheCoord); }\n" +"};\n" + +"class cGenerator_B : iGenerator\n" +"{\n" + "uint4 Generate(float2 cacheCoord)\n" + "{ return Generate_B(cacheCoord); }\n" +"};\n" + +// Declare generator interface; must be set by application +"iGenerator g_generator;\n" +"#define IMP_GENERATOR g_generator.Generate\n" + +"#endif\n" + +"#ifndef IMP_GENERATOR\n" +"#error No generator specified\n" +"#endif\n" + +"void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n" +"{\n" + "float2 cacheCoord = floor(fCacheCoord);\n" + "ocol0 = IMP_GENERATOR(cacheCoord);\n" +"}\n" +; + +PSTextureEncoder::PSTextureEncoder() + : m_ready(false), m_out(NULL), m_outRTV(NULL), m_outStage(NULL), + m_encodeParams(NULL), + m_quad(NULL), m_vShader(NULL), m_quadLayout(NULL), + m_efbEncodeBlendState(NULL), m_efbEncodeDepthState(NULL), + m_efbEncodeRastState(NULL), m_efbSampler(NULL), + m_dynamicShader(NULL), m_classLinkage(NULL) +{ + for (size_t i = 0; i < 4; ++i) + m_fetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_scaledFetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_intensityClass[i] = NULL; + for (size_t i = 0; i < 16; ++i) + m_generatorClass[i] = NULL; +} + +static const D3D11_INPUT_ELEMENT_DESC QUAD_LAYOUT_DESC[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 } +}; + +static const struct QuadVertex +{ + float posX; + float posY; +} QUAD_VERTS[4] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; + +void PSTextureEncoder::Init() +{ + m_ready = false; + + HRESULT hr; + + // Create output texture RGBA format + + // This format allows us to generate one cache line in two pixels. + D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( + DXGI_FORMAT_R32G32B32A32_UINT, + EFB_WIDTH, EFB_HEIGHT/4, 1, 1, D3D11_BIND_RENDER_TARGET); + hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_out); + CHECK(SUCCEEDED(hr), "create efb encode output texture"); + D3D::SetDebugObjectName(m_out, "efb encoder output texture"); + + // Create output render target view + + D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, + D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R32G32B32A32_UINT); + hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); + CHECK(SUCCEEDED(hr), "create efb encode output render target view"); + D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); + + // Create output staging buffer + + t2dd.Usage = D3D11_USAGE_STAGING; + t2dd.BindFlags = 0; + t2dd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_outStage); + CHECK(SUCCEEDED(hr), "create efb encode output staging buffer"); + D3D::SetDebugObjectName(m_outStage, "efb encoder output staging buffer"); + + // Create constant buffer for uploading data to shaders + + D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), + D3D11_BIND_CONSTANT_BUFFER); + hr = D3D::device->CreateBuffer(&bd, NULL, &m_encodeParams); + CHECK(SUCCEEDED(hr), "create efb encode params buffer"); + D3D::SetDebugObjectName(m_encodeParams, "efb encoder params buffer"); + + // Create vertex quad + + bd = CD3D11_BUFFER_DESC(sizeof(QUAD_VERTS), D3D11_BIND_VERTEX_BUFFER, + D3D11_USAGE_IMMUTABLE); + D3D11_SUBRESOURCE_DATA srd = { QUAD_VERTS, 0, 0 }; + + hr = D3D::device->CreateBuffer(&bd, &srd, &m_quad); + CHECK(SUCCEEDED(hr), "create efb encode quad vertex buffer"); + D3D::SetDebugObjectName(m_quad, "efb encoder quad vertex buffer"); + + // Create vertex shader + + D3DBlob* bytecode = NULL; + if (!D3D::CompileVertexShader(EFB_ENCODE_VS, sizeof(EFB_ENCODE_VS), &bytecode)) + { + ERROR_LOG(VIDEO, "EFB encode vertex shader failed to compile"); + return; + } + + hr = D3D::device->CreateVertexShader(bytecode->Data(), bytecode->Size(), NULL, &m_vShader); + CHECK(SUCCEEDED(hr), "create efb encode vertex shader"); + D3D::SetDebugObjectName(m_vShader, "efb encoder vertex shader"); + + // Create input layout for vertex quad using bytecode from vertex shader + + hr = D3D::device->CreateInputLayout(QUAD_LAYOUT_DESC, + sizeof(QUAD_LAYOUT_DESC)/sizeof(D3D11_INPUT_ELEMENT_DESC), + bytecode->Data(), bytecode->Size(), &m_quadLayout); + CHECK(SUCCEEDED(hr), "create efb encode quad vertex layout"); + D3D::SetDebugObjectName(m_quadLayout, "efb encoder quad layout"); + + bytecode->Release(); + + // Create pixel shader + +#ifdef USE_DYNAMIC_MODE + if (!InitDynamicMode()) +#else + if (!InitStaticMode()) +#endif + return; + + // Create blend state + + D3D11_BLEND_DESC bld = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); + hr = D3D::device->CreateBlendState(&bld, &m_efbEncodeBlendState); + CHECK(SUCCEEDED(hr), "create efb encode blend state"); + D3D::SetDebugObjectName(m_efbEncodeBlendState, "efb encoder blend state"); + + // Create depth state + + D3D11_DEPTH_STENCIL_DESC dsd = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); + dsd.DepthEnable = FALSE; + hr = D3D::device->CreateDepthStencilState(&dsd, &m_efbEncodeDepthState); + CHECK(SUCCEEDED(hr), "create efb encode depth state"); + D3D::SetDebugObjectName(m_efbEncodeDepthState, "efb encoder depth state"); + + // Create rasterizer state + + D3D11_RASTERIZER_DESC rd = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); + rd.CullMode = D3D11_CULL_NONE; + rd.DepthClipEnable = FALSE; + hr = D3D::device->CreateRasterizerState(&rd, &m_efbEncodeRastState); + CHECK(SUCCEEDED(hr), "create efb encode rast state"); + D3D::SetDebugObjectName(m_efbEncodeRastState, "efb encoder rast state"); + + // Create efb texture sampler + + D3D11_SAMPLER_DESC sd = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); + sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); + CHECK(SUCCEEDED(hr), "create efb encode texture sampler"); + D3D::SetDebugObjectName(m_efbSampler, "efb encoder texture sampler"); + + m_ready = true; +} + +void PSTextureEncoder::Shutdown() +{ + m_ready = false; + + for (size_t i = 0; i < 4; ++i) + SAFE_RELEASE(m_fetchClass[i]); + for (size_t i = 0; i < 2; ++i) + SAFE_RELEASE(m_scaledFetchClass[i]); + for (size_t i = 0; i < 2; ++i) + SAFE_RELEASE(m_intensityClass[i]); + for (size_t i = 0; i < 16; ++i) + SAFE_RELEASE(m_generatorClass[i]); + m_linkageArray.clear(); + + SAFE_RELEASE(m_classLinkage); + SAFE_RELEASE(m_dynamicShader); + + for (ComboMap::iterator it = m_staticShaders.begin(); + it != m_staticShaders.end(); ++it) + { + SAFE_RELEASE(it->second); + } + m_staticShaders.clear(); + + SAFE_RELEASE(m_efbSampler); + SAFE_RELEASE(m_efbEncodeRastState); + SAFE_RELEASE(m_efbEncodeDepthState); + SAFE_RELEASE(m_efbEncodeBlendState); + SAFE_RELEASE(m_quadLayout); + SAFE_RELEASE(m_vShader); + SAFE_RELEASE(m_quad); + SAFE_RELEASE(m_encodeParams); + SAFE_RELEASE(m_outStage); + SAFE_RELEASE(m_outRTV); + SAFE_RELEASE(m_out); +} + +size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf) +{ + if (!m_ready) // Make sure we initialized OK + return 0; + + HRESULT hr; + + unsigned int blockW = BLOCK_WIDTHS[dstFormat]; + unsigned int blockH = BLOCK_HEIGHTS[dstFormat]; + + // Round up source dims to multiple of block size + unsigned int actualWidth = srcRect.GetWidth() / (scaleByHalf ? 2 : 1); + actualWidth = (actualWidth + blockW-1) & ~(blockW-1); + unsigned int actualHeight = srcRect.GetHeight() / (scaleByHalf ? 2 : 1); + actualHeight = (actualHeight + blockH-1) & ~(blockH-1); + + unsigned int numBlocksX = actualWidth/blockW; + unsigned int numBlocksY = actualHeight/blockH; + + unsigned int cacheLinesPerRow; + if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one + cacheLinesPerRow = numBlocksX*2; + else + cacheLinesPerRow = numBlocksX; + CHECK(cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check"); + + unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY; + CHECK(totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check"); + + size_t encodeSize = 0; + + // Reset API + + g_renderer->ResetAPIState(); + + // Set up all the state for EFB encoding + +#ifdef USE_DYNAMIC_MODE + if (SetDynamicShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) +#else + if (SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) +#endif + { + D3D::context->VSSetShader(m_vShader, NULL, 0); + + D3D::stateman->PushBlendState(m_efbEncodeBlendState); + D3D::stateman->PushDepthState(m_efbEncodeDepthState); + D3D::stateman->PushRasterizerState(m_efbEncodeRastState); + D3D::stateman->Apply(); + + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2), FLOAT(numBlocksY)); + D3D::context->RSSetViewports(1, &vp); + + D3D::context->IASetInputLayout(m_quadLayout); + D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + UINT stride = sizeof(QuadVertex); + UINT offset = 0; + D3D::context->IASetVertexBuffers(0, 1, &m_quad, &stride, &offset); + + EFBRectangle fullSrcRect; + fullSrcRect.left = 0; + fullSrcRect.top = 0; + fullSrcRect.right = EFB_WIDTH; + fullSrcRect.bottom = EFB_HEIGHT; + TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); + + EFBEncodeParams params = { 0 }; + params.NumHalfCacheLinesX = FLOAT(cacheLinesPerRow*2); + params.NumBlocksY = FLOAT(numBlocksY); + params.PosX = FLOAT(srcRect.left); + params.PosY = FLOAT(srcRect.top); + params.TexLeft = float(targetRect.left) / g_renderer->GetFullTargetWidth(); + params.TexTop = float(targetRect.top) / g_renderer->GetFullTargetHeight(); + params.TexRight = float(targetRect.right) / g_renderer->GetFullTargetWidth(); + params.TexBottom = float(targetRect.bottom) / g_renderer->GetFullTargetHeight(); + D3D::context->UpdateSubresource(m_encodeParams, 0, NULL, ¶ms, 0, 0); + + D3D::context->VSSetConstantBuffers(0, 1, &m_encodeParams); + + D3D::context->OMSetRenderTargets(1, &m_outRTV, NULL); + + ID3D11ShaderResourceView* pEFB = (srcFormat == PIXELFMT_Z24) ? + FramebufferManager::GetEFBDepthTexture()->GetSRV() : + FramebufferManager::GetEFBColorTexture()->GetSRV(); + + D3D::context->PSSetConstantBuffers(0, 1, &m_encodeParams); + D3D::context->PSSetShaderResources(0, 1, &pEFB); + D3D::context->PSSetSamplers(0, 1, &m_efbSampler); + + // Encode! + + D3D::context->Draw(4, 0); + + // Copy to staging buffer + + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2, numBlocksY, 1); + D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); + + // Clean up state + + IUnknown* nullDummy = NULL; + + D3D::context->PSSetSamplers(0, 1, (ID3D11SamplerState**)&nullDummy); + D3D::context->PSSetShaderResources(0, 1, (ID3D11ShaderResourceView**)&nullDummy); + D3D::context->PSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); + + D3D::context->OMSetRenderTargets(0, NULL, NULL); + + D3D::context->VSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); + + D3D::stateman->PopRasterizerState(); + D3D::stateman->PopDepthState(); + D3D::stateman->PopBlendState(); + + D3D::context->PSSetShader(NULL, NULL, 0); + D3D::context->VSSetShader(NULL, NULL, 0); + + // Transfer staging buffer to GameCube/Wii RAM + + D3D11_MAPPED_SUBRESOURCE map = { 0 }; + hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); + CHECK(SUCCEEDED(hr), "map staging buffer"); + + u8* src = (u8*)map.pData; + for (unsigned int y = 0; y < numBlocksY; ++y) + { + memcpy(dst, src, cacheLinesPerRow*32); + dst += bpmem.copyMipMapStrideChannels*32; + src += map.RowPitch; + } + + D3D::context->Unmap(m_outStage, 0); + + encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY; + } + + // Restore API + + g_renderer->RestoreAPIState(); + D3D::context->OMSetRenderTargets(1, + &FramebufferManager::GetEFBColorTexture()->GetRTV(), + FramebufferManager::GetEFBDepthTexture()->GetDSV()); + + return encodeSize; +} + +bool PSTextureEncoder::InitStaticMode() +{ + // Nothing to really do. + return true; +} + +static const char* FETCH_FUNC_NAMES[4] = { + "Fetch_0", "Fetch_1", "Fetch_2", "Fetch_3" +}; + +static const char* SCALEDFETCH_FUNC_NAMES[2] = { + "ScaledFetch_0", "ScaledFetch_1" +}; + +static const char* INTENSITY_FUNC_NAMES[2] = { + "Intensity_0", "Intensity_1" +}; + +bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf) +{ + size_t fetchNum = srcFormat; + size_t scaledFetchNum = scaleByHalf ? 1 : 0; + size_t intensityNum = isIntensity ? 1 : 0; + size_t generatorNum = dstFormat; + + ComboKey key = MakeComboKey(dstFormat, srcFormat, isIntensity, scaleByHalf); + + ComboMap::iterator it = m_staticShaders.find(key); + if (it == m_staticShaders.end()) + { + const char* generatorFuncName = NULL; + switch (generatorNum) + { + case 0x0: generatorFuncName = "Generate_0"; break; + case 0x1: generatorFuncName = "Generate_1"; break; + case 0x2: generatorFuncName = "Generate_2"; break; + case 0x3: generatorFuncName = "Generate_3"; break; + case 0x4: generatorFuncName = "Generate_4"; break; + case 0x5: generatorFuncName = "Generate_5"; break; + case 0x6: generatorFuncName = "Generate_6"; break; + case 0x7: generatorFuncName = "Generate_7"; break; + case 0x8: generatorFuncName = "Generate_8"; break; + case 0x9: generatorFuncName = "Generate_9"; break; + case 0xA: generatorFuncName = "Generate_A"; break; + case 0xB: generatorFuncName = "Generate_B"; break; + case 0xC: generatorFuncName = "Generate_C"; break; + default: + WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); + m_staticShaders[key] = NULL; + return false; + } + + INFO_LOG(VIDEO, "Compiling efb encoding shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d", + dstFormat, srcFormat, isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); + + // Shader permutation not found, so compile it + D3DBlob* bytecode = NULL; + D3D_SHADER_MACRO macros[] = { + { "IMP_FETCH", FETCH_FUNC_NAMES[fetchNum] }, + { "IMP_SCALEDFETCH", SCALEDFETCH_FUNC_NAMES[scaledFetchNum] }, + { "IMP_INTENSITY", INTENSITY_FUNC_NAMES[intensityNum] }, + { "IMP_GENERATOR", generatorFuncName }, + { NULL, NULL } + }; + if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) + { + WARN_LOG(VIDEO, "EFB encoder shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d failed to compile", + dstFormat, srcFormat, isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); + // Add dummy shader to map to prevent trying to compile over and + // over again + m_staticShaders[key] = NULL; + return false; + } + + ID3D11PixelShader* newShader; + HRESULT hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), NULL, &newShader); + CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); + + m_staticShaders[key] = newShader; + bytecode->Release(); + + it = m_staticShaders.find(key); + } + + if (it != m_staticShaders.end()) + { + if (it->second) + { + D3D::context->PSSetShader(it->second, NULL, 0); + return true; + } + else + return false; + } + else + return false; +} + +bool PSTextureEncoder::InitDynamicMode() +{ + HRESULT hr; + + D3D_SHADER_MACRO macros[] = { + { "DYNAMIC_MODE", NULL }, + { NULL, NULL } + }; + + D3DBlob* bytecode = NULL; + if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) + { + ERROR_LOG(VIDEO, "EFB encode pixel shader failed to compile"); + return false; + } + + hr = D3D::device->CreateClassLinkage(&m_classLinkage); + CHECK(SUCCEEDED(hr), "create efb encode class linkage"); + D3D::SetDebugObjectName(m_classLinkage, "efb encoder class linkage"); + + hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), m_classLinkage, &m_dynamicShader); + CHECK(SUCCEEDED(hr), "create efb encode pixel shader"); + D3D::SetDebugObjectName(m_dynamicShader, "efb encoder pixel shader"); + + // Use D3DReflect + + ID3D11ShaderReflection* reflect = NULL; + hr = PD3DReflect(bytecode->Data(), bytecode->Size(), IID_ID3D11ShaderReflection, (void**)&reflect); + CHECK(SUCCEEDED(hr), "reflect on efb encoder shader"); + + // Get number of slots and create dynamic linkage array + + UINT numSlots = reflect->GetNumInterfaceSlots(); + m_linkageArray.resize(numSlots, NULL); + + // Get interface slots + + ID3D11ShaderReflectionVariable* var = reflect->GetVariableByName("g_fetch"); + m_fetchSlot = var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_scaledFetch"); + m_scaledFetchSlot = var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_intensity"); + m_intensitySlot = var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_generator"); + m_generatorSlot = var->GetInterfaceSlot(0); + + INFO_LOG(VIDEO, "fetch slot %d, scaledFetch slot %d, intensity slot %d, generator slot %d", + m_fetchSlot, m_scaledFetchSlot, m_intensitySlot, m_generatorSlot); + + // Class instances will be created at the time they are used + + for (size_t i = 0; i < 4; ++i) + m_fetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_scaledFetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_intensityClass[i] = NULL; + for (size_t i = 0; i < 16; ++i) + m_generatorClass[i] = NULL; + + reflect->Release(); + bytecode->Release(); + + return true; +} + +static const char* FETCH_CLASS_NAMES[4] = { + "cFetch_0", "cFetch_1", "cFetch_2", "cFetch_3" +}; + +static const char* SCALEDFETCH_CLASS_NAMES[2] = { + "cScaledFetch_0", "cScaledFetch_1" +}; + +static const char* INTENSITY_CLASS_NAMES[2] = { + "cIntensity_0", "cIntensity_1" +}; + +bool PSTextureEncoder::SetDynamicShader(unsigned int dstFormat, + unsigned int srcFormat, bool isIntensity, bool scaleByHalf) +{ + size_t fetchNum = srcFormat; + size_t scaledFetchNum = scaleByHalf ? 1 : 0; + size_t intensityNum = isIntensity ? 1 : 0; + size_t generatorNum = dstFormat; + + // FIXME: Not all the possible generators are available as classes yet. + // When dynamic mode is usable, implement them. + const char* generatorName = NULL; + switch (generatorNum) + { + case 0x4: generatorName = "cGenerator_4"; break; + case 0x5: generatorName = "cGenerator_5"; break; + case 0x6: generatorName = "cGenerator_6"; break; + case 0x8: generatorName = "cGenerator_8"; break; + case 0xB: generatorName = "cGenerator_B"; break; + default: + WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); + return false; + } + + // Make sure class instances are available + if (!m_fetchClass[fetchNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + FETCH_CLASS_NAMES[fetchNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + FETCH_CLASS_NAMES[fetchNum], 0, 0, 0, 0, &m_fetchClass[fetchNum]); + CHECK(SUCCEEDED(hr), "create fetch class instance"); + } + if (!m_scaledFetchClass[scaledFetchNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + SCALEDFETCH_CLASS_NAMES[scaledFetchNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + SCALEDFETCH_CLASS_NAMES[scaledFetchNum], 0, 0, 0, 0, + &m_scaledFetchClass[scaledFetchNum]); + CHECK(SUCCEEDED(hr), "create scaled fetch class instance"); + } + if (!m_intensityClass[intensityNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + INTENSITY_CLASS_NAMES[intensityNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + INTENSITY_CLASS_NAMES[intensityNum], 0, 0, 0, 0, + &m_intensityClass[intensityNum]); + CHECK(SUCCEEDED(hr), "create intensity class instance"); + } + if (!m_generatorClass[generatorNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + generatorName, dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + generatorName, 0, 0, 0, 0, &m_generatorClass[generatorNum]); + CHECK(SUCCEEDED(hr), "create generator class instance"); + } + + // Assemble dynamic linkage array + if (m_fetchSlot != UINT(-1)) + m_linkageArray[m_fetchSlot] = m_fetchClass[fetchNum]; + if (m_scaledFetchSlot != UINT(-1)) + m_linkageArray[m_scaledFetchSlot] = m_scaledFetchClass[scaledFetchNum]; + if (m_intensitySlot != UINT(-1)) + m_linkageArray[m_intensitySlot] = m_intensityClass[intensityNum]; + if (m_generatorSlot != UINT(-1)) + m_linkageArray[m_generatorSlot] = m_generatorClass[generatorNum]; + + D3D::context->PSSetShader(m_dynamicShader, + m_linkageArray.empty() ? NULL : &m_linkageArray[0], + (UINT)m_linkageArray.size()); + + return true; +} + +} diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h index 4fdbfb4ba5..490cd69ef5 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h @@ -1,119 +1,119 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#ifndef _PSTEXTUREENCODER_H -#define _PSTEXTUREENCODER_H - -#include "TextureEncoder.h" - -struct ID3D11Texture2D; -struct ID3D11RenderTargetView; -struct ID3D11Buffer; -struct ID3D11InputLayout; -struct ID3D11VertexShader; -struct ID3D11PixelShader; -struct ID3D11ClassLinkage; -struct ID3D11ClassInstance; -struct ID3D11BlendState; -struct ID3D11DepthStencilState; -struct ID3D11RasterizerState; -struct ID3D11SamplerState; - -namespace DX11 -{ - -class PSTextureEncoder : public TextureEncoder -{ - -public: - - PSTextureEncoder(); - - void Init(); - void Shutdown(); - size_t Encode(u8* dst, unsigned int dstFormat, - unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf); - -private: - - bool m_ready; - - ID3D11Texture2D* m_out; - ID3D11RenderTargetView* m_outRTV; - ID3D11Texture2D* m_outStage; - ID3D11Buffer* m_encodeParams; - ID3D11Buffer* m_quad; - ID3D11VertexShader* m_vShader; - ID3D11InputLayout* m_quadLayout; - ID3D11BlendState* m_efbEncodeBlendState; - ID3D11DepthStencilState* m_efbEncodeDepthState; - ID3D11RasterizerState* m_efbEncodeRastState; - ID3D11SamplerState* m_efbSampler; - - // Stuff only used in static-linking mode (SM4.0-compatible) - - bool InitStaticMode(); - bool SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, - bool isIntensity, bool scaleByHalf); - - typedef unsigned int ComboKey; // Key for a shader combination - - ComboKey MakeComboKey(unsigned int dstFormat, unsigned int srcFormat, - bool isIntensity, bool scaleByHalf) - { - return (dstFormat << 4) | (srcFormat << 2) | (isIntensity ? (1<<1) : 0) - | (scaleByHalf ? (1<<0) : 0); - } - - typedef std::map ComboMap; - - ComboMap m_staticShaders; - - // Stuff only used for dynamic-linking mode (SM5.0+, available as soon as - // Microsoft fixes their bloody HLSL compiler) - - bool InitDynamicMode(); - bool SetDynamicShader(unsigned int dstFormat, unsigned int srcFormat, - bool isIntensity, bool scaleByHalf); - - ID3D11PixelShader* m_dynamicShader; - ID3D11ClassLinkage* m_classLinkage; - - // Interface slots - UINT m_fetchSlot; - UINT m_scaledFetchSlot; - UINT m_intensitySlot; - UINT m_generatorSlot; - - // Class instances - // Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z - ID3D11ClassInstance* m_fetchClass[4]; - // ScaledFetch: 0 is off, 1 is on - ID3D11ClassInstance* m_scaledFetchClass[2]; - // Intensity: 0 is off, 1 is on - ID3D11ClassInstance* m_intensityClass[2]; - // Generator: one for each dst format, 16 total - ID3D11ClassInstance* m_generatorClass[16]; - - std::vector m_linkageArray; - -}; - -} - -#endif +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _PSTEXTUREENCODER_H +#define _PSTEXTUREENCODER_H + +#include "TextureEncoder.h" + +struct ID3D11Texture2D; +struct ID3D11RenderTargetView; +struct ID3D11Buffer; +struct ID3D11InputLayout; +struct ID3D11VertexShader; +struct ID3D11PixelShader; +struct ID3D11ClassLinkage; +struct ID3D11ClassInstance; +struct ID3D11BlendState; +struct ID3D11DepthStencilState; +struct ID3D11RasterizerState; +struct ID3D11SamplerState; + +namespace DX11 +{ + +class PSTextureEncoder : public TextureEncoder +{ + +public: + + PSTextureEncoder(); + + void Init(); + void Shutdown(); + size_t Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf); + +private: + + bool m_ready; + + ID3D11Texture2D* m_out; + ID3D11RenderTargetView* m_outRTV; + ID3D11Texture2D* m_outStage; + ID3D11Buffer* m_encodeParams; + ID3D11Buffer* m_quad; + ID3D11VertexShader* m_vShader; + ID3D11InputLayout* m_quadLayout; + ID3D11BlendState* m_efbEncodeBlendState; + ID3D11DepthStencilState* m_efbEncodeDepthState; + ID3D11RasterizerState* m_efbEncodeRastState; + ID3D11SamplerState* m_efbSampler; + + // Stuff only used in static-linking mode (SM4.0-compatible) + + bool InitStaticMode(); + bool SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf); + + typedef unsigned int ComboKey; // Key for a shader combination + + ComboKey MakeComboKey(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf) + { + return (dstFormat << 4) | (srcFormat << 2) | (isIntensity ? (1<<1) : 0) + | (scaleByHalf ? (1<<0) : 0); + } + + typedef std::map ComboMap; + + ComboMap m_staticShaders; + + // Stuff only used for dynamic-linking mode (SM5.0+, available as soon as + // Microsoft fixes their bloody HLSL compiler) + + bool InitDynamicMode(); + bool SetDynamicShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf); + + ID3D11PixelShader* m_dynamicShader; + ID3D11ClassLinkage* m_classLinkage; + + // Interface slots + UINT m_fetchSlot; + UINT m_scaledFetchSlot; + UINT m_intensitySlot; + UINT m_generatorSlot; + + // Class instances + // Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z + ID3D11ClassInstance* m_fetchClass[4]; + // ScaledFetch: 0 is off, 1 is on + ID3D11ClassInstance* m_scaledFetchClass[2]; + // Intensity: 0 is off, 1 is on + ID3D11ClassInstance* m_intensityClass[2]; + // Generator: one for each dst format, 16 total + ID3D11ClassInstance* m_generatorClass[16]; + + std::vector m_linkageArray; + +}; + +} + +#endif diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h b/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h index 4f5413e0e2..8cfa923911 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h @@ -1,135 +1,135 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#ifndef _TEXTUREENCODER_H -#define _TEXTUREENCODER_H - -#include "VideoCommon.h" - -namespace DX11 -{ - -// 4-bit format: 8x8 texels / cache line -// 8-bit format: 8x4 texels / cache line -// 16-bit format: 4x4 texels / cache line -// 32-bit format: 4x4 texels / 2 cache lines -// Compressed format: 8x8 texels / cache line - -// Document EFB encoding formats here with examples of where they are used. - -// Format: 0 - R4 -// Used in The Legend of Zelda: The Wind Waker for character shadows (srcFormat 1, -// isIntensity 1, scaleByHalf 1). - -// Format: 1 - R8 -// FIXME: Unseen. May or may not be a duplicate of format 8. - -// Format: 2 - A4 R4 -// FIXME: Unseen. - -// Format: 3 - A8 R8 -// FIXME: Unseen. - -// Format: 4 - R5 G6 B5 -// Used in Wind Waker for most render-to-texture effects like heat shimmer and -// depth-of-field. - -// Format: 5 - 1 R5 G5 B5 or 0 A3 R4 G4 B4 -// Used in Twilight Princess for character shadows. - -// Format: 6 - A8 R8 A8 R8 | G8 B8 G8 B8 -// Used in Twilight Princess for bloom effect. - -// Format: 7 - A8 -// Used in Metroid Prime 2 for the scan visor. - -// Format: 8 - R8 -// Used in Twilight Princess for the map. - -// Format: 9 - G8 -// FIXME: Unseen. - -// Format: A - B8 -// Used in Metroid Prime 2 for the scan visor. - -// Format: B - G8 R8 -// Used in Wind Waker for depth-of-field. Usually used with srcFormat 3 to -// render depth textures. The bytes are swapped, so games have to correct it -// in RAM before using it as a texture. - -// Format: C - B8 G8 -// FIXME: Unseen. - -const unsigned int BLOCK_WIDTHS[16] = { - 8, // R4 - 8, // R8 (FIXME: duplicate of R8 below?) - 8, // A4 R4 - 4, // A8 R8 - 4, // R5 G6 B5 - 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 - 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) - 8, // A8 - 8, // R8 (FIXME: duplicate of R8 above?) - 8, // G8 - 8, // B8 - 4, // G8 R8 - 4, // B8 G8 - 0, 0, 0 // Unknown formats -}; - -const unsigned int BLOCK_HEIGHTS[16] = { - 8, // R4 - 4, // R8 (FIXME: duplicate of R8 below?) - 4, // A4 R4 - 4, // A8 R8 - 4, // R5 G6 B5 - 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 - 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) - 4, // A8 - 4, // R8 (FIXME: duplicate of R8 above?) - 4, // G8 - 4, // B8 - 4, // G8 R8 - 4, // B8 G8 - 0, 0, 0 // Unknown formats -}; - -// Maximum number of bytes that can occur in a texture block-row generated by -// the encoder -static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64; -// The maximum amount of data that the texture encoder can generate in one call -static const UINT MAX_BYTES_PER_ENCODE = MAX_BYTES_PER_BLOCK_ROW*(EFB_HEIGHT/4); - -class TextureEncoder -{ - -public: - - virtual ~TextureEncoder() { } - - virtual void Init() = 0; - virtual void Shutdown() = 0; - // Returns size in bytes of encoded block of memory - virtual size_t Encode(u8* dst, unsigned int dstFormat, - unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf) = 0; - -}; - -} - -#endif +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _TEXTUREENCODER_H +#define _TEXTUREENCODER_H + +#include "VideoCommon.h" + +namespace DX11 +{ + +// 4-bit format: 8x8 texels / cache line +// 8-bit format: 8x4 texels / cache line +// 16-bit format: 4x4 texels / cache line +// 32-bit format: 4x4 texels / 2 cache lines +// Compressed format: 8x8 texels / cache line + +// Document EFB encoding formats here with examples of where they are used. + +// Format: 0 - R4 +// Used in The Legend of Zelda: The Wind Waker for character shadows (srcFormat 1, +// isIntensity 1, scaleByHalf 1). + +// Format: 1 - R8 +// FIXME: Unseen. May or may not be a duplicate of format 8. + +// Format: 2 - A4 R4 +// FIXME: Unseen. + +// Format: 3 - A8 R8 +// FIXME: Unseen. + +// Format: 4 - R5 G6 B5 +// Used in Wind Waker for most render-to-texture effects like heat shimmer and +// depth-of-field. + +// Format: 5 - 1 R5 G5 B5 or 0 A3 R4 G4 B4 +// Used in Twilight Princess for character shadows. + +// Format: 6 - A8 R8 A8 R8 | G8 B8 G8 B8 +// Used in Twilight Princess for bloom effect. + +// Format: 7 - A8 +// Used in Metroid Prime 2 for the scan visor. + +// Format: 8 - R8 +// Used in Twilight Princess for the map. + +// Format: 9 - G8 +// FIXME: Unseen. + +// Format: A - B8 +// Used in Metroid Prime 2 for the scan visor. + +// Format: B - G8 R8 +// Used in Wind Waker for depth-of-field. Usually used with srcFormat 3 to +// render depth textures. The bytes are swapped, so games have to correct it +// in RAM before using it as a texture. + +// Format: C - B8 G8 +// FIXME: Unseen. + +const unsigned int BLOCK_WIDTHS[16] = { + 8, // R4 + 8, // R8 (FIXME: duplicate of R8 below?) + 8, // A4 R4 + 4, // A8 R8 + 4, // R5 G6 B5 + 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 + 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) + 8, // A8 + 8, // R8 (FIXME: duplicate of R8 above?) + 8, // G8 + 8, // B8 + 4, // G8 R8 + 4, // B8 G8 + 0, 0, 0 // Unknown formats +}; + +const unsigned int BLOCK_HEIGHTS[16] = { + 8, // R4 + 4, // R8 (FIXME: duplicate of R8 below?) + 4, // A4 R4 + 4, // A8 R8 + 4, // R5 G6 B5 + 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 + 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) + 4, // A8 + 4, // R8 (FIXME: duplicate of R8 above?) + 4, // G8 + 4, // B8 + 4, // G8 R8 + 4, // B8 G8 + 0, 0, 0 // Unknown formats +}; + +// Maximum number of bytes that can occur in a texture block-row generated by +// the encoder +static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64; +// The maximum amount of data that the texture encoder can generate in one call +static const UINT MAX_BYTES_PER_ENCODE = MAX_BYTES_PER_BLOCK_ROW*(EFB_HEIGHT/4); + +class TextureEncoder +{ + +public: + + virtual ~TextureEncoder() { } + + virtual void Init() = 0; + virtual void Shutdown() = 0; + // Returns size in bytes of encoded block of memory + virtual size_t Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf) = 0; + +}; + +} + +#endif