From f0c5cc76a998b59ddb1dd720354db366b5e15e9b Mon Sep 17 00:00:00 2001 From: Nolan Check Date: Sat, 26 Feb 2011 23:41:02 +0000 Subject: [PATCH] Add EFB encode-to-RAM support in DX11 backend. It could probably be simplified a lot, and not all the possible formats are implemented. I tried to use the dynamic-linking feature of shader model 5, but Microsoft's HLSL compiler is broken. "Dynamic mode" is implemented, but disabled for now. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7253 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/VideoCommon/Src/BPFunctions.cpp | 6 +- Source/Core/VideoCommon/Src/BPFunctions.h | 3 +- Source/Core/VideoCommon/Src/BPStructs.cpp | 7 +- .../Core/VideoCommon/Src/TextureCacheBase.cpp | 40 +- .../Core/VideoCommon/Src/TextureCacheBase.h | 11 +- .../Plugins/Plugin_VideoDX11/Src/D3DBase.cpp | 48 +- Source/Plugins/Plugin_VideoDX11/Src/D3DBase.h | 10 +- .../Plugin_VideoDX11/Src/D3DShader.cpp | 22 +- .../Plugins/Plugin_VideoDX11/Src/D3DShader.h | 3 +- .../Plugin_VideoDX11/Src/PSTextureEncoder.cpp | 1257 +++++++++++++++++ .../Plugin_VideoDX11/Src/PSTextureEncoder.h | 119 ++ .../Plugin_VideoDX11/Src/TextureCache.cpp | 106 +- .../Plugin_VideoDX11/Src/TextureCache.h | 7 +- .../Plugin_VideoDX11/Src/TextureEncoder.h | 90 ++ Source/Plugins/Plugin_VideoDX11/Src/main.cpp | 2 +- .../Plugin_VideoDX9/Src/TextureCache.cpp | 27 +- .../Plugin_VideoDX9/Src/TextureCache.h | 7 +- .../Plugin_VideoOGL/Src/TextureCache.cpp | 27 +- .../Plugin_VideoOGL/Src/TextureCache.h | 7 +- 19 files changed, 1687 insertions(+), 112 deletions(-) create mode 100644 Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp create mode 100644 Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h create mode 100644 Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h diff --git a/Source/Core/VideoCommon/Src/BPFunctions.cpp b/Source/Core/VideoCommon/Src/BPFunctions.cpp index 5255b6cb54..7e804e1b57 100644 --- a/Source/Core/VideoCommon/Src/BPFunctions.cpp +++ 
b/Source/Core/VideoCommon/Src/BPFunctions.cpp @@ -77,12 +77,14 @@ void SetColorMask(const BPCmd &bp) g_renderer->SetColorMask(); } -void CopyEFB(const BPCmd &bp, const EFBRectangle &rc, const u32 &address, const bool &fromZBuffer, const bool &isIntensityFmt, const u32 &copyfmt, const int &scaleByHalf) +void CopyEFB(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { // bpmem.zcontrol.pixel_format to PIXELFMT_Z24 is when the game wants to copy from ZBuffer (Zbuffer uses 24-bit Format) if (g_ActiveConfig.bEFBCopyEnable) { - TextureCache::CopyRenderTargetToTexture(address, fromZBuffer, isIntensityFmt, copyfmt, !!scaleByHalf, rc); + TextureCache::CopyRenderTargetToTexture(dstAddr, dstFormat, srcFormat, + srcRect, isIntensity, scaleByHalf); } } diff --git a/Source/Core/VideoCommon/Src/BPFunctions.h b/Source/Core/VideoCommon/Src/BPFunctions.h index e2a184f65e..d104b12751 100644 --- a/Source/Core/VideoCommon/Src/BPFunctions.h +++ b/Source/Core/VideoCommon/Src/BPFunctions.h @@ -45,7 +45,8 @@ void SetBlendMode(const BPCmd &bp); void SetDitherMode(const BPCmd &bp); void SetLogicOpMode(const BPCmd &bp); void SetColorMask(const BPCmd &bp); -void CopyEFB(const BPCmd &bp, const EFBRectangle &rc, const u32 &address, const bool &fromZBuffer, const bool &isIntensityFmt, const u32 &copyfmt, const int &scaleByHalf); +void CopyEFB(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); void ClearScreen(const BPCmd &bp, const EFBRectangle &rc); void OnPixelFormatChange(const BPCmd &bp); u8 *GetPointer(const u32 &address); diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 600a4e45af..259a402240 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -248,10 +248,9 @@ void BPWritten(const BPCmd& bp) if (GetConfig(CONFIG_SHOWEFBREGIONS)) 
stats.efb_regions.push_back(rc); - CopyEFB(bp, rc, bpmem.copyTexDest << 5, - bpmem.zcontrol.pixel_format == PIXELFMT_Z24, - PE_copy.intensity_fmt > 0,PE_copy.tp_realFormat(), - PE_copy.half_scale); + CopyEFB(bpmem.copyTexDest << 5, PE_copy.tp_realFormat(), + bpmem.zcontrol.pixel_format, rc, PE_copy.intensity_fmt, + PE_copy.half_scale); } else { diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp index 47525af3fa..e8aef15f1c 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp @@ -421,8 +421,8 @@ return_entry: return entry; } -void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, - bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, const EFBRectangle &source_rect) +void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { float colmat[28] = {0}; float *const fConstAdd = colmat + 16; @@ -431,9 +431,9 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f; unsigned int cbufid = -1; - if (bFromZBuffer) + if (srcFormat == PIXELFMT_Z24) { - switch (copyfmt) + switch (dstFormat) { case 0: // Z4 colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; @@ -476,17 +476,17 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, break; default: - ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt); + ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat); colmat[2] = colmat[5] = colmat[8] = 1.0f; cbufid = 7; break; } } - else if (bIsIntensityFmt) + else if (isIntensity) { fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f/255.0f; - switch (copyfmt) + switch (dstFormat) { case 0: // I4 case 1: // I8 @@ -498,11 +498,11 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, 
bool bFromZBuffer, colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f; colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f; - if (copyfmt < 2 || copyfmt == 8) + if (dstFormat < 2 || dstFormat == 8) { colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; fConstAdd[3] = 16.0f/255.0f; - if (copyfmt == 0) + if (dstFormat == 0) { ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f; @@ -516,7 +516,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, else// alpha { colmat[15] = 1; - if (copyfmt == 2) + if (dstFormat == 2) { ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f; @@ -531,7 +531,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, break; default: - ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt); + ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat); colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; cbufid = 23; break; @@ -539,7 +539,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, } else { - switch (copyfmt) + switch (dstFormat) { case 0: // R4 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; @@ -612,22 +612,22 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, break; default: - ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt); + ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat); colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; cbufid = 23; break; } } - const unsigned int tex_w = (abs(source_rect.GetWidth()) >> (int)bScaleByHalf); - const unsigned int tex_h = (abs(source_rect.GetHeight()) >> (int)bScaleByHalf); + const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth()/2 : srcRect.GetWidth(); + const unsigned int tex_h = scaleByHalf ? 
srcRect.GetHeight()/2 : srcRect.GetHeight(); unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w; unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h; bool texture_is_dynamic = false; - TCacheEntryBase *entry = textures[address]; + TCacheEntryBase *entry = textures[dstAddr]; if (entry) { if ((entry->isRenderTarget && entry->virtualW == scaled_tex_w && entry->virtualH == scaled_tex_h) @@ -652,9 +652,9 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, if (NULL == entry) { // create the texture - textures[address] = entry = g_texture_cache->CreateRenderTargetTexture(scaled_tex_w, scaled_tex_h); + textures[dstAddr] = entry = g_texture_cache->CreateRenderTargetTexture(scaled_tex_w, scaled_tex_h); - entry->addr = address; + entry->addr = dstAddr; entry->hash = 0; entry->realW = tex_w; @@ -663,7 +663,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, entry->virtualW = scaled_tex_w; entry->virtualH = scaled_tex_h; - entry->format = copyfmt; + entry->format = dstFormat; entry->mipLevels = 0; entry->isRenderTarget = true; @@ -675,7 +675,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, g_renderer->ResetAPIState(); // reset any game specific settings - entry->FromRenderTarget(bFromZBuffer, bScaleByHalf, cbufid, colmat, source_rect, bIsIntensityFmt, copyfmt); + entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); g_renderer->RestoreAPIState(); } diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.h b/Source/Core/VideoCommon/Src/TextureCacheBase.h index 0a57d0a486..1c86e64006 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.h +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.h @@ -63,9 +63,10 @@ public: virtual void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level, bool autogen_mips 
= false) = 0; - virtual void FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float *colmat, const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt) = 0; + virtual void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat) = 0; int IntersectsMemoryRange(u32 range_address, u32 range_size) const; }; @@ -87,8 +88,8 @@ public: static TCacheEntryBase* Load(unsigned int stage, u32 address, unsigned int width, unsigned int height, int format, unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel); - static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, - u32 copyfmt, bool bScaleByHalf, const EFBRectangle &source_rect); + static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); static bool DeferredInvalidate; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.cpp b/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.cpp index 76351142f6..80e3482b95 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.cpp @@ -25,6 +25,10 @@ namespace DX11 { +HINSTANCE hD3DCompilerDll = NULL; +D3DREFLECT PD3DReflect = NULL; +int d3dcompiler_dll_ref = 0; + HINSTANCE hD3DXDll = NULL; D3DX11COMPILEFROMMEMORYTYPE PD3DX11CompileFromMemory = NULL; D3DX11FILTERTEXTURETYPE PD3DX11FilterTexture = NULL; @@ -113,7 +117,7 @@ HRESULT LoadD3DX() // try to load D3DX11 first to check whether we have proper runtime support // try to use the dll the backend was compiled against first - don't bother about debug runtimes - hD3DXDll = LoadLibraryA(StringFromFormat("d3dx11_%d.dll", D3DX11_SDK_VERSION).c_str()); + hD3DXDll = LoadLibraryA(D3DX11_DLL_A); if (!hD3DXDll) { // if that fails, use the dll which 
should be available in every SDK which officially supports DX11. @@ -144,6 +148,35 @@ HRESULT LoadD3DX() return S_OK; } +HRESULT LoadD3DCompiler() +{ + if (d3dcompiler_dll_ref++ > 0) return S_OK; + if (hD3DCompilerDll) return S_OK; + + // try to load D3DCompiler first to check whether we have proper runtime support + // try to use the dll the backend was compiled against first - don't bother about debug runtimes + hD3DCompilerDll = LoadLibraryA(D3DCOMPILER_DLL_A); + if (!hD3DCompilerDll) + { + // if that fails, use the dll which should be available in every SDK which officially supports DX11. + hD3DCompilerDll = LoadLibraryA("D3DCompiler_42.dll"); + if (!hD3DCompilerDll) + { + MessageBoxA(NULL, "Failed to load D3DCompiler_42.dll, update your DX11 runtime, please", "Critical error", MB_OK | MB_ICONERROR); + return E_FAIL; + } + else + { + NOTICE_LOG(VIDEO, "Successfully loaded D3DCompiler_42.dll. If you're having trouble, try updating your DX runtime first."); + } + } + + PD3DReflect = (D3DREFLECT)GetProcAddress(hD3DCompilerDll, "D3DReflect"); + if (PD3DReflect == NULL) MessageBoxA(NULL, "GetProcAddress failed for D3DReflect!", "Critical error", MB_OK | MB_ICONERROR); + + return S_OK; +} + void UnloadDXGI() { if (!dxgi_dll_ref) return; @@ -177,6 +210,16 @@ void UnloadD3D() PD3D11CreateDeviceAndSwapChain = NULL; } +void UnloadD3DCompiler() +{ + if (!d3dcompiler_dll_ref) return; + if (--d3dcompiler_dll_ref != 0) return; + + if (hD3DCompilerDll) FreeLibrary(hD3DCompilerDll); + hD3DCompilerDll = NULL; + PD3DReflect = NULL; +} + void EnumAAModes(IDXGIAdapter* adapter, std::vector& aa_modes) { aa_modes.clear(); @@ -232,10 +275,13 @@ HRESULT Create(HWND wnd) hr = LoadDXGI(); if (SUCCEEDED(hr)) hr = LoadD3D(); if (SUCCEEDED(hr)) hr = LoadD3DX(); + if (SUCCEEDED(hr)) hr = LoadD3DCompiler(); if (FAILED(hr)) { UnloadDXGI(); UnloadD3D(); + UnloadD3DX(); + UnloadD3DCompiler(); return hr; } diff --git a/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.h 
b/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.h index 9bd5c1f686..16cc1c7865 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/D3DBase.h @@ -17,7 +17,8 @@ #pragma once -#include +#include +#include #include "Common.h" #include @@ -37,9 +38,11 @@ namespace D3D HRESULT LoadDXGI(); HRESULT LoadD3D(); HRESULT LoadD3DX(); +HRESULT LoadD3DCompiler(); void UnloadDXGI(); void UnloadD3D(); void UnloadD3DX(); +void UnloadD3DCompiler(); void EnumAAModes(IDXGIAdapter* adapter, std::vector& aa_modes); DXGI_SAMPLE_DESC GetAAMode(int index); @@ -72,7 +75,7 @@ unsigned int GetMaxTextureSize(); inline void SetDebugObjectName(ID3D11DeviceChild* resource, const char* name) { #if defined(_DEBUG) || defined(DEBUGFAST) - resource->SetPrivateData( WKPDID_D3DDebugObjectName, strlen(name), name); + resource->SetPrivateData( WKPDID_D3DDebugObjectName, (UINT)strlen(name), name); #endif } @@ -105,4 +108,7 @@ extern CREATEDXGIFACTORY PCreateDXGIFactory; typedef HRESULT (WINAPI* D3D11CREATEDEVICE)(IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, UINT, CONST D3D_FEATURE_LEVEL*, UINT, UINT, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext**); extern D3D11CREATEDEVICE PD3D11CreateDevice; +typedef HRESULT (WINAPI *D3DREFLECT)(LPCVOID, SIZE_T, REFIID, void**); +extern D3DREFLECT PD3DReflect; + } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.cpp index 08d4840a64..a398417e61 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.cpp @@ -54,8 +54,14 @@ bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob) #endif HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, NULL, NULL, "main", D3D::VertexShaderVersionString(), flags, 0, NULL, &shaderBuffer, &errorBuffer, NULL); + + if (errorBuffer) + { + INFO_LOG(VIDEO, "Vertex shader compiler messages:\n%s\n", + (const 
char*)errorBuffer->GetBufferPointer()); + } - if (FAILED(hr) || errorBuffer) + if (FAILED(hr)) { if (g_ActiveConfig.bShowShaderErrors) { @@ -90,7 +96,8 @@ ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned } // code->bytecode -bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob) +bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob, + const D3D_SHADER_MACRO* pDefines) { ID3D10Blob* shaderBuffer = NULL; ID3D10Blob* errorBuffer = NULL; @@ -100,10 +107,16 @@ bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob) #else UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3; #endif - HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, NULL, NULL, "main", D3D::PixelShaderVersionString(), + HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, pDefines, NULL, "main", D3D::PixelShaderVersionString(), flags, 0, NULL, &shaderBuffer, &errorBuffer, NULL); + + if (errorBuffer) + { + INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s", + (const char*)errorBuffer->GetBufferPointer()); + } - if (FAILED(hr) || errorBuffer) + if (FAILED(hr)) { if (g_ActiveConfig.bShowShaderErrors) { @@ -121,6 +134,7 @@ bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob) *blob = new D3DBlob(shaderBuffer); shaderBuffer->Release(); } + return SUCCEEDED(hr); } diff --git a/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.h b/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.h index 49407dc767..a60382ca79 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/D3DShader.h @@ -17,6 +17,7 @@ #pragma once +#include "D3DBase.h" #include "D3DBlob.h" struct ID3D11PixelShader; @@ -32,7 +33,7 @@ namespace D3D // The returned bytecode buffers should be Release()d. 
bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob); - bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob); + bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines = NULL); // Utility functions ID3D11VertexShader* CompileAndCreateVertexShader(const char* code, unsigned int len); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp new file mode 100644 index 0000000000..6663238b26 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp @@ -0,0 +1,1257 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "PSTextureEncoder.h" + +#include "D3DBase.h" +#include "D3DShader.h" +#include "GfxState.h" +#include "BPMemory.h" +#include "FramebufferManager.h" +#include "Render.h" +#include "HW/Memmap.h" +#include "TextureCache.h" + +// "Static mode" will compile a new EFB encoder shader for every combination of +// encoding configurations. It's compatible with Shader Model 4. + +// "Dynamic mode" will use the dynamic-linking feature of Shader Model 5. Only +// one shader needs to be compiled. + +// Unfortunately, the June 2010 DirectX SDK includes a broken HLSL compiler +// which cripples dynamic linking for us. 
+// See . +// Dynamic mode is disabled for now. To enable it, uncomment the line below. + +//#define USE_DYNAMIC_MODE + +// FIXME: When Microsoft fixes their HLSL compiler, make Dolphin enable dynamic +// mode on Shader Model 5-compatible cards. + +namespace DX11 +{ + +union EFBEncodeParams +{ + struct + { + UINT NumHalfCacheLinesX; + UINT NumBlocksY; + UINT PosX; + UINT PosY; + FLOAT TexLeft; + FLOAT TexTop; + FLOAT TexRight; + FLOAT TexBottom; + }; + // Constant buffers must be a multiple of 16 bytes in size. + u8 pad[32]; // Should be at least the size of the struct above +}; + +static const char EFB_ENCODE_VS[] = +"// dolphin-emu EFB encoder vertex shader\n" + +"uniform struct\n" // Should match EFBEncodeParams above +"{\n" + "uint NumHalfCacheLinesX;\n" + "uint NumBlocksY;\n" + "uint PosX;\n" // Upper-left corner of source + "uint PosY;\n" + "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) + "float TexTop;\n" + "float TexRight;\n" + "float TexBottom;\n" +"} Params : register(c0);\n" + +"struct Output\n" +"{\n" + "float4 Pos : SV_Position;\n" + "float2 Coord : ENCODECOORD;\n" +"};\n" + +"Output main(in float2 Pos : POSITION)\n" +"{\n" + "Output result;\n" + "result.Pos = float4(2*Pos.x-1, -2*Pos.y+1, 0.0, 1.0);\n" + "result.Coord = Pos * float2(Params.NumHalfCacheLinesX, Params.NumBlocksY);\n" + "return result;\n" +"}\n" +; + +static const char EFB_ENCODE_PS[] = +"// dolphin-emu EFB encoder pixel shader\n" + +// Input + +"uniform struct\n" // Should match EFBEncodeParams above +"{\n" + "uint NumHalfCacheLinesX;\n" + "uint NumBlocksY;\n" + "uint PosX;\n" // Upper-left corner of source + "uint PosY;\n" + "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) + "float TexTop;\n" + "float TexRight;\n" + "float TexBottom;\n" +"} Params : register(c0);\n" + +"Texture2D EFBTexture : register(t0);\n" +"sampler EFBSampler : register(s0);\n" + +// Constants + +"static const float2 INV_EFB_DIMS 
= float2(1.0/640.0, 1.0/528.0);\n" + +// FIXME: Is this correct? +"static const float3 INTENSITY_COEFFS = float3(0.257, 0.504, 0.098);\n" +"static const float INTENSITY_ADD = 16.0/255.0;\n" + +// Utility functions + +"uint ExtractA(uint pixel) { return pixel >> 24; }\n" +"uint ExtractA1(uint pixel) { return ExtractA(pixel) >> 7; }\n" +"uint ExtractA3(uint pixel) { return ExtractA(pixel) >> 5; }\n" +"uint ExtractR(uint pixel) { return (pixel >> 16) & 0xFF; }\n" +"uint ExtractR4(uint pixel) { return ExtractR(pixel) >> 4; }\n" +"uint ExtractR5(uint pixel) { return ExtractR(pixel) >> 3; }\n" +"uint ExtractG(uint pixel) { return (pixel >> 8) & 0xFF; }\n" +"uint ExtractG4(uint pixel) { return ExtractG(pixel) >> 4; }\n" +"uint ExtractG5(uint pixel) { return ExtractG(pixel) >> 3; }\n" +"uint ExtractG6(uint pixel) { return ExtractG(pixel) >> 2; }\n" +"uint ExtractB(uint pixel) { return pixel & 0xFF; }\n" +"uint ExtractB4(uint pixel) { return ExtractB(pixel) >> 4; }\n" +"uint ExtractB5(uint pixel) { return ExtractB(pixel) >> 3; }\n" + +"uint Swap32(uint v) {\n" + "return (((v >> 24) & 0xFF) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | ((v << 24) & 0xFF000000));\n" +"}\n" + +"uint UINT_8888(uint a, uint b, uint c, uint d) {\n" + "return (a << 24) | (b << 16) | (c << 8) | d;\n" +"}\n" + +"uint UINT_44444444(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n" + "return (a << 28) | (b << 24) | (c << 20) | (d << 16) | (e << 12) | (f << 8) | (g << 4) | h;\n" +"}\n" + +"uint UINT_1555(uint a, uint b, uint c, uint d) {\n" + "return (a << 15) | (b << 10) | (c << 5) | d;\n" +"}\n" + +"uint UINT_3444(uint a, uint b, uint c, uint d) {\n" + "return (a << 12) | (b << 8) | (c << 4) | d;\n" +"}\n" + +"uint UINT_565(uint a, uint b, uint c) {\n" + "return (a << 11) | (b << 5) | c;\n" +"}\n" + +"uint UINT_1616(uint a, uint b) {\n" + "return (a << 16) | b;\n" +"}\n" + +"uint EncodeRGB5A3(uint pixel) {\n" + "if (ExtractA(pixel) >= 224) {\n" + // Encode to ARGB1555 + "return 
UINT_1555(ExtractA1(pixel), ExtractR5(pixel), ExtractG5(pixel), ExtractB5(pixel));\n" + "} else {\n" + // Encode to ARGB3444 + "return UINT_3444(ExtractA3(pixel), ExtractR4(pixel), ExtractG4(pixel), ExtractB4(pixel));\n" + "}\n" +"}\n" + +"uint EncodeRGB565(uint pixel) {\n" + "return UINT_565(ExtractR5(pixel), ExtractG6(pixel), ExtractB5(pixel));\n" +"}\n" + +"float2 CalcTexCoord(uint2 coord)\n" +"{\n" + // Add 0.5,0.5 to sample from the center of the EFB pixel + "float2 efbCoord = float2(coord) + float2(0.5,0.5);\n" + "return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n" +"}\n" + +// Interface and classes for different source formats + +"float4 Fetch_0(uint2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" + "result.a = 1.0;\n" + "return result;\n" +"}\n" + +"float4 Fetch_1(uint2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "return EFBTexture.Sample(EFBSampler, texCoord);\n" +"}\n" + +"float4 Fetch_2(uint2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + "float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n" + "result.a = 1.0;\n" + "return result;\n" +"}\n" + +"float4 Fetch_3(uint2 coord)\n" +"{\n" + "float2 texCoord = CalcTexCoord(coord);\n" + // Ref: + // Ref: + "float depth = 255.99998474121094 * EFBTexture.Sample(EFBSampler, texCoord).r;\n" + "float4 result = depth.rrrr;\n" + + "result.a = floor(result.a);\n" // bits 31..24 + + "result.rgb -= result.a;\n" + "result.rgb *= 256.0;\n" + "result.r = floor(result.r);\n" // bits 23..16 + + "result.gb -= result.r;\n" + "result.gb *= 256.0;\n" + "result.g = floor(result.g);\n" // bits 15..8 + + "result.b -= result.g;\n" + "result.b *= 256.0;\n" + "result.b = floor(result.b);\n" // bits 7..0 + + "result = float4(result.arg / 255.0, 1.0);\n" + "return result;\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iFetch\n" +"{\n" + "float4 
Fetch(uint2 coord);\n" +"};\n" + +// Source format 0 +"class cFetch_0 : iFetch\n" +"{\n" + "float4 Fetch(uint2 coord)\n" + "{ return Fetch_0(coord); }\n" +"};\n" + + +// Source format 1 +"class cFetch_1 : iFetch\n" +"{\n" + "float4 Fetch(uint2 coord)\n" + "{ return Fetch_1(coord); }\n" +"};\n" + +// Source format 2 +"class cFetch_2 : iFetch\n" +"{\n" + "float4 Fetch(uint2 coord)\n" + "{ return Fetch_2(coord); }\n" +"};\n" + +// Source format 3 +"class cFetch_3 : iFetch\n" +"{\n" + "float4 Fetch(uint2 coord)\n" + "{ return Fetch_3(coord); }\n" +"};\n" + +// Declare fetch interface; must be set by application +"iFetch g_fetch;\n" +"#define IMP_FETCH g_fetch.Fetch\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_FETCH\n" +"#error No Fetch specified\n" +"#endif\n" + +// Interface and classes for different intensity settings (on or off) + +"float4 Intensity_0(float4 sample)\n" +"{\n" + "return sample;\n" +"}\n" + +"float4 Intensity_1(float4 sample)\n" +"{\n" + "sample.r = dot(INTENSITY_COEFFS, sample.rgb) + INTENSITY_ADD;\n" + // FIXME: Is this correct? What happens if you use one of the non-R + // formats with intensity on? 
+ "sample = sample.rrrr;\n" + "return sample;\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iIntensity\n" +"{\n" + "float4 Intensity(float4 sample);\n" +"};\n" + +// Intensity off +"class cIntensity_0 : iIntensity\n" +"{\n" + "float4 Intensity(float4 sample)\n" + "{ return Intensity_0(sample); }\n" +"};\n" + +// Intensity on +"class cIntensity_1 : iIntensity\n" +"{\n" + "float4 Intensity(float4 sample)\n" + "{ return Intensity_1(sample); }\n" +"};\n" + +// Declare intensity interface; must be set by application +"iIntensity g_intensity;\n" +"#define IMP_INTENSITY g_intensity.Intensity\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_INTENSITY\n" +"#error No Intensity specified\n" +"#endif\n" + + +// Interface and classes for different scale/filter settings (on or off) + +"float4 ScaledFetch_0(uint2 coord)\n" +"{\n" + "return IMP_FETCH(uint2(Params.PosX,Params.PosY) + coord);\n" +"}\n" + +"float4 ScaledFetch_1(uint2 coord)\n" +"{\n" + "uint2 ul = uint2(Params.PosX,Params.PosY) + 2*coord;\n" + "float4 sample0 = IMP_FETCH(ul+uint2(0,0));\n" + "float4 sample1 = IMP_FETCH(ul+uint2(1,0));\n" + "float4 sample2 = IMP_FETCH(ul+uint2(0,1));\n" + "float4 sample3 = IMP_FETCH(ul+uint2(1,1));\n" + // Average all four samples together + // FIXME: Is this correct? 
+ "return 0.25 * (sample0+sample1+sample2+sample3);\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iScaledFetch\n" +"{\n" + "float4 ScaledFetch(uint2 coord);\n" +"};\n" + +// Scale off +"class cScaledFetch_0 : iScaledFetch\n" +"{\n" + "float4 ScaledFetch(uint2 coord)\n" + "{ return ScaledFetch_0(coord); }\n" +"};\n" + +// Scale on +"class cScaledFetch_1 : iScaledFetch\n" +"{\n" + "float4 ScaledFetch(uint2 coord)\n" + "{ return ScaledFetch_1(coord); }\n" +"};\n" + +// Declare scaled fetch interface; must be set by application code +"iScaledFetch g_scaledFetch;\n" +"#define IMP_SCALEDFETCH g_scaledFetch.ScaledFetch\n" + +"#endif\n" // #ifdef DYNAMIC_MODE + +"#ifndef IMP_SCALEDFETCH\n" +"#error No ScaledFetch specified\n" +"#endif\n" + +// Main EFB-sampling function: performs all steps of fetching pixels, scaling, +// applying intensity function + +"uint SampleEFB(uint2 coord)\n" +"{\n" + // FIXME: Does intensity happen before or after scaling? Or does + // it matter? + "float4 sample = IMP_SCALEDFETCH(coord);\n" + "sample = IMP_INTENSITY(sample);\n" + "float4 byteSample = 255.0 * sample;\n" + "return UINT_8888(byteSample.a, byteSample.r, byteSample.g, byteSample.b);\n" +"}\n" + +// Interfaces and classes for different destination formats + +"uint4 Generate_0(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(8,8);\n" + "uint2 subBlockUL = blockUL + uint2(0, 4*(cacheCoord.x%2));\n" + + "uint sample[32];\n" + "for (uint y = 0; y < 4; ++y) {\n" + "for (uint x = 0; x < 8; ++x) {\n" + "sample[y*8+x] = SampleEFB(subBlockUL+uint2(x,y));\n" + "}\n" + "}\n" + + "uint dw[4];\n" + "for (uint i = 0; i < 4; ++i) {\n" + "dw[i] = UINT_44444444(\n" + "ExtractR4(sample[8*i+0]), ExtractR4(sample[8*i+1]), ExtractR4(sample[8*i+2]), ExtractR4(sample[8*i+3]),\n" + "ExtractR4(sample[8*i+4]), ExtractR4(sample[8*i+5]), ExtractR4(sample[8*i+6]), ExtractR4(sample[8*i+7])\n" + ");\n" + "}\n" + + "return uint4(Swap32(dw[0]), 
Swap32(dw[1]), Swap32(dw[2]), Swap32(dw[3]));\n" +"}\n" + +"uint4 Generate_4(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(4,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(3,1));\n" + + "uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n" + "uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n" + "uint dw2 = UINT_1616(EncodeRGB565(sample4), EncodeRGB565(sample5));\n" + "uint dw3 = UINT_1616(EncodeRGB565(sample6), EncodeRGB565(sample7));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_5(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(4,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(3,1));\n" + + "uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n" + "uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n" + "uint dw2 = UINT_1616(EncodeRGB5A3(sample4), EncodeRGB5A3(sample5));\n" + "uint dw3 = 
UINT_1616(EncodeRGB5A3(sample6), EncodeRGB5A3(sample7));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_6(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(4,1);\n" + + "uint2 blockUL = blockCoord * uint2(4,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(3,1));\n" + + "uint dw0;\n" + "uint dw1;\n" + "uint dw2;\n" + "uint dw3;\n" + "if (cacheCoord.x % 4 < 2)\n" + "{\n" + // First cache line gets AR + "dw0 = UINT_8888(ExtractA(sample0), ExtractR(sample0), ExtractA(sample1), ExtractR(sample1));\n" + "dw1 = UINT_8888(ExtractA(sample2), ExtractR(sample2), ExtractA(sample3), ExtractR(sample3));\n" + "dw2 = UINT_8888(ExtractA(sample4), ExtractR(sample4), ExtractA(sample5), ExtractR(sample5));\n" + "dw3 = UINT_8888(ExtractA(sample6), ExtractR(sample6), ExtractA(sample7), ExtractR(sample7));\n" + "}\n" + "else\n" + "{\n" + // Second cache line gets GB + "dw0 = UINT_8888(ExtractG(sample0), ExtractB(sample0), ExtractG(sample1), ExtractB(sample1));\n" + "dw1 = UINT_8888(ExtractG(sample2), ExtractB(sample2), ExtractG(sample3), ExtractB(sample3));\n" + "dw2 = UINT_8888(ExtractG(sample4), ExtractB(sample4), ExtractG(sample5), ExtractB(sample5));\n" + "dw3 = UINT_8888(ExtractG(sample6), ExtractB(sample6), ExtractG(sample7), ExtractB(sample7));\n" + "}\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_7(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = 
blockCoord * uint2(8,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(4,0));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(5,0));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(6,0));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(7,0));\n" + "uint sample8 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample9 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sampleA = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sampleB = SampleEFB(subBlockUL+uint2(3,1));\n" + "uint sampleC = SampleEFB(subBlockUL+uint2(4,1));\n" + "uint sampleD = SampleEFB(subBlockUL+uint2(5,1));\n" + "uint sampleE = SampleEFB(subBlockUL+uint2(6,1));\n" + "uint sampleF = SampleEFB(subBlockUL+uint2(7,1));\n" + + "uint dw0 = UINT_8888(ExtractA(sample0), ExtractA(sample1), ExtractA(sample2), ExtractA(sample3));\n" + "uint dw1 = UINT_8888(ExtractA(sample4), ExtractA(sample5), ExtractA(sample6), ExtractA(sample7));\n" + "uint dw2 = UINT_8888(ExtractA(sample8), ExtractA(sample9), ExtractA(sampleA), ExtractA(sampleB));\n" + "uint dw3 = UINT_8888(ExtractA(sampleC), ExtractA(sampleD), ExtractA(sampleE), ExtractA(sampleF));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_8(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(8,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(4,0));\n" + "uint sample5 = 
SampleEFB(subBlockUL+uint2(5,0));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(6,0));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(7,0));\n" + "uint sample8 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample9 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sampleA = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sampleB = SampleEFB(subBlockUL+uint2(3,1));\n" + "uint sampleC = SampleEFB(subBlockUL+uint2(4,1));\n" + "uint sampleD = SampleEFB(subBlockUL+uint2(5,1));\n" + "uint sampleE = SampleEFB(subBlockUL+uint2(6,1));\n" + "uint sampleF = SampleEFB(subBlockUL+uint2(7,1));\n" + + "uint dw0 = UINT_8888(ExtractR(sample0), ExtractR(sample1), ExtractR(sample2), ExtractR(sample3));\n" + "uint dw1 = UINT_8888(ExtractR(sample4), ExtractR(sample5), ExtractR(sample6), ExtractR(sample7));\n" + "uint dw2 = UINT_8888(ExtractR(sample8), ExtractR(sample9), ExtractR(sampleA), ExtractR(sampleB));\n" + "uint dw3 = UINT_8888(ExtractR(sampleC), ExtractR(sampleD), ExtractR(sampleE), ExtractR(sampleF));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_A(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(8,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(4,0));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(5,0));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(6,0));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(7,0));\n" + "uint sample8 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample9 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sampleA = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sampleB = SampleEFB(subBlockUL+uint2(3,1));\n" + "uint sampleC = 
SampleEFB(subBlockUL+uint2(4,1));\n" + "uint sampleD = SampleEFB(subBlockUL+uint2(5,1));\n" + "uint sampleE = SampleEFB(subBlockUL+uint2(6,1));\n" + "uint sampleF = SampleEFB(subBlockUL+uint2(7,1));\n" + + "uint dw0 = UINT_8888(ExtractB(sample0), ExtractB(sample1), ExtractB(sample2), ExtractB(sample3));\n" + "uint dw1 = UINT_8888(ExtractB(sample4), ExtractB(sample5), ExtractB(sample6), ExtractB(sample7));\n" + "uint dw2 = UINT_8888(ExtractB(sample8), ExtractB(sample9), ExtractB(sampleA), ExtractB(sampleB));\n" + "uint dw3 = UINT_8888(ExtractB(sampleC), ExtractB(sampleD), ExtractB(sampleE), ExtractB(sampleF));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"uint4 Generate_B(uint2 cacheCoord)\n" +"{\n" + "uint2 blockCoord = cacheCoord / uint2(2,1);\n" + + "uint2 blockUL = blockCoord * uint2(4,4);\n" + "uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n" + + "uint sample0 = SampleEFB(subBlockUL+uint2(0,0));\n" + "uint sample1 = SampleEFB(subBlockUL+uint2(1,0));\n" + "uint sample2 = SampleEFB(subBlockUL+uint2(2,0));\n" + "uint sample3 = SampleEFB(subBlockUL+uint2(3,0));\n" + "uint sample4 = SampleEFB(subBlockUL+uint2(0,1));\n" + "uint sample5 = SampleEFB(subBlockUL+uint2(1,1));\n" + "uint sample6 = SampleEFB(subBlockUL+uint2(2,1));\n" + "uint sample7 = SampleEFB(subBlockUL+uint2(3,1));\n" + + "uint dw0 = UINT_8888(ExtractG(sample0), ExtractR(sample0), ExtractG(sample1), ExtractR(sample1));\n" + "uint dw1 = UINT_8888(ExtractG(sample2), ExtractR(sample2), ExtractG(sample3), ExtractR(sample3));\n" + "uint dw2 = UINT_8888(ExtractG(sample4), ExtractR(sample4), ExtractG(sample5), ExtractR(sample5));\n" + "uint dw3 = UINT_8888(ExtractG(sample6), ExtractR(sample6), ExtractG(sample7), ExtractR(sample7));\n" + + "return uint4(Swap32(dw0), Swap32(dw1), Swap32(dw2), Swap32(dw3));\n" +"}\n" + +"#ifdef DYNAMIC_MODE\n" +"interface iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord);\n" +"};\n" + +"class cGenerator_4 : 
iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord)\n" + "{ return Generate_4(cacheCoord); }\n" +"};\n" + +"class cGenerator_5 : iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord)\n" + "{ return Generate_5(cacheCoord); }\n" +"};\n" + +"class cGenerator_6 : iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord)\n" + "{ return Generate_6(cacheCoord); }\n" +"};\n" + +"class cGenerator_8 : iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord)\n" + "{ return Generate_8(cacheCoord); }\n" +"};\n" + +"class cGenerator_B : iGenerator\n" +"{\n" + "uint4 Generate(uint2 cacheCoord)\n" + "{ return Generate_B(cacheCoord); }\n" +"};\n" + +// Declare generator interface; must be set by application +"iGenerator g_generator;\n" +"#define IMP_GENERATOR g_generator.Generate\n" + +"#endif\n" + +"#ifndef IMP_GENERATOR\n" +"#error No generator specified\n" +"#endif\n" + +"void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n" +"{\n" + "uint2 cacheCoord = uint2(fCacheCoord);\n" + "ocol0 = IMP_GENERATOR(cacheCoord);\n" +"}\n" +; + +PSTextureEncoder::PSTextureEncoder() + : m_ready(false), m_out(NULL), m_outRTV(NULL), m_outStage(NULL), + m_encodeParams(NULL), + m_quad(NULL), m_vShader(NULL), m_quadLayout(NULL), + m_efbEncodeBlendState(NULL), m_efbEncodeDepthState(NULL), + m_efbEncodeRastState(NULL), m_efbSampler(NULL), + m_dynamicShader(NULL), m_classLinkage(NULL) +{ + for (size_t i = 0; i < 4; ++i) + m_fetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_scaledFetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_intensityClass[i] = NULL; + for (size_t i = 0; i < 16; ++i) + m_generatorClass[i] = NULL; +} + +static const D3D11_INPUT_ELEMENT_DESC QUAD_LAYOUT_DESC[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 } +}; + +static const struct QuadVertex +{ + float posX; + float posY; +} QUAD_VERTS[4] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; + +void PSTextureEncoder::Init() 
+{ + m_ready = false; + + HRESULT hr; + + // Create output texture RGBA format + + // This format allows us to generate one cache line in two pixels. + D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( + DXGI_FORMAT_R32G32B32A32_UINT, + EFB_WIDTH, EFB_HEIGHT/4, 1, 1, D3D11_BIND_RENDER_TARGET); + hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_out); + CHECK(SUCCEEDED(hr), "create efb encode output texture"); + D3D::SetDebugObjectName(m_out, "efb encoder output texture"); + + // Create output render target view + + D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, + D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R32G32B32A32_UINT); + hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); + CHECK(SUCCEEDED(hr), "create efb encode output render target view"); + D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); + + // Create output staging buffer + + t2dd.Usage = D3D11_USAGE_STAGING; + t2dd.BindFlags = 0; + t2dd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + hr = D3D::device->CreateTexture2D(&t2dd, NULL, &m_outStage); + CHECK(SUCCEEDED(hr), "create efb encode output staging buffer"); + D3D::SetDebugObjectName(m_outStage, "efb encoder output staging buffer"); + + // Create constant buffer for uploading data to shaders + + D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), + D3D11_BIND_CONSTANT_BUFFER); + hr = D3D::device->CreateBuffer(&bd, NULL, &m_encodeParams); + CHECK(SUCCEEDED(hr), "create efb encode params buffer"); + D3D::SetDebugObjectName(m_encodeParams, "efb encoder params buffer"); + + // Create vertex quad + + bd = CD3D11_BUFFER_DESC(sizeof(QUAD_VERTS), D3D11_BIND_VERTEX_BUFFER, + D3D11_USAGE_IMMUTABLE); + D3D11_SUBRESOURCE_DATA srd = { QUAD_VERTS, 0, 0 }; + + hr = D3D::device->CreateBuffer(&bd, &srd, &m_quad); + CHECK(SUCCEEDED(hr), "create efb encode quad vertex buffer"); + D3D::SetDebugObjectName(m_quad, "efb encoder quad vertex buffer"); + + // Create vertex shader + + D3DBlob* bytecode = NULL; + if 
(!D3D::CompileVertexShader(EFB_ENCODE_VS, sizeof(EFB_ENCODE_VS), &bytecode)) + { + ERROR_LOG(VIDEO, "EFB encode vertex shader failed to compile"); + return; + } + + hr = D3D::device->CreateVertexShader(bytecode->Data(), bytecode->Size(), NULL, &m_vShader); + CHECK(SUCCEEDED(hr), "create efb encode vertex shader"); + D3D::SetDebugObjectName(m_vShader, "efb encoder vertex shader"); + + // Create input layout for vertex quad using bytecode from vertex shader + + hr = D3D::device->CreateInputLayout(QUAD_LAYOUT_DESC, + sizeof(QUAD_LAYOUT_DESC)/sizeof(D3D11_INPUT_ELEMENT_DESC), + bytecode->Data(), bytecode->Size(), &m_quadLayout); + CHECK(SUCCEEDED(hr), "create efb encode quad vertex layout"); + D3D::SetDebugObjectName(m_quadLayout, "efb encoder quad layout"); + + bytecode->Release(); + + // Create pixel shader + +#ifdef USE_DYNAMIC_MODE + if (!InitDynamicMode()) +#else + if (!InitStaticMode()) +#endif + return; + + // Create blend state + + D3D11_BLEND_DESC bld = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); + hr = D3D::device->CreateBlendState(&bld, &m_efbEncodeBlendState); + CHECK(SUCCEEDED(hr), "create efb encode blend state"); + D3D::SetDebugObjectName(m_efbEncodeBlendState, "efb encoder blend state"); + + // Create depth state + + D3D11_DEPTH_STENCIL_DESC dsd = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); + dsd.DepthEnable = FALSE; + hr = D3D::device->CreateDepthStencilState(&dsd, &m_efbEncodeDepthState); + CHECK(SUCCEEDED(hr), "create efb encode depth state"); + D3D::SetDebugObjectName(m_efbEncodeDepthState, "efb encoder depth state"); + + // Create rasterizer state + + D3D11_RASTERIZER_DESC rd = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); + rd.CullMode = D3D11_CULL_NONE; + rd.DepthClipEnable = FALSE; + hr = D3D::device->CreateRasterizerState(&rd, &m_efbEncodeRastState); + CHECK(SUCCEEDED(hr), "create efb encode rast state"); + D3D::SetDebugObjectName(m_efbEncodeRastState, "efb encoder rast state"); + + // Create efb texture sampler + + D3D11_SAMPLER_DESC sd = 
CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); + sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); + CHECK(SUCCEEDED(hr), "create efb encode texture sampler"); + D3D::SetDebugObjectName(m_efbSampler, "efb encoder texture sampler"); + + m_ready = true; +} + +void PSTextureEncoder::Shutdown() +{ + m_ready = false; + + for (size_t i = 0; i < 4; ++i) + SAFE_RELEASE(m_fetchClass[i]); + for (size_t i = 0; i < 2; ++i) + SAFE_RELEASE(m_scaledFetchClass[i]); + for (size_t i = 0; i < 2; ++i) + SAFE_RELEASE(m_intensityClass[i]); + for (size_t i = 0; i < 16; ++i) + SAFE_RELEASE(m_generatorClass[i]); + m_linkageArray.clear(); + + SAFE_RELEASE(m_classLinkage); + SAFE_RELEASE(m_dynamicShader); + + for (ComboMap::iterator it = m_staticShaders.begin(); + it != m_staticShaders.end(); ++it) + { + SAFE_RELEASE(it->second); + } + m_staticShaders.clear(); + + SAFE_RELEASE(m_efbSampler); + SAFE_RELEASE(m_efbEncodeRastState); + SAFE_RELEASE(m_efbEncodeDepthState); + SAFE_RELEASE(m_efbEncodeBlendState); + SAFE_RELEASE(m_quadLayout); + SAFE_RELEASE(m_vShader); + SAFE_RELEASE(m_quad); + SAFE_RELEASE(m_encodeParams); + SAFE_RELEASE(m_outStage); + SAFE_RELEASE(m_outRTV); + SAFE_RELEASE(m_out); +} + +size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf) +{ + if (!m_ready) // Make sure we initialized OK + return 0; + + HRESULT hr; + + unsigned int blockW = BLOCK_WIDTHS[dstFormat]; + unsigned int blockH = BLOCK_HEIGHTS[dstFormat]; + + // Round up source dims to multiple of block size + unsigned int actualWidth = srcRect.GetWidth() / (scaleByHalf ? 2 : 1); + actualWidth = (actualWidth + blockW-1) & ~(blockW-1); + unsigned int actualHeight = srcRect.GetHeight() / (scaleByHalf ? 
2 : 1); + actualHeight = (actualHeight + blockH-1) & ~(blockH-1); + + unsigned int numBlocksX = actualWidth/blockW; + unsigned int numBlocksY = actualHeight/blockH; + + unsigned int cacheLinesPerRow; + if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one + cacheLinesPerRow = numBlocksX*2; + else + cacheLinesPerRow = numBlocksX; + CHECK(cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check"); + + unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY; + CHECK(totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check"); + + size_t encodeSize = 0; + + // Reset API + + g_renderer->ResetAPIState(); + + // Set up all the state for EFB encoding + +#ifdef USE_DYNAMIC_MODE + if (SetDynamicShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) +#else + if (SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) +#endif + { + D3D::context->VSSetShader(m_vShader, NULL, 0); + + D3D::stateman->PushBlendState(m_efbEncodeBlendState); + D3D::stateman->PushDepthState(m_efbEncodeDepthState); + D3D::stateman->PushRasterizerState(m_efbEncodeRastState); + D3D::stateman->Apply(); + + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2), FLOAT(numBlocksY)); + D3D::context->RSSetViewports(1, &vp); + + D3D::context->IASetInputLayout(m_quadLayout); + D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + UINT stride = sizeof(QuadVertex); + UINT offset = 0; + D3D::context->IASetVertexBuffers(0, 1, &m_quad, &stride, &offset); + + EFBRectangle fullSrcRect; + fullSrcRect.left = 0; + fullSrcRect.top = 0; + fullSrcRect.right = EFB_WIDTH; + fullSrcRect.bottom = EFB_HEIGHT; + TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); + + EFBEncodeParams params = { 0 }; + params.NumHalfCacheLinesX = cacheLinesPerRow*2; + params.NumBlocksY = numBlocksY; + params.PosX = srcRect.left; + params.PosY = srcRect.top; + params.TexLeft = float(targetRect.left) 
/ g_renderer->GetFullTargetWidth(); + params.TexTop = float(targetRect.top) / g_renderer->GetFullTargetHeight(); + params.TexRight = float(targetRect.right) / g_renderer->GetFullTargetWidth(); + params.TexBottom = float(targetRect.bottom) / g_renderer->GetFullTargetHeight(); + D3D::context->UpdateSubresource(m_encodeParams, 0, NULL, ¶ms, 0, 0); + + D3D::context->VSSetConstantBuffers(0, 1, &m_encodeParams); + + D3D::context->OMSetRenderTargets(1, &m_outRTV, NULL); + + ID3D11ShaderResourceView* pEFB = (srcFormat == PIXELFMT_Z24) ? + FramebufferManager::GetEFBDepthTexture()->GetSRV() : + FramebufferManager::GetEFBColorTexture()->GetSRV(); + + D3D::context->PSSetConstantBuffers(0, 1, &m_encodeParams); + D3D::context->PSSetShaderResources(0, 1, &pEFB); + D3D::context->PSSetSamplers(0, 1, &m_efbSampler); + + // Encode! + + D3D::context->Draw(4, 0); + + // Copy to staging buffer + + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2, numBlocksY, 1); + D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); + + // Clean up state + + IUnknown* nullDummy = NULL; + + D3D::context->PSSetSamplers(0, 1, (ID3D11SamplerState**)&nullDummy); + D3D::context->PSSetShaderResources(0, 1, (ID3D11ShaderResourceView**)&nullDummy); + D3D::context->PSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); + + D3D::context->OMSetRenderTargets(0, NULL, NULL); + + D3D::context->VSSetConstantBuffers(0, 1, (ID3D11Buffer**)&nullDummy); + + D3D::stateman->PopRasterizerState(); + D3D::stateman->PopDepthState(); + D3D::stateman->PopBlendState(); + + D3D::context->PSSetShader(NULL, NULL, 0); + D3D::context->VSSetShader(NULL, NULL, 0); + + // Transfer staging buffer to GameCube/Wii RAM + + D3D11_MAPPED_SUBRESOURCE map = { 0 }; + hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); + CHECK(SUCCEEDED(hr), "map staging buffer"); + + u8* src = (u8*)map.pData; + for (unsigned int y = 0; y < numBlocksY; ++y) + { + memcpy(dst, src, cacheLinesPerRow*32); + dst += 
bpmem.copyMipMapStrideChannels*32; + src += map.RowPitch; + } + + D3D::context->Unmap(m_outStage, 0); + + encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY; + } + + // Restore API + + g_renderer->RestoreAPIState(); + D3D::context->OMSetRenderTargets(1, + &FramebufferManager::GetEFBColorTexture()->GetRTV(), + FramebufferManager::GetEFBDepthTexture()->GetDSV()); + + return encodeSize; +} + +bool PSTextureEncoder::InitStaticMode() +{ + // Nothing to really do. + return true; +} + +static const char* FETCH_FUNC_NAMES[4] = { + "Fetch_0", "Fetch_1", "Fetch_2", "Fetch_3" +}; + +static const char* SCALEDFETCH_FUNC_NAMES[2] = { + "ScaledFetch_0", "ScaledFetch_1" +}; + +static const char* INTENSITY_FUNC_NAMES[2] = { + "Intensity_0", "Intensity_1" +}; + +bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf) +{ + size_t fetchNum = srcFormat; + size_t scaledFetchNum = scaleByHalf ? 1 : 0; + size_t intensityNum = isIntensity ? 
1 : 0; + size_t generatorNum = dstFormat; + + ComboKey key = MakeComboKey(dstFormat, srcFormat, isIntensity, scaleByHalf); + + ComboMap::iterator it = m_staticShaders.find(key); + if (it == m_staticShaders.end()) + { + const char* generatorFuncName = NULL; + switch (generatorNum) + { + case 0x0: generatorFuncName = "Generate_0"; break; + case 0x4: generatorFuncName = "Generate_4"; break; + case 0x5: generatorFuncName = "Generate_5"; break; + case 0x6: generatorFuncName = "Generate_6"; break; + case 0x7: generatorFuncName = "Generate_7"; break; + case 0x8: generatorFuncName = "Generate_8"; break; + case 0xA: generatorFuncName = "Generate_A"; break; + case 0xB: generatorFuncName = "Generate_B"; break; + default: + WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); + m_staticShaders[key] = NULL; + return false; + } + + // Shader permutation not found, so compile it + D3DBlob* bytecode = NULL; + D3D_SHADER_MACRO macros[] = { + { "IMP_FETCH", FETCH_FUNC_NAMES[fetchNum] }, + { "IMP_SCALEDFETCH", SCALEDFETCH_FUNC_NAMES[scaledFetchNum] }, + { "IMP_INTENSITY", INTENSITY_FUNC_NAMES[intensityNum] }, + { "IMP_GENERATOR", generatorFuncName }, + { NULL, NULL } + }; + if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) + { + WARN_LOG(VIDEO, "EFB encoder shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d failed to compile", + dstFormat, srcFormat, isIntensity ? 1 : 0, scaleByHalf ? 
1 : 0); + // Add dummy shader to map to prevent trying to compile over and + // over again + m_staticShaders[key] = NULL; + return false; + } + + ID3D11PixelShader* newShader; + HRESULT hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), NULL, &newShader); + CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); + + m_staticShaders[key] = newShader; + bytecode->Release(); + } + + if (it != m_staticShaders.end()) + { + if (it->second) + { + D3D::context->PSSetShader(it->second, NULL, 0); + return true; + } + else + return false; + } + else + return false; +} + +bool PSTextureEncoder::InitDynamicMode() +{ + HRESULT hr; + + D3D_SHADER_MACRO macros[] = { + { "DYNAMIC_MODE", NULL }, + { NULL, NULL } + }; + + D3DBlob* bytecode = NULL; + if (!D3D::CompilePixelShader(EFB_ENCODE_PS, sizeof(EFB_ENCODE_PS), &bytecode, macros)) + { + ERROR_LOG(VIDEO, "EFB encode pixel shader failed to compile"); + return false; + } + + hr = D3D::device->CreateClassLinkage(&m_classLinkage); + CHECK(SUCCEEDED(hr), "create efb encode class linkage"); + D3D::SetDebugObjectName(m_classLinkage, "efb encoder class linkage"); + + hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), m_classLinkage, &m_dynamicShader); + CHECK(SUCCEEDED(hr), "create efb encode pixel shader"); + D3D::SetDebugObjectName(m_dynamicShader, "efb encoder pixel shader"); + + // Use D3DReflect + + ID3D11ShaderReflection* reflect = NULL; + hr = PD3DReflect(bytecode->Data(), bytecode->Size(), IID_ID3D11ShaderReflection, (void**)&reflect); + CHECK(SUCCEEDED(hr), "reflect on efb encoder shader"); + + // Get number of slots and create dynamic linkage array + + UINT numSlots = reflect->GetNumInterfaceSlots(); + m_linkageArray.resize(numSlots, NULL); + + // Get interface slots + + ID3D11ShaderReflectionVariable* var = reflect->GetVariableByName("g_fetch"); + m_fetchSlot = var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_scaledFetch"); + m_scaledFetchSlot = 
var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_intensity"); + m_intensitySlot = var->GetInterfaceSlot(0); + + var = reflect->GetVariableByName("g_generator"); + m_generatorSlot = var->GetInterfaceSlot(0); + + INFO_LOG(VIDEO, "fetch slot %d, scaledFetch slot %d, intensity slot %d, generator slot %d", + m_fetchSlot, m_scaledFetchSlot, m_intensitySlot, m_generatorSlot); + + // Class instances will be created at the time they are used + + for (size_t i = 0; i < 4; ++i) + m_fetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_scaledFetchClass[i] = NULL; + for (size_t i = 0; i < 2; ++i) + m_intensityClass[i] = NULL; + for (size_t i = 0; i < 16; ++i) + m_generatorClass[i] = NULL; + + reflect->Release(); + bytecode->Release(); + + return true; +} + +static const char* FETCH_CLASS_NAMES[4] = { + "cFetch_0", "cFetch_1", "cFetch_2", "cFetch_3" +}; + +static const char* SCALEDFETCH_CLASS_NAMES[2] = { + "cScaledFetch_0", "cScaledFetch_1" +}; + +static const char* INTENSITY_CLASS_NAMES[2] = { + "cIntensity_0", "cIntensity_1" +}; + +bool PSTextureEncoder::SetDynamicShader(unsigned int dstFormat, + unsigned int srcFormat, bool isIntensity, bool scaleByHalf) +{ + size_t fetchNum = srcFormat; + size_t scaledFetchNum = scaleByHalf ? 1 : 0; + size_t intensityNum = isIntensity ? 1 : 0; + size_t generatorNum = dstFormat; + + // FIXME: Not all the possible generators are available as classes yet. + // When dynamic mode is usable, implement them. 
+ const char* generatorName = NULL; + switch (generatorNum) + { + case 0x4: generatorName = "cGenerator_4"; break; + case 0x5: generatorName = "cGenerator_5"; break; + case 0x6: generatorName = "cGenerator_6"; break; + case 0x8: generatorName = "cGenerator_8"; break; + case 0xB: generatorName = "cGenerator_B"; break; + default: + WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); + return false; + } + + // Make sure class instances are available + if (!m_fetchClass[fetchNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + FETCH_CLASS_NAMES[fetchNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + FETCH_CLASS_NAMES[fetchNum], 0, 0, 0, 0, &m_fetchClass[fetchNum]); + CHECK(SUCCEEDED(hr), "create fetch class instance"); + } + if (!m_scaledFetchClass[scaledFetchNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + SCALEDFETCH_CLASS_NAMES[scaledFetchNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + SCALEDFETCH_CLASS_NAMES[scaledFetchNum], 0, 0, 0, 0, + &m_scaledFetchClass[scaledFetchNum]); + CHECK(SUCCEEDED(hr), "create scaled fetch class instance"); + } + if (!m_intensityClass[intensityNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + INTENSITY_CLASS_NAMES[intensityNum], dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + INTENSITY_CLASS_NAMES[intensityNum], 0, 0, 0, 0, + &m_intensityClass[intensityNum]); + CHECK(SUCCEEDED(hr), "create intensity class instance"); + } + if (!m_generatorClass[generatorNum]) + { + INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", + generatorName, dstFormat); + HRESULT hr = m_classLinkage->CreateClassInstance( + generatorName, 0, 0, 0, 0, &m_generatorClass[generatorNum]); + CHECK(SUCCEEDED(hr), "create generator class instance"); + } + + // Assemble dynamic linkage array + if (m_fetchSlot != UINT(-1)) + m_linkageArray[m_fetchSlot] = m_fetchClass[fetchNum]; + if 
(m_scaledFetchSlot != UINT(-1)) + m_linkageArray[m_scaledFetchSlot] = m_scaledFetchClass[scaledFetchNum]; + if (m_intensitySlot != UINT(-1)) + m_linkageArray[m_intensitySlot] = m_intensityClass[intensityNum]; + if (m_generatorSlot != UINT(-1)) + m_linkageArray[m_generatorSlot] = m_generatorClass[generatorNum]; + + D3D::context->PSSetShader(m_dynamicShader, + m_linkageArray.empty() ? NULL : &m_linkageArray[0], + (UINT)m_linkageArray.size()); + + return true; +} + +} diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h new file mode 100644 index 0000000000..4fdbfb4ba5 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.h @@ -0,0 +1,119 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _PSTEXTUREENCODER_H +#define _PSTEXTUREENCODER_H + +#include "TextureEncoder.h" + +struct ID3D11Texture2D; +struct ID3D11RenderTargetView; +struct ID3D11Buffer; +struct ID3D11InputLayout; +struct ID3D11VertexShader; +struct ID3D11PixelShader; +struct ID3D11ClassLinkage; +struct ID3D11ClassInstance; +struct ID3D11BlendState; +struct ID3D11DepthStencilState; +struct ID3D11RasterizerState; +struct ID3D11SamplerState; + +namespace DX11 +{ + +class PSTextureEncoder : public TextureEncoder +{ + +public: + + PSTextureEncoder(); + + void Init(); + void Shutdown(); + size_t Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf); + +private: + + bool m_ready; + + ID3D11Texture2D* m_out; + ID3D11RenderTargetView* m_outRTV; + ID3D11Texture2D* m_outStage; + ID3D11Buffer* m_encodeParams; + ID3D11Buffer* m_quad; + ID3D11VertexShader* m_vShader; + ID3D11InputLayout* m_quadLayout; + ID3D11BlendState* m_efbEncodeBlendState; + ID3D11DepthStencilState* m_efbEncodeDepthState; + ID3D11RasterizerState* m_efbEncodeRastState; + ID3D11SamplerState* m_efbSampler; + + // Stuff only used in static-linking mode (SM4.0-compatible) + + bool InitStaticMode(); + bool SetStaticShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf); + + typedef unsigned int ComboKey; // Key for a shader combination + + ComboKey MakeComboKey(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf) + { + return (dstFormat << 4) | (srcFormat << 2) | (isIntensity ? (1<<1) : 0) + | (scaleByHalf ? 
(1<<0) : 0); + } + + typedef std::map ComboMap; + + ComboMap m_staticShaders; + + // Stuff only used for dynamic-linking mode (SM5.0+, available as soon as + // Microsoft fixes their bloody HLSL compiler) + + bool InitDynamicMode(); + bool SetDynamicShader(unsigned int dstFormat, unsigned int srcFormat, + bool isIntensity, bool scaleByHalf); + + ID3D11PixelShader* m_dynamicShader; + ID3D11ClassLinkage* m_classLinkage; + + // Interface slots + UINT m_fetchSlot; + UINT m_scaledFetchSlot; + UINT m_intensitySlot; + UINT m_generatorSlot; + + // Class instances + // Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z + ID3D11ClassInstance* m_fetchClass[4]; + // ScaledFetch: 0 is off, 1 is on + ID3D11ClassInstance* m_scaledFetchClass[2]; + // Intensity: 0 is off, 1 is on + ID3D11ClassInstance* m_intensityClass[2]; + // Generator: one for each dst format, 16 total + ID3D11ClassInstance* m_generatorClass[16]; + + std::vector m_linkageArray; + +}; + +} + +#endif diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp index 6087eac65c..7b1632639b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp @@ -23,12 +23,17 @@ #include "PixelShaderCache.h" #include "TextureCache.h" #include "VertexShaderCache.h" +#include "TextureEncoder.h" +#include "PSTextureEncoder.h" +#include "HW/Memmap.h" +#include "VideoConfig.h" namespace DX11 { -#define MAX_COPY_BUFFERS 25 -ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {}; +static TextureEncoder* g_encoder = NULL; +const size_t MAX_COPY_BUFFERS = 25; +ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = { 0 }; TextureCache::TCacheEntry::~TCacheEntry() { @@ -92,48 +97,70 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width, return entry; } -void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float colmat[], const EFBRectangle 
&source_rect, - bool bIsIntensityFmt, u32 copyfmt) +void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat) { - g_renderer->ResetAPIState(); - // stretch picture with increased internal resolution - const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)virtualW, (float)virtualH); - D3D::context->RSSetViewports(1, &vp); - - // set transformation - if (NULL == efbcopycbuf[cbufid]) + if (!isDynamic || g_ActiveConfig.bCopyEFBToTexture) { - const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - D3D11_SUBRESOURCE_DATA data; - data.pSysMem = colmat; - HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]); - CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbufid); - D3D::SetDebugObjectName((ID3D11DeviceChild*)efbcopycbuf[cbufid], "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); - } - D3D::context->PSSetConstantBuffers(0, 1, &efbcopycbuf[cbufid]); + g_renderer->ResetAPIState(); - const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); - // TODO: try targetSource.asRECT(); - const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); + // stretch picture with increased internal resolution + const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)virtualW, (float)virtualH); + D3D::context->RSSetViewports(1, &vp); - // Use linear filtering if (bScaleByHalf), use point filtering otherwise - if (bScaleByHalf) - D3D::SetLinearCopySampler(); - else - D3D::SetPointCopySampler(); + // set transformation + if (NULL == efbcopycbuf[cbufid]) + { + const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + D3D11_SUBRESOURCE_DATA data; + 
data.pSysMem = colmat; + HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]); + CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbufid); + D3D::SetDebugObjectName((ID3D11DeviceChild*)efbcopycbuf[cbufid], "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); + } + D3D::context->PSSetConstantBuffers(0, 1, &efbcopycbuf[cbufid]); - D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), NULL); + const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); + // TODO: try targetSource.asRECT(); + const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); - D3D::drawShadedTexQuad( - (bFromZBuffer) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(), - &sourcerect, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), - (bFromZBuffer) ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), - VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); + // Use linear filtering if (scaleByHalf), use point filtering otherwise + if (scaleByHalf) + D3D::SetLinearCopySampler(); + else + D3D::SetPointCopySampler(); - D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); + D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), NULL); + + // Create texture copy + D3D::drawShadedTexQuad( + (srcFormat == PIXELFMT_Z24) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(), + &sourcerect, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), + (srcFormat == PIXELFMT_Z24) ? 
PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), + VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); + + D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); - g_renderer->RestoreAPIState(); + g_renderer->RestoreAPIState(); + } + + if (!g_ActiveConfig.bCopyEFBToTexture) + { + u8* dst = Memory::GetPointer(dstAddr); + size_t encodeSize = g_encoder->Encode(dst, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf); + hash = GetHash64(dst, encodeSize, g_ActiveConfig.iSafeTextureCache_ColorSamples); + if (g_ActiveConfig.bEFBCopyCacheEnable) + { + // If the texture in RAM is already in the texture cache, + // do not copy it again as it has not changed. + if (TextureCache::Find(dstAddr, hash)) + return; + } + + TextureCache::MakeRangeDynamic(dstAddr, encodeSize); + } } TextureCache::TCacheEntryBase* TextureCache::CreateRenderTargetTexture( @@ -146,12 +173,19 @@ TextureCache::TCacheEntryBase* TextureCache::CreateRenderTargetTexture( TextureCache::TextureCache() { + // FIXME: Is it safe here? 
+ g_encoder = new PSTextureEncoder; + g_encoder->Init(); } TextureCache::~TextureCache() { for (unsigned int k = 0; k < MAX_COPY_BUFFERS; ++k) SAFE_RELEASE(efbcopycbuf[k]); + + g_encoder->Shutdown(); + delete g_encoder; + g_encoder = NULL; } } diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.h b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.h index 3e229b671c..fdae05bd30 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.h @@ -43,9 +43,10 @@ private: void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int levels, bool autogen_mips = false); - void FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float* colmat, const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt); + void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat); void Bind(unsigned int stage); bool Save(const char filename[]); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h b/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h new file mode 100644 index 0000000000..203815a57f --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureEncoder.h @@ -0,0 +1,90 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _TEXTUREENCODER_H +#define _TEXTUREENCODER_H + +#include "VideoCommon.h" + +namespace DX11 +{ + +// 4-bit format: 8x8 texels / cache line +// 8-bit format: 8x4 texels / cache line +// 16-bit format: 4x4 texels / cache line +// 32-bit format: 4x4 texels / 2 cache lines +// Compressed format: 8x8 texels / cache line + +const unsigned int BLOCK_WIDTHS[16] = { + 8, // R4 + 8, // R8 (FIXME: duplicate of R8 below?) + 8, // A4 R4 + 4, // A8 R8 + 4, // R5 G6 B5 + 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 + 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) + 8, // A8 + 8, // R8 (FIXME: duplicate of R8 above?) + 8, // G8 + 8, // B8 + 4, // G8 R8 + 4, // B8 G8 + 0, 0, 0 // Unknown formats +}; + +const unsigned int BLOCK_HEIGHTS[16] = { + 8, // R4 + 4, // R8 (FIXME: duplicate of R8 below?) + 4, // A4 R4 + 4, // A8 R8 + 4, // R5 G6 B5 + 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 + 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) + 4, // A8 + 4, // R8 (FIXME: duplicate of R8 above?) 
+ 4, // G8 + 4, // B8 + 4, // G8 R8 + 4, // B8 G8 + 0, 0, 0 // Unknown formats +}; + +// Maximum number of bytes that can occur in a texture block-row generated by +// the encoder +static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64; +// The maximum amount of data that the texture encoder can generate in one call +static const UINT MAX_BYTES_PER_ENCODE = MAX_BYTES_PER_BLOCK_ROW*(EFB_HEIGHT/4); + +class TextureEncoder +{ + +public: + + virtual ~TextureEncoder() { } + + virtual void Init() = 0; + virtual void Shutdown() = 0; + // Returns size in bytes of encoded block of memory + virtual size_t Encode(u8* dst, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity, + bool scaleByHalf) = 0; + +}; + +} + +#endif diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index 9fc36929b3..cac0a429c9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -81,7 +81,7 @@ void InitBackendInfo() { g_Config.backend_info.APIType = API_D3D11; g_Config.backend_info.bUseRGBATextures = true; // the GX formats barely match any D3D11 formats - g_Config.backend_info.bSupportsEFBToRAM = false; + g_Config.backend_info.bSupportsEFBToRAM = true; g_Config.backend_info.bSupportsRealXFB = false; g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bAllowSignedBytes = true; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 03eeefd169..b7b77e6d9b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -69,11 +69,12 @@ void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, // D3D9 will automatically generate mip maps if necessary } -void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float *colmat, 
const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt) +void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat) { - const LPDIRECT3DTEXTURE9 read_texture = bFromZBuffer ? + const LPDIRECT3DTEXTURE9 read_texture = (srcFormat == PIXELFMT_Z24) ? FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture(); @@ -101,16 +102,16 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB destrect.top = 0; PixelShaderManager::SetColorMatrix(colmat); // set transformation - TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); + TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); RECT sourcerect; sourcerect.bottom = targetSource.bottom; sourcerect.left = targetSource.left; sourcerect.right = targetSource.right; sourcerect.top = targetSource.top; - if (bFromZBuffer) + if (srcFormat == PIXELFMT_Z24) { - if (bScaleByHalf || g_ActiveConfig.iMultisampleMode) + if (scaleByHalf || g_ActiveConfig.iMultisampleMode) { D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); @@ -134,7 +135,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), virtualW, virtualH, // TODO: why is D3DFMT_D24X8 singled out here? why not D3DFMT_D24X4S4/D24S8/D24FS8/D32/D16/D15S1 too, or none of them? 
- PixelShaderCache::GetDepthMatrixProgram(SSAAMode, bFromZBuffer && bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8), + PixelShaderCache::GetDepthMatrixProgram(SSAAMode, (srcFormat == PIXELFMT_Z24) && bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8), VertexShaderCache::GetSimpleVertexShader(SSAAMode)); Rendersurf->Release(); @@ -147,11 +148,11 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB read_texture, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), - bFromZBuffer, - bIsIntensityFmt, - copyfmt, - bScaleByHalf, - source_rect); + srcFormat == PIXELFMT_Z24, + isIntensity, + dstFormat, + scaleByHalf, + srcRect); } D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h index ef8cacfb9d..6234824c2e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h @@ -46,9 +46,10 @@ private: void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int levels, bool autogen_mips = false); - void FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float* colmat, const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt); + void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat); void Bind(unsigned int stage); bool Save(const char filename[]); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp index 5eae3626bd..1270bda015 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp @@ -264,16 +264,17 @@ TextureCache::TCacheEntryBase* TextureCache::CreateRenderTargetTexture( return entry; } -void 
TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float colmat[], const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt) +void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat) { glBindTexture(GL_TEXTURE_2D, texture); // Make sure to resolve anything we need to read from. - const GLuint read_texture = bFromZBuffer ? - FramebufferManager::ResolveAndGetDepthTarget(source_rect) : - FramebufferManager::ResolveAndGetRenderTarget(source_rect); + const GLuint read_texture = (srcFormat == PIXELFMT_Z24) ? + FramebufferManager::ResolveAndGetDepthTarget(srcRect) : + FramebufferManager::ResolveAndGetRenderTarget(srcRect); GL_REPORT_ERRORD(); @@ -295,11 +296,11 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB glViewport(0, 0, virtualW, virtualH); - PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); + PixelShaderCache::SetCurrentShader((srcFormat == PIXELFMT_Z24) ? 
PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); PixelShaderManager::SetColorMatrix(colmat); // set transformation GL_REPORT_ERRORD(); - TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); + TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); glBegin(GL_QUADS); glTexCoord2f((GLfloat)targetSource.left, (GLfloat)targetSource.bottom); glVertex2f(-1, 1); @@ -319,11 +320,11 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB hash = TextureConverter::EncodeToRamFromTexture( addr, read_texture, - bFromZBuffer, - bIsIntensityFmt, - copyfmt, - bScaleByHalf, - source_rect); + srcFormat == PIXELFMT_Z24, + isIntensity, + dstFormat, + scaleByHalf, + srcRect); } FramebufferManager::SetFramebuffer(0); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.h b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.h index a835da798c..866fba33b3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.h @@ -56,9 +56,10 @@ private: void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level, bool autogen_mips = false); - void FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf, - unsigned int cbufid, const float colmat[], const EFBRectangle &source_rect, - bool bIsIntensityFmt, u32 copyfmt); + void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + unsigned int srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf, unsigned int cbufid, + const float *colmat); void Bind(unsigned int stage); bool Save(const char filename[]);