Merge pull request #1966 from magumagu/unify-efb-encode

Unify EFB encoding shader generation
This commit is contained in:
Markus Wick 2015-01-27 23:14:18 +01:00
commit beaa9905a6
4 changed files with 95 additions and 1306 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -24,9 +24,7 @@ namespace DX11
class PSTextureEncoder : public TextureEncoder class PSTextureEncoder : public TextureEncoder
{ {
public: public:
PSTextureEncoder(); PSTextureEncoder();
void Init(); void Init();
@@ -36,25 +34,14 @@ public:
bool isIntensity, bool scaleByHalf); bool isIntensity, bool scaleByHalf);
private: private:
bool m_ready; bool m_ready;
ID3D11Texture2D* m_out; ID3D11Texture2D* m_out;
ID3D11RenderTargetView* m_outRTV; ID3D11RenderTargetView* m_outRTV;
ID3D11Texture2D* m_outStage; ID3D11Texture2D* m_outStage;
ID3D11Buffer* m_encodeParams; ID3D11Buffer* m_encodeParams;
ID3D11Buffer* m_quad;
ID3D11VertexShader* m_vShader;
ID3D11InputLayout* m_quadLayout;
ID3D11BlendState* m_efbEncodeBlendState;
ID3D11DepthStencilState* m_efbEncodeDepthState;
ID3D11RasterizerState* m_efbEncodeRastState;
ID3D11SamplerState* m_efbSampler;
// Stuff only used in static-linking mode (SM4.0-compatible) ID3D11PixelShader* SetStaticShader(unsigned int dstFormat,
bool InitStaticMode();
bool SetStaticShader(unsigned int dstFormat,
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf); PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
typedef unsigned int ComboKey; // Key for a shader combination typedef unsigned int ComboKey; // Key for a shader combination
@@ -69,35 +56,6 @@ private:
typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap; typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
ComboMap m_staticShaders; ComboMap m_staticShaders;
// Stuff only used for dynamic-linking mode (SM5.0+, available as soon as
// Microsoft fixes their bloody HLSL compiler)
bool InitDynamicMode();
bool SetDynamicShader(unsigned int dstFormat,
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
ID3D11PixelShader* m_dynamicShader;
ID3D11ClassLinkage* m_classLinkage;
// Interface slots
UINT m_fetchSlot;
UINT m_scaledFetchSlot;
UINT m_intensitySlot;
UINT m_generatorSlot;
// Class instances
// Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z
ID3D11ClassInstance* m_fetchClass[4];
// ScaledFetch: 0 is off, 1 is on
ID3D11ClassInstance* m_scaledFetchClass[2];
// Intensity: 0 is off, 1 is on
ID3D11ClassInstance* m_intensityClass[2];
// Generator: one for each dst format, 16 total
ID3D11ClassInstance* m_generatorClass[16];
std::vector<ID3D11ClassInstance*> m_linkageArray;
}; };
} }

View File

@@ -35,7 +35,7 @@ static GLuint s_texConvFrameBuffer[2] = {0,0};
static GLuint s_srcTexture = 0; // for decoding from RAM static GLuint s_srcTexture = 0; // for decoding from RAM
static GLuint s_dstTexture = 0; // for encoding to RAM static GLuint s_dstTexture = 0; // for encoding to RAM
const int renderBufferWidth = 1024; const int renderBufferWidth = EFB_WIDTH * 4;
const int renderBufferHeight = 1024; const int renderBufferHeight = 1024;
static SHADER s_rgbToYuyvProgram; static SHADER s_rgbToYuyvProgram;
@@ -320,14 +320,16 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer,
source.left, source.top, source.left, source.top,
expandedWidth, bScaleByHalf ? 2 : 1); expandedWidth, bScaleByHalf ? 2 : 1);
int cacheBytes = 32; unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format);
unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format);
unsigned int cacheLinesPerRow;
if ((format & 0x0f) == 6) if ((format & 0x0f) == 6)
cacheBytes = 64; cacheLinesPerRow = numBlocksX * 2;
else
cacheLinesPerRow = numBlocksX;
int readStride = (expandedWidth * cacheBytes) /
TexDecoder_GetBlockWidthInTexels(format);
EncodeToRamUsingShader(source_texture, EncodeToRamUsingShader(source_texture,
dest_ptr, expandedWidth / samples, expandedHeight, readStride, dest_ptr, cacheLinesPerRow * 8, numBlocksY, cacheLinesPerRow * 32,
bScaleByHalf > 0 && !bFromZBuffer); bScaleByHalf > 0 && !bFromZBuffer);
return size_in_bytes; // TODO: D3D11 is calculating this value differently! return size_in_bytes; // TODO: D3D11 is calculating this value differently!

View File

@@ -70,36 +70,38 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
WRITE(p, " out vec4 ocol0;\n"); WRITE(p, " out vec4 ocol0;\n");
WRITE(p, "void main()\n"); WRITE(p, "void main()\n");
}
else // D3D
{
WRITE(p,"sampler samp0 : register(s0);\n");
WRITE(p, "Texture2D Tex0 : register(t0);\n");
WRITE(p,"void main(\n");
WRITE(p," out float4 ocol0 : SV_Target)\n");
}
WRITE(p, "{\n" WRITE(p, "{\n"
" int2 sampleUv;\n" " int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n" " int2 uv1 = int2(gl_FragCoord.xy);\n"
); );
}
else // D3D
{
WRITE(p, "sampler samp0 : register(s0);\n");
WRITE(p, "Texture2D Tex0 : register(t0);\n");
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1)); WRITE(p, "void main(\n");
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1); WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n");
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples)); WRITE(p, "{\n"
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", IntLog2(blkH), ~(blkW - 1)); " int2 sampleUv;\n"
" int2 uv1 = int2(rawpos.xy);\n"
);
}
WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW));
WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH));
if (samples == 1) if (samples == 1)
{ {
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
WRITE(p, " bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders WRITE(p, " bool first = (uv1.x & %d) == 0;\n", blkH * blkW / 2);
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n"); samples = 2;
} }
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1); WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1);
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", IntLog2(blkW), blkH - 1); WRITE(p, " int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(blkW / samples));
WRITE(p, " int x_offset_in_block = (offset_in_block & %d) << %d;\n", (blkW / samples) - 1, IntLog2(samples));
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n"); WRITE(p, " sampleUv.x = x_block_position + x_offset_in_block;\n");
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n"); WRITE(p, " sampleUv.y = y_block_position + y_offset_in_block;\n");
WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords
WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel
@@ -116,9 +118,18 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
{ {
if (ApiType == API_OPENGL)
{
WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n",
dest, xoffset, colorComp dest, xoffset, colorComp
); );
}
else
{
WRITE(p, " %s = Tex0.Sample(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n",
dest, xoffset, colorComp
);
}
} }
static void WriteColorToIntensity(char*& p, const char* src, const char* dest) static void WriteColorToIntensity(char*& p, const char* src, const char* dest)