Merge pull request #1966 from magumagu/unify-efb-encode
Unify EFB encoding shader generation
This commit is contained in:
commit
beaa9905a6
File diff suppressed because it is too large
Load Diff
|
@ -24,9 +24,7 @@ namespace DX11
|
||||||
|
|
||||||
class PSTextureEncoder : public TextureEncoder
|
class PSTextureEncoder : public TextureEncoder
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
PSTextureEncoder();
|
PSTextureEncoder();
|
||||||
|
|
||||||
void Init();
|
void Init();
|
||||||
|
@ -36,25 +34,14 @@ public:
|
||||||
bool isIntensity, bool scaleByHalf);
|
bool isIntensity, bool scaleByHalf);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
bool m_ready;
|
bool m_ready;
|
||||||
|
|
||||||
ID3D11Texture2D* m_out;
|
ID3D11Texture2D* m_out;
|
||||||
ID3D11RenderTargetView* m_outRTV;
|
ID3D11RenderTargetView* m_outRTV;
|
||||||
ID3D11Texture2D* m_outStage;
|
ID3D11Texture2D* m_outStage;
|
||||||
ID3D11Buffer* m_encodeParams;
|
ID3D11Buffer* m_encodeParams;
|
||||||
ID3D11Buffer* m_quad;
|
|
||||||
ID3D11VertexShader* m_vShader;
|
|
||||||
ID3D11InputLayout* m_quadLayout;
|
|
||||||
ID3D11BlendState* m_efbEncodeBlendState;
|
|
||||||
ID3D11DepthStencilState* m_efbEncodeDepthState;
|
|
||||||
ID3D11RasterizerState* m_efbEncodeRastState;
|
|
||||||
ID3D11SamplerState* m_efbSampler;
|
|
||||||
|
|
||||||
// Stuff only used in static-linking mode (SM4.0-compatible)
|
ID3D11PixelShader* SetStaticShader(unsigned int dstFormat,
|
||||||
|
|
||||||
bool InitStaticMode();
|
|
||||||
bool SetStaticShader(unsigned int dstFormat,
|
|
||||||
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
|
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
|
||||||
|
|
||||||
typedef unsigned int ComboKey; // Key for a shader combination
|
typedef unsigned int ComboKey; // Key for a shader combination
|
||||||
|
@ -69,35 +56,6 @@ private:
|
||||||
typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
|
typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
|
||||||
|
|
||||||
ComboMap m_staticShaders;
|
ComboMap m_staticShaders;
|
||||||
|
|
||||||
// Stuff only used for dynamic-linking mode (SM5.0+, available as soon as
|
|
||||||
// Microsoft fixes their bloody HLSL compiler)
|
|
||||||
|
|
||||||
bool InitDynamicMode();
|
|
||||||
bool SetDynamicShader(unsigned int dstFormat,
|
|
||||||
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
|
|
||||||
|
|
||||||
ID3D11PixelShader* m_dynamicShader;
|
|
||||||
ID3D11ClassLinkage* m_classLinkage;
|
|
||||||
|
|
||||||
// Interface slots
|
|
||||||
UINT m_fetchSlot;
|
|
||||||
UINT m_scaledFetchSlot;
|
|
||||||
UINT m_intensitySlot;
|
|
||||||
UINT m_generatorSlot;
|
|
||||||
|
|
||||||
// Class instances
|
|
||||||
// Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z
|
|
||||||
ID3D11ClassInstance* m_fetchClass[4];
|
|
||||||
// ScaledFetch: 0 is off, 1 is on
|
|
||||||
ID3D11ClassInstance* m_scaledFetchClass[2];
|
|
||||||
// Intensity: 0 is off, 1 is on
|
|
||||||
ID3D11ClassInstance* m_intensityClass[2];
|
|
||||||
// Generator: one for each dst format, 16 total
|
|
||||||
ID3D11ClassInstance* m_generatorClass[16];
|
|
||||||
|
|
||||||
std::vector<ID3D11ClassInstance*> m_linkageArray;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@ static GLuint s_texConvFrameBuffer[2] = {0,0};
|
||||||
static GLuint s_srcTexture = 0; // for decoding from RAM
|
static GLuint s_srcTexture = 0; // for decoding from RAM
|
||||||
static GLuint s_dstTexture = 0; // for encoding to RAM
|
static GLuint s_dstTexture = 0; // for encoding to RAM
|
||||||
|
|
||||||
const int renderBufferWidth = 1024;
|
const int renderBufferWidth = EFB_WIDTH * 4;
|
||||||
const int renderBufferHeight = 1024;
|
const int renderBufferHeight = 1024;
|
||||||
|
|
||||||
static SHADER s_rgbToYuyvProgram;
|
static SHADER s_rgbToYuyvProgram;
|
||||||
|
@ -320,14 +320,16 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer,
|
||||||
source.left, source.top,
|
source.left, source.top,
|
||||||
expandedWidth, bScaleByHalf ? 2 : 1);
|
expandedWidth, bScaleByHalf ? 2 : 1);
|
||||||
|
|
||||||
int cacheBytes = 32;
|
unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format);
|
||||||
|
unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format);
|
||||||
|
unsigned int cacheLinesPerRow;
|
||||||
if ((format & 0x0f) == 6)
|
if ((format & 0x0f) == 6)
|
||||||
cacheBytes = 64;
|
cacheLinesPerRow = numBlocksX * 2;
|
||||||
|
else
|
||||||
|
cacheLinesPerRow = numBlocksX;
|
||||||
|
|
||||||
int readStride = (expandedWidth * cacheBytes) /
|
|
||||||
TexDecoder_GetBlockWidthInTexels(format);
|
|
||||||
EncodeToRamUsingShader(source_texture,
|
EncodeToRamUsingShader(source_texture,
|
||||||
dest_ptr, expandedWidth / samples, expandedHeight, readStride,
|
dest_ptr, cacheLinesPerRow * 8, numBlocksY, cacheLinesPerRow * 32,
|
||||||
bScaleByHalf > 0 && !bFromZBuffer);
|
bScaleByHalf > 0 && !bFromZBuffer);
|
||||||
return size_in_bytes; // TODO: D3D11 is calculating this value differently!
|
return size_in_bytes; // TODO: D3D11 is calculating this value differently!
|
||||||
|
|
||||||
|
|
|
@ -70,6 +70,10 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||||
|
|
||||||
WRITE(p, " out vec4 ocol0;\n");
|
WRITE(p, " out vec4 ocol0;\n");
|
||||||
WRITE(p, "void main()\n");
|
WRITE(p, "void main()\n");
|
||||||
|
WRITE(p, "{\n"
|
||||||
|
" int2 sampleUv;\n"
|
||||||
|
" int2 uv1 = int2(gl_FragCoord.xy);\n"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
else // D3D
|
else // D3D
|
||||||
{
|
{
|
||||||
|
@ -77,29 +81,27 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||||
WRITE(p, "Texture2D Tex0 : register(t0);\n");
|
WRITE(p, "Texture2D Tex0 : register(t0);\n");
|
||||||
|
|
||||||
WRITE(p, "void main(\n");
|
WRITE(p, "void main(\n");
|
||||||
WRITE(p," out float4 ocol0 : SV_Target)\n");
|
WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n");
|
||||||
}
|
|
||||||
|
|
||||||
WRITE(p, "{\n"
|
WRITE(p, "{\n"
|
||||||
" int2 sampleUv;\n"
|
" int2 sampleUv;\n"
|
||||||
" int2 uv1 = int2(gl_FragCoord.xy);\n"
|
" int2 uv1 = int2(rawpos.xy);\n"
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
|
WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW));
|
||||||
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
|
WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH));
|
||||||
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples));
|
|
||||||
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", IntLog2(blkH), ~(blkW - 1));
|
|
||||||
if (samples == 1)
|
if (samples == 1)
|
||||||
{
|
{
|
||||||
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
|
// With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
|
||||||
WRITE(p, " bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
|
WRITE(p, " bool first = (uv1.x & %d) == 0;\n", blkH * blkW / 2);
|
||||||
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n");
|
samples = 2;
|
||||||
}
|
}
|
||||||
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
|
WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1);
|
||||||
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", IntLog2(blkW), blkH - 1);
|
WRITE(p, " int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(blkW / samples));
|
||||||
|
WRITE(p, " int x_offset_in_block = (offset_in_block & %d) << %d;\n", (blkW / samples) - 1, IntLog2(samples));
|
||||||
|
|
||||||
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n");
|
WRITE(p, " sampleUv.x = x_block_position + x_offset_in_block;\n");
|
||||||
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n");
|
WRITE(p, " sampleUv.y = y_block_position + y_offset_in_block;\n");
|
||||||
|
|
||||||
WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords
|
WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords
|
||||||
WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel
|
WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel
|
||||||
|
@ -115,11 +117,20 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
|
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
|
||||||
|
{
|
||||||
|
if (ApiType == API_OPENGL)
|
||||||
{
|
{
|
||||||
WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n",
|
WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n",
|
||||||
dest, xoffset, colorComp
|
dest, xoffset, colorComp
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
WRITE(p, " %s = Tex0.Sample(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n",
|
||||||
|
dest, xoffset, colorComp
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void WriteColorToIntensity(char*& p, const char* src, const char* dest)
|
static void WriteColorToIntensity(char*& p, const char* src, const char* dest)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue