Merge pull request #1966 from magumagu/unify-efb-encode
Unify EFB encoding shader generation
This commit is contained in:
commit
beaa9905a6
File diff suppressed because it is too large
Load Diff
|
@ -24,9 +24,7 @@ namespace DX11
|
|||
|
||||
class PSTextureEncoder : public TextureEncoder
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
PSTextureEncoder();
|
||||
|
||||
void Init();
|
||||
|
@ -36,25 +34,14 @@ public:
|
|||
bool isIntensity, bool scaleByHalf);
|
||||
|
||||
private:
|
||||
|
||||
bool m_ready;
|
||||
|
||||
ID3D11Texture2D* m_out;
|
||||
ID3D11RenderTargetView* m_outRTV;
|
||||
ID3D11Texture2D* m_outStage;
|
||||
ID3D11Buffer* m_encodeParams;
|
||||
ID3D11Buffer* m_quad;
|
||||
ID3D11VertexShader* m_vShader;
|
||||
ID3D11InputLayout* m_quadLayout;
|
||||
ID3D11BlendState* m_efbEncodeBlendState;
|
||||
ID3D11DepthStencilState* m_efbEncodeDepthState;
|
||||
ID3D11RasterizerState* m_efbEncodeRastState;
|
||||
ID3D11SamplerState* m_efbSampler;
|
||||
|
||||
// Stuff only used in static-linking mode (SM4.0-compatible)
|
||||
|
||||
bool InitStaticMode();
|
||||
bool SetStaticShader(unsigned int dstFormat,
|
||||
ID3D11PixelShader* SetStaticShader(unsigned int dstFormat,
|
||||
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
|
||||
|
||||
typedef unsigned int ComboKey; // Key for a shader combination
|
||||
|
@ -69,35 +56,6 @@ private:
|
|||
typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
|
||||
|
||||
ComboMap m_staticShaders;
|
||||
|
||||
// Stuff only used for dynamic-linking mode (SM5.0+, available as soon as
|
||||
// Microsoft fixes their bloody HLSL compiler)
|
||||
|
||||
bool InitDynamicMode();
|
||||
bool SetDynamicShader(unsigned int dstFormat,
|
||||
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);
|
||||
|
||||
ID3D11PixelShader* m_dynamicShader;
|
||||
ID3D11ClassLinkage* m_classLinkage;
|
||||
|
||||
// Interface slots
|
||||
UINT m_fetchSlot;
|
||||
UINT m_scaledFetchSlot;
|
||||
UINT m_intensitySlot;
|
||||
UINT m_generatorSlot;
|
||||
|
||||
// Class instances
|
||||
// Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z
|
||||
ID3D11ClassInstance* m_fetchClass[4];
|
||||
// ScaledFetch: 0 is off, 1 is on
|
||||
ID3D11ClassInstance* m_scaledFetchClass[2];
|
||||
// Intensity: 0 is off, 1 is on
|
||||
ID3D11ClassInstance* m_intensityClass[2];
|
||||
// Generator: one for each dst format, 16 total
|
||||
ID3D11ClassInstance* m_generatorClass[16];
|
||||
|
||||
std::vector<ID3D11ClassInstance*> m_linkageArray;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ static GLuint s_texConvFrameBuffer[2] = {0,0};
|
|||
static GLuint s_srcTexture = 0; // for decoding from RAM
|
||||
static GLuint s_dstTexture = 0; // for encoding to RAM
|
||||
|
||||
const int renderBufferWidth = 1024;
|
||||
const int renderBufferWidth = EFB_WIDTH * 4;
|
||||
const int renderBufferHeight = 1024;
|
||||
|
||||
static SHADER s_rgbToYuyvProgram;
|
||||
|
@ -320,14 +320,16 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer,
|
|||
source.left, source.top,
|
||||
expandedWidth, bScaleByHalf ? 2 : 1);
|
||||
|
||||
int cacheBytes = 32;
|
||||
unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format);
|
||||
unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format);
|
||||
unsigned int cacheLinesPerRow;
|
||||
if ((format & 0x0f) == 6)
|
||||
cacheBytes = 64;
|
||||
cacheLinesPerRow = numBlocksX * 2;
|
||||
else
|
||||
cacheLinesPerRow = numBlocksX;
|
||||
|
||||
int readStride = (expandedWidth * cacheBytes) /
|
||||
TexDecoder_GetBlockWidthInTexels(format);
|
||||
EncodeToRamUsingShader(source_texture,
|
||||
dest_ptr, expandedWidth / samples, expandedHeight, readStride,
|
||||
dest_ptr, cacheLinesPerRow * 8, numBlocksY, cacheLinesPerRow * 32,
|
||||
bScaleByHalf > 0 && !bFromZBuffer);
|
||||
return size_in_bytes; // TODO: D3D11 is calculating this value differently!
|
||||
|
||||
|
|
|
@ -70,36 +70,38 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
|||
|
||||
WRITE(p, " out vec4 ocol0;\n");
|
||||
WRITE(p, "void main()\n");
|
||||
WRITE(p, "{\n"
|
||||
" int2 sampleUv;\n"
|
||||
" int2 uv1 = int2(gl_FragCoord.xy);\n"
|
||||
);
|
||||
}
|
||||
else // D3D
|
||||
{
|
||||
WRITE(p,"sampler samp0 : register(s0);\n");
|
||||
WRITE(p, "sampler samp0 : register(s0);\n");
|
||||
WRITE(p, "Texture2D Tex0 : register(t0);\n");
|
||||
|
||||
WRITE(p,"void main(\n");
|
||||
WRITE(p," out float4 ocol0 : SV_Target)\n");
|
||||
WRITE(p, "void main(\n");
|
||||
WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n");
|
||||
WRITE(p, "{\n"
|
||||
" int2 sampleUv;\n"
|
||||
" int2 uv1 = int2(rawpos.xy);\n"
|
||||
);
|
||||
}
|
||||
|
||||
WRITE(p, "{\n"
|
||||
" int2 sampleUv;\n"
|
||||
" int2 uv1 = int2(gl_FragCoord.xy);\n"
|
||||
);
|
||||
|
||||
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
|
||||
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
|
||||
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples));
|
||||
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", IntLog2(blkH), ~(blkW - 1));
|
||||
WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW));
|
||||
WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH));
|
||||
if (samples == 1)
|
||||
{
|
||||
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
|
||||
WRITE(p, " bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
|
||||
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n");
|
||||
// With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
|
||||
WRITE(p, " bool first = (uv1.x & %d) == 0;\n", blkH * blkW / 2);
|
||||
samples = 2;
|
||||
}
|
||||
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
|
||||
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", IntLog2(blkW), blkH - 1);
|
||||
WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1);
|
||||
WRITE(p, " int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(blkW / samples));
|
||||
WRITE(p, " int x_offset_in_block = (offset_in_block & %d) << %d;\n", (blkW / samples) - 1, IntLog2(samples));
|
||||
|
||||
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n");
|
||||
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n");
|
||||
WRITE(p, " sampleUv.x = x_block_position + x_offset_in_block;\n");
|
||||
WRITE(p, " sampleUv.y = y_block_position + y_offset_in_block;\n");
|
||||
|
||||
WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords
|
||||
WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel
|
||||
|
@ -116,9 +118,18 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
|||
|
||||
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
|
||||
{
|
||||
WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n",
|
||||
dest, xoffset, colorComp
|
||||
);
|
||||
if (ApiType == API_OPENGL)
|
||||
{
|
||||
WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n",
|
||||
dest, xoffset, colorComp
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
WRITE(p, " %s = Tex0.Sample(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n",
|
||||
dest, xoffset, colorComp
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static void WriteColorToIntensity(char*& p, const char* src, const char* dest)
|
||||
|
|
Loading…
Reference in New Issue