it works!

This commit is contained in:
RSDuck 2023-04-13 22:50:27 +02:00
parent 3428d3eb3e
commit 91bea108ea
7 changed files with 110 additions and 161 deletions

View File

@ -367,7 +367,7 @@ void SoftRenderer::VBlankEnd(Unit* unitA, Unit* unitB)
{ {
if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1)) if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1))
{ {
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame(); //reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame();
} }
} }
#endif #endif

View File

@ -156,6 +156,8 @@ public:
virtual void RenderFrame() = 0; virtual void RenderFrame() = 0;
virtual void RestartFrame() {}; virtual void RestartFrame() {};
virtual u32* GetLine(int line) = 0; virtual u32* GetLine(int line) = 0;
virtual void SetupAccelFrame() {}
protected: protected:
Renderer3D(bool Accelerated); Renderer3D(bool Accelerated);
}; };

View File

@ -99,6 +99,14 @@ bool ComputeRenderer::Init()
glBindTexture(GL_TEXTURE_BUFFER, YSpanIndicesTexture); glBindTexture(GL_TEXTURE_BUFFER, YSpanIndicesTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory);
glGenTextures(1, &Framebuffer);
glBindTexture(GL_TEXTURE_2D, Framebuffer);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 256, 192);
glGenBuffers(1, &MetaUniformMemory);
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
glBufferData(GL_UNIFORM_BUFFER, sizeof(MetaUniform), nullptr, GL_DYNAMIC_DRAW);
CompileShader(ShaderInterpXSpans[0], ComputeRendererShaders::InterpSpans, {"InterpSpans", "ZBuffer"}); CompileShader(ShaderInterpXSpans[0], ComputeRendererShaders::InterpSpans, {"InterpSpans", "ZBuffer"});
CompileShader(ShaderInterpXSpans[1], ComputeRendererShaders::InterpSpans, {"InterpSpans", "WBuffer"}); CompileShader(ShaderInterpXSpans[1], ComputeRendererShaders::InterpSpans, {"InterpSpans", "WBuffer"});
CompileShader(ShaderBinCombined, ComputeRendererShaders::BinCombined, {"BinCombined"}); CompileShader(ShaderBinCombined, ComputeRendererShaders::BinCombined, {"BinCombined"});
@ -133,6 +141,19 @@ bool ComputeRenderer::Init()
CompileShader(ShaderFinalPass[6], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "Fog"}); CompileShader(ShaderFinalPass[6], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "Fog"});
CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"}); CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"});
glGenSamplers(9, Samplers);
for (u32 j = 0; j < 3; j++)
{
for (u32 i = 0; i < 3; i++)
{
const GLenum translateWrapMode[3] = {GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT};
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_S, translateWrapMode[i]);
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_T, translateWrapMode[j]);
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameterf(Samplers[i+j*3], GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
}
return true; return true;
} }
@ -143,23 +164,17 @@ void ComputeRenderer::DeInit()
void ComputeRenderer::Reset() void ComputeRenderer::Reset()
{ {
/*for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
{ {
for (u32 j = 0; j < 8; j++) for (u32 j = 0; j < 8; j++)
{ {
for (u32 k = 0; k < TexArrays[i][j].size(); k++) for (u32 k = 0; k < TexArrays[i][j].size(); k++)
Gfx::TextureHeap->Free(TexArrays[i][j][k].Memory); glDeleteTextures(1, &TexArrays[i][j][k]);
TexArrays[i][j].clear(); TexArrays[i][j].clear();
FreeTextures[i][j].clear(); FreeTextures[i][j].clear();
} }
}*/
TexCache.clear();
FreeImageDescriptorsCount = TexCacheMaxImages;
for (int i = 0; i < TexCacheMaxImages; i++)
{
FreeImageDescriptors[i] = i;
} }
TexCache.clear();
} }
void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings) void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
@ -401,28 +416,6 @@ inline u16 Color3of5(u16 color0, u16 color1)
return r | g | b; return r | g | b;
} }
/*
inline void RGB5ToRGB6(uint8x16_t lo, uint8x16_t hi, uint8x16_t& red, uint8x16_t& green, uint8x16_t& blue)
{
red = vandq_u8(vshlq_n_u8(lo, 1), vdupq_n_u8(0x3E));
green = vbslq_u8(vdupq_n_u8(0xCE), vshrq_n_u8(lo, 4), vshlq_n_u8(hi, 4));
blue = vandq_u8(vshrq_n_u8(hi, 1), vdupq_n_u8(0x3E));
red = vandq_u8(vtstq_u8(red, red), vaddq_u8(red, vdupq_n_u8(1)));
green = vandq_u8(vtstq_u8(green, green), vaddq_u8(green, vdupq_n_u8(1)));
blue = vandq_u8(vtstq_u8(blue, blue), vaddq_u8(blue, vdupq_n_u8(1)));
}
inline void RGB5ToRGB6(uint8x8_t lo, uint8x8_t hi, uint8x8_t& red, uint8x8_t& green, uint8x8_t& blue)
{
red = vand_u8(vshl_n_u8(lo, 1), vdup_n_u8(0x3E));
green = vbsl_u8(vdup_n_u8(0xCE), vshr_n_u8(lo, 4), vshl_n_u8(hi, 4));
blue = vand_u8(vshr_n_u8(hi, 1), vdup_n_u8(0x3E));
red = vand_u8(vtst_u8(red, red), vadd_u8(red, vdup_n_u8(1)));
green = vand_u8(vtst_u8(green, green), vadd_u8(green, vdup_n_u8(1)));
blue = vand_u8(vtst_u8(blue, blue), vadd_u8(blue, vdup_n_u8(1)));
}*/
inline u32 ConvertRGB5ToRGB8(u16 val) inline u32 ConvertRGB5ToRGB8(u16 val)
{ {
return (((u32)val & 0x1F) << 3) return (((u32)val & 0x1F) << 3)
@ -555,7 +548,7 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
template <int outputFmt, int X, int Y> template <int outputFmt, int X, int Y>
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData) void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
{ {
/*for (int y = 0; y < height; y++) for (int y = 0; y < height; y++)
{ {
for (int x = 0; x < width; x++) for (int x = 0; x < width; x++)
{ {
@ -578,43 +571,7 @@ void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* pa
} }
output[x + y * width] = res; output[x + y * width] = res;
} }
}*/ }
}
void Convert16ColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
{
/*uint8x16x2_t palette = vld2q_u8((u8*)palData);
uint8x16_t paletteR, paletteG, paletteB;
RGB5ToRGB6(palette.val[0], palette.val[1], paletteR, paletteG, paletteB);
uint8x16_t firstEntryAlpha = vdupq_n_u8(color0Transparent ? 0 : 0x1F);
for (int i = 0; i < width*height/2; i += 16)
{
uint8x16_t packedIndices = vld1q_u8(&texData[i]);
// unpack indices
uint8x16_t oddIndices = vandq_u8(packedIndices, vdupq_n_u8(0xF));
uint8x16_t evenIndices = vshrq_n_u8(packedIndices, 4);
uint8x16_t indices0 = vzip1q_u8(oddIndices, evenIndices);
uint8x16_t indices1 = vzip2q_u8(oddIndices, evenIndices);
// palettise
uint8x16x4_t finalPixels0, finalPixels1;
finalPixels0.val[0] = vqtbl1q_u8(paletteR, indices0);
finalPixels0.val[1] = vqtbl1q_u8(paletteG, indices0);
finalPixels0.val[2] = vqtbl1q_u8(paletteB, indices0);
finalPixels0.val[3] = vbslq_u8(vceqzq_u8(indices0), firstEntryAlpha, vdupq_n_u8(0x1F));
finalPixels1.val[0] = vqtbl1q_u8(paletteR, indices1);
finalPixels1.val[1] = vqtbl1q_u8(paletteG, indices1);
finalPixels1.val[2] = vqtbl1q_u8(paletteB, indices1);
finalPixels1.val[3] = vbslq_u8(vceqzq_u8(indices1), firstEntryAlpha, vdupq_n_u8(0x1F));
vst4q_u8((u8*)&output[i*2], finalPixels0);
vst4q_u8((u8*)&output[i*2+16], finalPixels1);
}*/
} }
template <int outputFmt, int colorBits> template <int outputFmt, int colorBits>
@ -688,22 +645,12 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
{ {
entry.TextureRAMSize[0] = width*height*2; entry.TextureRAMSize[0] = width*height*2;
/*for (u32 i = 0; i < width*height; i += 16) for (u32 i = 0; i < width*height; i += 2)
{ {
uint8x16x2_t pixels = vld2q_u8(&GPU::VRAMFlat_Texture[addr + i * 2]); u16 value = *(u16*)&GPU::VRAMFlat_Texture[addr + i * 2];
uint8x16_t red, green, blue; TextureDecodingBuffer[i] = ConvertRGB5ToRGB6(value);
RGB5ToRGB6(pixels.val[0], pixels.val[1], red, green, blue); }
uint8x16_t alpha = vbslq_u8(vtstq_u8(pixels.val[1], vdupq_n_u8(0x80)), vdupq_n_u8(0x1F), vdupq_n_u8(0));
vst4q_u8((u8*)&TextureDecodingBuffer[i],
{
red,
green,
blue,
alpha
});
}*/
} }
else if (fmt == 5) else if (fmt == 5)
{ {
@ -756,7 +703,7 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, TextureDecodingBuffer, texData, palData); break; case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, TextureDecodingBuffer, texData, palData); break;
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, TextureDecodingBuffer, texData, palData); break; case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, TextureDecodingBuffer, texData, palData); break;
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break; case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
case 3: Convert16ColorsTexture(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break; case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break; case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
} }
} }
@ -772,55 +719,36 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
auto& texArrays = TexArrays[widthLog2][heightLog2]; auto& texArrays = TexArrays[widthLog2][heightLog2];
auto& freeTextures = FreeTextures[widthLog2][heightLog2]; auto& freeTextures = FreeTextures[widthLog2][heightLog2];
/*if (freeTextures.size() == 0) if (freeTextures.size() == 0)
{ {
texArrays.resize(texArrays.size()+1); texArrays.resize(texArrays.size()+1);
TexArray& array = texArrays[texArrays.size()-1]; GLuint& array = texArrays[texArrays.size()-1];
u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64); u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
// allocate new array texture // allocate new array texture
dk::ImageLayout imageLayout; glGenTextures(1, &array);
dk::ImageLayoutMaker{Gfx::Device} glBindTexture(GL_TEXTURE_2D_ARRAY, array);
.setType(DkImageType_2DArray) glTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8UI, width, height, layers);
.setFormat(DkImageFormat_RGBA8_Uint)
.setDimensions(width, height, layers)
.initialize(imageLayout);
assert(FreeImageDescriptorsCount > 0);
array.ImageDescriptor = FreeImageDescriptors[--FreeImageDescriptorsCount];
array.Memory = Gfx::TextureHeap->Alloc(imageLayout.getSize(), imageLayout.getAlignment());
array.Image.initialize(imageLayout, Gfx::TextureHeap->MemBlock, array.Memory.Offset);
dk::ImageDescriptor descriptor;
descriptor.initialize(array.Image);
DkGpuAddr descriptors = Gfx::DataHeap->GpuAddr(ImageDescriptors);
EmuCmdBuf.pushData(descriptors + (descriptorOffset_TexcacheStart + array.ImageDescriptor) * sizeof(DkImageDescriptor),
&descriptor,
sizeof(DkImageDescriptor));
//printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor); //printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor);
for (u16 i = 0; i < layers; i++) for (u32 i = 0; i < layers; i++)
{ {
freeTextures.push_back(TexArrayEntry{(u16)(texArrays.size()-1), i}); freeTextures.push_back(TexArrayEntry{array, i});
} }
}*/ }
TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1]; TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1];
freeTextures.pop_back(); freeTextures.pop_back();
TexArray& array = texArrays[storagePlace.TexArrayIdx];
//printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor); //printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor);
/*UploadBuf.UploadAndCopyTexture(Gfx::EmuCmdBuf, array.Image, glBindTexture(GL_TEXTURE_2D_ARRAY, storagePlace.TextureID);
(u8*)TextureDecodingBuffer, glTexSubImage3D(GL_TEXTURE_2D_ARRAY,
0, 0, width, height, 0, 0, 0, storagePlace.Layer,
width*4, width, height, 1,
storagePlace.LayerIdx);*/ GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, TextureDecodingBuffer);
entry.DescriptorIdx = array.ImageDescriptor;
entry.Texture = storagePlace; entry.Texture = storagePlace;
return TexCache.emplace(std::make_pair(key, entry)).first->second; return TexCache.emplace(std::make_pair(key, entry)).first->second;
@ -828,7 +756,7 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
struct Variant struct Variant
{ {
s16 Texture, Sampler; GLuint Texture, Sampler;
u16 Width, Height; u16 Width, Height;
u8 BlendMode; u8 BlendMode;
@ -841,11 +769,11 @@ struct Variant
/* /*
Antialiasing Antialiasing
W-Buffer W-Buffer
Mit Textur With Texture
0 0
1, 3 1, 3
2 2
Ohne Textur without Texture
2 2
0, 1, 3 0, 1, 3
@ -925,6 +853,18 @@ void ComputeRenderer::RenderFrame()
int numYSpans = 0; int numYSpans = 0;
int numSetupIndices = 0; int numSetupIndices = 0;
/*
Some games really like to spam small textures, often
to store the data like PPU tiles. E.g. Shantae
or some Mega Man game. Fortunately they are usually kind
enough to not vary the texture size all too often (usually
they just use 8x8 or 16x for everything).
This is the reason we have this whole mess where textures of
the same size are put into array textures. This allows
to increase the batch size.
Less variance between each Variant hah!
*/
u32 numVariants = 0, prevVariant, prevTexLayer; u32 numVariants = 0, prevVariant, prevTexLayer;
Variant variants[MaxVariants]; Variant variants[MaxVariants];
@ -951,6 +891,8 @@ void ComputeRenderer::RenderFrame()
bool foundVariant = false; bool foundVariant = false;
if (i > 0) if (i > 0)
{ {
// if the whole texture attribute matches
// the texture layer will also match
Polygon* prevPolygon = RenderPolygonRAM[i - 1]; Polygon* prevPolygon = RenderPolygonRAM[i - 1];
foundVariant = prevPolygon->TexParam == polygon->TexParam foundVariant = prevPolygon->TexParam == polygon->TexParam
&& prevPolygon->TexPalette == polygon->TexPalette && prevPolygon->TexPalette == polygon->TexPalette
@ -964,9 +906,10 @@ void ComputeRenderer::RenderFrame()
{ {
Variant variant; Variant variant;
variant.BlendMode = polygon->IsShadowMask ? 4 : ((polygon->Attr >> 4) & 0x3); variant.BlendMode = polygon->IsShadowMask ? 4 : ((polygon->Attr >> 4) & 0x3);
variant.Texture = -1; variant.Texture = 0;
variant.Sampler = -1; variant.Sampler = 0;
TexCacheEntry* texcacheEntry = nullptr; TexCacheEntry* texcacheEntry = nullptr;
// we always need to look up the texture to get the layer of the array texture
if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7) if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7)
{ {
texcacheEntry = &GetTexture(polygon->TexParam, polygon->TexPalette); texcacheEntry = &GetTexture(polygon->TexParam, polygon->TexPalette);
@ -974,9 +917,10 @@ void ComputeRenderer::RenderFrame()
bool wrapT = (polygon->TexParam >> 17) & 1; bool wrapT = (polygon->TexParam >> 17) & 1;
bool mirrorS = (polygon->TexParam >> 18) & 1; bool mirrorS = (polygon->TexParam >> 18) & 1;
bool mirrorT = (polygon->TexParam >> 19) & 1; bool mirrorT = (polygon->TexParam >> 19) & 1;
variant.Sampler = (wrapS ? (mirrorS ? 2 : 1) : 0) + (wrapT ? (mirrorT ? 2 : 1) : 0) * 3; variant.Sampler = Samplers[(wrapS ? (mirrorS ? 2 : 1) : 0) + (wrapT ? (mirrorT ? 2 : 1) : 0) * 3];
variant.Texture = texcacheEntry->DescriptorIdx; variant.Texture = texcacheEntry->Texture.TextureID;
prevTexLayer = texcacheEntry->Texture.LayerIdx; prevTexLayer = texcacheEntry->Texture.Layer;
if (texcacheEntry->LastVariant < numVariants && variants[texcacheEntry->LastVariant] == variant) if (texcacheEntry->LastVariant < numVariants && variants[texcacheEntry->LastVariant] == variant)
{ {
foundVariant = true; foundVariant = true;
@ -1272,7 +1216,8 @@ void ComputeRenderer::RenderFrame()
u32 fogA = (RenderFogColor >> 16) & 0x1F; u32 fogA = (RenderFogColor >> 16) & 0x1F;
meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24); meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24);
} }
meta.XScroll = RenderXPos; meta.XScroll = 0;
//meta.XScroll = RenderXPos;
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory); glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(MetaUniform), &meta); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(MetaUniform), &meta);
@ -1312,6 +1257,8 @@ void ComputeRenderer::RenderFrame()
glDispatchComputeIndirect(offsetof(BinResult, SortWorkWorkCount)); glDispatchComputeIndirect(offsetof(BinResult, SortWorkWorkCount));
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
glActiveTexture(GL_TEXTURE0);
// rasterise // rasterise
{ {
bool highLightMode = RenderDispCnt & (1<<1); bool highLightMode = RenderDispCnt & (1<<1);
@ -1338,7 +1285,7 @@ void ComputeRenderer::RenderFrame()
}; };
GLuint prevShader = 0; GLuint prevShader = 0;
s32 prevTexture = -1, prevSampler = -1; s32 prevTexture = 0, prevSampler = 0;
for (int i = 0; i < numVariants; i++) for (int i = 0; i < numVariants; i++)
{ {
GLuint shader = 0; GLuint shader = 0;
@ -1349,11 +1296,14 @@ void ComputeRenderer::RenderFrame()
else else
{ {
shader = shadersUseTexture[variants[i].BlendMode]; shader = shadersUseTexture[variants[i].BlendMode];
if (variants[i].Texture != prevTexture || variants[i].Sampler != prevSampler) if (variants[i].Texture != prevTexture)
{ {
assert(variants[i].Sampler < 9); glBindTexture(GL_TEXTURE_2D_ARRAY, variants[i].Texture);
glBindTexture(GL_TEXTURE_2D, variants[i].Texture);
prevTexture = variants[i].Texture; prevTexture = variants[i].Texture;
}
if (variants[i].Sampler != prevSampler)
{
glBindSampler(0, variants[i].Sampler);
prevSampler = variants[i].Sampler; prevSampler = variants[i].Sampler;
} }
} }
@ -1364,7 +1314,7 @@ void ComputeRenderer::RenderFrame()
prevShader = shader; prevShader = shader;
} }
glUniform1i(UniformIdxCurVariant, i); glUniform1ui(UniformIdxCurVariant, i);
glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height); glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
glDispatchComputeIndirect(offsetof(BinResult, VariantWorkCount) + i*4*4); glDispatchComputeIndirect(offsetof(BinResult, VariantWorkCount) + i*4*4);
@ -1378,7 +1328,7 @@ void ComputeRenderer::RenderFrame()
glDispatchCompute(256/8, 192/8, 1); glDispatchCompute(256/8, 192/8, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
//glBindTexture(GL_TEXTURE_2D, ) glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
u32 finalPassShader = 0; u32 finalPassShader = 0;
if (RenderDispCnt & (1<<4)) if (RenderDispCnt & (1<<4))
finalPassShader |= 0x4; finalPassShader |= 0x4;
@ -1448,4 +1398,9 @@ u32* ComputeRenderer::GetLine(int line)
return DummyLine; return DummyLine;
} }
void ComputeRenderer::SetupAccelFrame()
{
glBindTexture(GL_TEXTURE_2D, Framebuffer);
}
} }

View File

@ -48,8 +48,7 @@ public:
void RestartFrame() override; void RestartFrame() override;
u32* GetLine(int line) override; u32* GetLine(int line) override;
//dk::Fence FrameReady = {}; void SetupAccelFrame() override;
//dk::Fence FrameReserveFence = {};
private: private:
GLuint ShaderInterpXSpans[2]; GLuint ShaderInterpXSpans[2];
GLuint ShaderBinCombined; GLuint ShaderBinCombined;
@ -197,18 +196,12 @@ private:
struct TexArrayEntry struct TexArrayEntry
{ {
u16 TexArrayIdx; GLuint TextureID;
u16 LayerIdx; u32 Layer;
};
struct TexArray
{
GLuint Image;
u32 ImageDescriptor;
}; };
struct TexCacheEntry struct TexCacheEntry
{ {
u32 DescriptorIdx;
u32 LastVariant; // very cheap way to make variant lookup faster u32 LastVariant; // very cheap way to make variant lookup faster
u32 TextureRAMStart[2], TextureRAMSize[2]; u32 TextureRAMStart[2], TextureRAMSize[2];
@ -239,16 +232,15 @@ private:
}; };
GLuint MetaUniformMemory; GLuint MetaUniformMemory;
static const u32 TexCacheMaxImages = 4096;
u32 FreeImageDescriptorsCount = 0;
u32 FreeImageDescriptors[TexCacheMaxImages];
std::vector<TexArrayEntry> FreeTextures[8][8]; std::vector<TexArrayEntry> FreeTextures[8][8];
std::vector<TexArray> TexArrays[8][8]; std::vector<GLuint> TexArrays[8][8];
GLuint Samplers[9];
u32 TextureDecodingBuffer[1024*1024]; u32 TextureDecodingBuffer[1024*1024];
GLuint Framebuffer;
TexCacheEntry& GetTexture(u32 textureParam, u32 paletteParam); TexCacheEntry& GetTexture(u32 textureParam, u32 paletteParam);
void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to); void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);

View File

@ -223,13 +223,8 @@ layout (std140, binding = 0) uniform MetaUniform
uint FogOffset, FogShift, FogColor; uint FogOffset, FogShift, FogColor;
int XScroll; int XScroll;
// only used/updated for rasteriation
uint CurVariant;
vec2 InvTextureSize;
}; };
#if defined(InterpSpans) || defined(Rasterise) #if defined(InterpSpans) || defined(Rasterise)
uint Umulh(uint a, uint b) uint Umulh(uint a, uint b)
{ {
@ -920,6 +915,9 @@ layout (local_size_x = TileSize, local_size_y = TileSize) in;
layout (binding = 0) uniform usampler2DArray CurrentTexture; layout (binding = 0) uniform usampler2DArray CurrentTexture;
layout (location = 0) uniform uint CurVariant;
layout (location = 1) uniform vec2 InvTextureSize;
void main() void main()
{ {
uvec2 workDesc = SortedWork[SortedWorkOffset[CurVariant] + gl_WorkGroupID.z]; uvec2 workDesc = SortedWork[SortedWorkOffset[CurVariant] + gl_WorkGroupID.z];
@ -1328,7 +1326,7 @@ const char* FinalPass = R"(
layout (local_size_x = 32) in; layout (local_size_x = 32) in;
layout (binding = 0, r32ui) writeonly uniform uimage2D FinalFB; layout (binding = 0, rgba8) writeonly uniform image2D FinalFB;
uint BlendFog(uint color, uint depth) uint BlendFog(uint color, uint depth)
{ {
@ -1482,15 +1480,17 @@ void main()
} }
#endif #endif
if (bitfieldExtract(color.x, 24, 8) != 0U) // if (bitfieldExtract(color.x, 24, 8) != 0U)
color.x |= 0x40000000U; // color.x |= 0x40000000U;
else // else
color.x = 0U; // color.x = 0U;
//if (gl_LocalInvocationID.x == 7 || gl_LocalInvocationID.y == 7) //if (gl_LocalInvocationID.x == 7 || gl_LocalInvocationID.y == 7)
//color.x = 0x1F00001FU | 0x40000000U; //color.x = 0x1F00001FU | 0x40000000U;
imageStore(FinalFB, ivec2(gl_GlobalInvocationID.xy), uvec4(color.x, 0, 0, 0)); vec4 result = vec4(bitfieldExtract(color.x, 16, 8), bitfieldExtract(color.x, 8, 8), color.x & 0x3FU, bitfieldExtract(color.x, 24, 8));
result /= vec4(63.0, 63.0, 63.0, 31.0);
imageStore(FinalFB, ivec2(gl_GlobalInvocationID.xy), result);
} }
)"; )";

View File

@ -37,7 +37,7 @@ public:
virtual void RenderFrame() override; virtual void RenderFrame() override;
virtual u32* GetLine(int line) override; virtual u32* GetLine(int line) override;
void SetupAccelFrame(); void SetupAccelFrame() override;
void PrepareCaptureFrame(); void PrepareCaptureFrame();
static std::unique_ptr<GLRenderer> New() noexcept; static std::unique_ptr<GLRenderer> New() noexcept;

View File

@ -213,7 +213,7 @@ void GLCompositor::RenderFrame()
} }
glActiveTexture(GL_TEXTURE1); glActiveTexture(GL_TEXTURE1);
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame(); GPU3D::CurrentRenderer->SetupAccelFrame();
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
glBindVertexArray(CompVertexArrayID); glBindVertexArray(CompVertexArrayID);