it works!
This commit is contained in:
parent
3428d3eb3e
commit
91bea108ea
|
@ -367,7 +367,7 @@ void SoftRenderer::VBlankEnd(Unit* unitA, Unit* unitB)
|
||||||
{
|
{
|
||||||
if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1))
|
if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1))
|
||||||
{
|
{
|
||||||
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame();
|
//reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -156,6 +156,8 @@ public:
|
||||||
virtual void RenderFrame() = 0;
|
virtual void RenderFrame() = 0;
|
||||||
virtual void RestartFrame() {};
|
virtual void RestartFrame() {};
|
||||||
virtual u32* GetLine(int line) = 0;
|
virtual u32* GetLine(int line) = 0;
|
||||||
|
|
||||||
|
virtual void SetupAccelFrame() {}
|
||||||
protected:
|
protected:
|
||||||
Renderer3D(bool Accelerated);
|
Renderer3D(bool Accelerated);
|
||||||
};
|
};
|
||||||
|
|
|
@ -99,6 +99,14 @@ bool ComputeRenderer::Init()
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, YSpanIndicesTexture);
|
glBindTexture(GL_TEXTURE_BUFFER, YSpanIndicesTexture);
|
||||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory);
|
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory);
|
||||||
|
|
||||||
|
glGenTextures(1, &Framebuffer);
|
||||||
|
glBindTexture(GL_TEXTURE_2D, Framebuffer);
|
||||||
|
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 256, 192);
|
||||||
|
|
||||||
|
glGenBuffers(1, &MetaUniformMemory);
|
||||||
|
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
|
||||||
|
glBufferData(GL_UNIFORM_BUFFER, sizeof(MetaUniform), nullptr, GL_DYNAMIC_DRAW);
|
||||||
|
|
||||||
CompileShader(ShaderInterpXSpans[0], ComputeRendererShaders::InterpSpans, {"InterpSpans", "ZBuffer"});
|
CompileShader(ShaderInterpXSpans[0], ComputeRendererShaders::InterpSpans, {"InterpSpans", "ZBuffer"});
|
||||||
CompileShader(ShaderInterpXSpans[1], ComputeRendererShaders::InterpSpans, {"InterpSpans", "WBuffer"});
|
CompileShader(ShaderInterpXSpans[1], ComputeRendererShaders::InterpSpans, {"InterpSpans", "WBuffer"});
|
||||||
CompileShader(ShaderBinCombined, ComputeRendererShaders::BinCombined, {"BinCombined"});
|
CompileShader(ShaderBinCombined, ComputeRendererShaders::BinCombined, {"BinCombined"});
|
||||||
|
@ -133,6 +141,19 @@ bool ComputeRenderer::Init()
|
||||||
CompileShader(ShaderFinalPass[6], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "Fog"});
|
CompileShader(ShaderFinalPass[6], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "Fog"});
|
||||||
CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"});
|
CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"});
|
||||||
|
|
||||||
|
glGenSamplers(9, Samplers);
|
||||||
|
for (u32 j = 0; j < 3; j++)
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
const GLenum translateWrapMode[3] = {GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT};
|
||||||
|
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_S, translateWrapMode[i]);
|
||||||
|
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_T, translateWrapMode[j]);
|
||||||
|
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||||
|
glSamplerParameterf(Samplers[i+j*3], GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,23 +164,17 @@ void ComputeRenderer::DeInit()
|
||||||
|
|
||||||
void ComputeRenderer::Reset()
|
void ComputeRenderer::Reset()
|
||||||
{
|
{
|
||||||
/*for (u32 i = 0; i < 8; i++)
|
for (u32 i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
for (u32 j = 0; j < 8; j++)
|
for (u32 j = 0; j < 8; j++)
|
||||||
{
|
{
|
||||||
for (u32 k = 0; k < TexArrays[i][j].size(); k++)
|
for (u32 k = 0; k < TexArrays[i][j].size(); k++)
|
||||||
Gfx::TextureHeap->Free(TexArrays[i][j][k].Memory);
|
glDeleteTextures(1, &TexArrays[i][j][k]);
|
||||||
TexArrays[i][j].clear();
|
TexArrays[i][j].clear();
|
||||||
FreeTextures[i][j].clear();
|
FreeTextures[i][j].clear();
|
||||||
}
|
}
|
||||||
}*/
|
|
||||||
TexCache.clear();
|
|
||||||
|
|
||||||
FreeImageDescriptorsCount = TexCacheMaxImages;
|
|
||||||
for (int i = 0; i < TexCacheMaxImages; i++)
|
|
||||||
{
|
|
||||||
FreeImageDescriptors[i] = i;
|
|
||||||
}
|
}
|
||||||
|
TexCache.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
|
void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
|
||||||
|
@ -401,28 +416,6 @@ inline u16 Color3of5(u16 color0, u16 color1)
|
||||||
return r | g | b;
|
return r | g | b;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
inline void RGB5ToRGB6(uint8x16_t lo, uint8x16_t hi, uint8x16_t& red, uint8x16_t& green, uint8x16_t& blue)
|
|
||||||
{
|
|
||||||
red = vandq_u8(vshlq_n_u8(lo, 1), vdupq_n_u8(0x3E));
|
|
||||||
green = vbslq_u8(vdupq_n_u8(0xCE), vshrq_n_u8(lo, 4), vshlq_n_u8(hi, 4));
|
|
||||||
blue = vandq_u8(vshrq_n_u8(hi, 1), vdupq_n_u8(0x3E));
|
|
||||||
red = vandq_u8(vtstq_u8(red, red), vaddq_u8(red, vdupq_n_u8(1)));
|
|
||||||
green = vandq_u8(vtstq_u8(green, green), vaddq_u8(green, vdupq_n_u8(1)));
|
|
||||||
blue = vandq_u8(vtstq_u8(blue, blue), vaddq_u8(blue, vdupq_n_u8(1)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void RGB5ToRGB6(uint8x8_t lo, uint8x8_t hi, uint8x8_t& red, uint8x8_t& green, uint8x8_t& blue)
|
|
||||||
{
|
|
||||||
red = vand_u8(vshl_n_u8(lo, 1), vdup_n_u8(0x3E));
|
|
||||||
green = vbsl_u8(vdup_n_u8(0xCE), vshr_n_u8(lo, 4), vshl_n_u8(hi, 4));
|
|
||||||
blue = vand_u8(vshr_n_u8(hi, 1), vdup_n_u8(0x3E));
|
|
||||||
|
|
||||||
red = vand_u8(vtst_u8(red, red), vadd_u8(red, vdup_n_u8(1)));
|
|
||||||
green = vand_u8(vtst_u8(green, green), vadd_u8(green, vdup_n_u8(1)));
|
|
||||||
blue = vand_u8(vtst_u8(blue, blue), vadd_u8(blue, vdup_n_u8(1)));
|
|
||||||
}*/
|
|
||||||
|
|
||||||
inline u32 ConvertRGB5ToRGB8(u16 val)
|
inline u32 ConvertRGB5ToRGB8(u16 val)
|
||||||
{
|
{
|
||||||
return (((u32)val & 0x1F) << 3)
|
return (((u32)val & 0x1F) << 3)
|
||||||
|
@ -555,7 +548,7 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
|
||||||
template <int outputFmt, int X, int Y>
|
template <int outputFmt, int X, int Y>
|
||||||
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
|
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
|
||||||
{
|
{
|
||||||
/*for (int y = 0; y < height; y++)
|
for (int y = 0; y < height; y++)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < width; x++)
|
for (int x = 0; x < width; x++)
|
||||||
{
|
{
|
||||||
|
@ -578,43 +571,7 @@ void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* pa
|
||||||
}
|
}
|
||||||
output[x + y * width] = res;
|
output[x + y * width] = res;
|
||||||
}
|
}
|
||||||
}*/
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void Convert16ColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
|
|
||||||
{
|
|
||||||
/*uint8x16x2_t palette = vld2q_u8((u8*)palData);
|
|
||||||
|
|
||||||
uint8x16_t paletteR, paletteG, paletteB;
|
|
||||||
RGB5ToRGB6(palette.val[0], palette.val[1], paletteR, paletteG, paletteB);
|
|
||||||
|
|
||||||
uint8x16_t firstEntryAlpha = vdupq_n_u8(color0Transparent ? 0 : 0x1F);
|
|
||||||
|
|
||||||
for (int i = 0; i < width*height/2; i += 16)
|
|
||||||
{
|
|
||||||
uint8x16_t packedIndices = vld1q_u8(&texData[i]);
|
|
||||||
|
|
||||||
// unpack indices
|
|
||||||
uint8x16_t oddIndices = vandq_u8(packedIndices, vdupq_n_u8(0xF));
|
|
||||||
uint8x16_t evenIndices = vshrq_n_u8(packedIndices, 4);
|
|
||||||
|
|
||||||
uint8x16_t indices0 = vzip1q_u8(oddIndices, evenIndices);
|
|
||||||
uint8x16_t indices1 = vzip2q_u8(oddIndices, evenIndices);
|
|
||||||
|
|
||||||
// palettise
|
|
||||||
uint8x16x4_t finalPixels0, finalPixels1;
|
|
||||||
finalPixels0.val[0] = vqtbl1q_u8(paletteR, indices0);
|
|
||||||
finalPixels0.val[1] = vqtbl1q_u8(paletteG, indices0);
|
|
||||||
finalPixels0.val[2] = vqtbl1q_u8(paletteB, indices0);
|
|
||||||
finalPixels0.val[3] = vbslq_u8(vceqzq_u8(indices0), firstEntryAlpha, vdupq_n_u8(0x1F));
|
|
||||||
finalPixels1.val[0] = vqtbl1q_u8(paletteR, indices1);
|
|
||||||
finalPixels1.val[1] = vqtbl1q_u8(paletteG, indices1);
|
|
||||||
finalPixels1.val[2] = vqtbl1q_u8(paletteB, indices1);
|
|
||||||
finalPixels1.val[3] = vbslq_u8(vceqzq_u8(indices1), firstEntryAlpha, vdupq_n_u8(0x1F));
|
|
||||||
|
|
||||||
vst4q_u8((u8*)&output[i*2], finalPixels0);
|
|
||||||
vst4q_u8((u8*)&output[i*2+16], finalPixels1);
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int outputFmt, int colorBits>
|
template <int outputFmt, int colorBits>
|
||||||
|
@ -688,22 +645,12 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
|
||||||
{
|
{
|
||||||
entry.TextureRAMSize[0] = width*height*2;
|
entry.TextureRAMSize[0] = width*height*2;
|
||||||
|
|
||||||
/*for (u32 i = 0; i < width*height; i += 16)
|
for (u32 i = 0; i < width*height; i += 2)
|
||||||
{
|
{
|
||||||
uint8x16x2_t pixels = vld2q_u8(&GPU::VRAMFlat_Texture[addr + i * 2]);
|
u16 value = *(u16*)&GPU::VRAMFlat_Texture[addr + i * 2];
|
||||||
|
|
||||||
uint8x16_t red, green, blue;
|
TextureDecodingBuffer[i] = ConvertRGB5ToRGB6(value);
|
||||||
RGB5ToRGB6(pixels.val[0], pixels.val[1], red, green, blue);
|
}
|
||||||
uint8x16_t alpha = vbslq_u8(vtstq_u8(pixels.val[1], vdupq_n_u8(0x80)), vdupq_n_u8(0x1F), vdupq_n_u8(0));
|
|
||||||
|
|
||||||
vst4q_u8((u8*)&TextureDecodingBuffer[i],
|
|
||||||
{
|
|
||||||
red,
|
|
||||||
green,
|
|
||||||
blue,
|
|
||||||
alpha
|
|
||||||
});
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
else if (fmt == 5)
|
else if (fmt == 5)
|
||||||
{
|
{
|
||||||
|
@ -756,7 +703,7 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
|
||||||
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, TextureDecodingBuffer, texData, palData); break;
|
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, TextureDecodingBuffer, texData, palData); break;
|
||||||
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, TextureDecodingBuffer, texData, palData); break;
|
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, TextureDecodingBuffer, texData, palData); break;
|
||||||
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
||||||
case 3: Convert16ColorsTexture(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
||||||
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, TextureDecodingBuffer, texData, palData, color0Transparent); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -772,55 +719,36 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
|
||||||
auto& texArrays = TexArrays[widthLog2][heightLog2];
|
auto& texArrays = TexArrays[widthLog2][heightLog2];
|
||||||
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
|
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
|
||||||
|
|
||||||
/*if (freeTextures.size() == 0)
|
if (freeTextures.size() == 0)
|
||||||
{
|
{
|
||||||
texArrays.resize(texArrays.size()+1);
|
texArrays.resize(texArrays.size()+1);
|
||||||
TexArray& array = texArrays[texArrays.size()-1];
|
GLuint& array = texArrays[texArrays.size()-1];
|
||||||
|
|
||||||
u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
|
u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
|
||||||
|
|
||||||
// allocate new array texture
|
// allocate new array texture
|
||||||
dk::ImageLayout imageLayout;
|
glGenTextures(1, &array);
|
||||||
dk::ImageLayoutMaker{Gfx::Device}
|
glBindTexture(GL_TEXTURE_2D_ARRAY, array);
|
||||||
.setType(DkImageType_2DArray)
|
glTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8UI, width, height, layers);
|
||||||
.setFormat(DkImageFormat_RGBA8_Uint)
|
|
||||||
.setDimensions(width, height, layers)
|
|
||||||
.initialize(imageLayout);
|
|
||||||
|
|
||||||
assert(FreeImageDescriptorsCount > 0);
|
|
||||||
array.ImageDescriptor = FreeImageDescriptors[--FreeImageDescriptorsCount];
|
|
||||||
|
|
||||||
array.Memory = Gfx::TextureHeap->Alloc(imageLayout.getSize(), imageLayout.getAlignment());
|
|
||||||
array.Image.initialize(imageLayout, Gfx::TextureHeap->MemBlock, array.Memory.Offset);
|
|
||||||
|
|
||||||
dk::ImageDescriptor descriptor;
|
|
||||||
descriptor.initialize(array.Image);
|
|
||||||
DkGpuAddr descriptors = Gfx::DataHeap->GpuAddr(ImageDescriptors);
|
|
||||||
EmuCmdBuf.pushData(descriptors + (descriptorOffset_TexcacheStart + array.ImageDescriptor) * sizeof(DkImageDescriptor),
|
|
||||||
&descriptor,
|
|
||||||
sizeof(DkImageDescriptor));
|
|
||||||
|
|
||||||
//printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor);
|
//printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor);
|
||||||
|
|
||||||
for (u16 i = 0; i < layers; i++)
|
for (u32 i = 0; i < layers; i++)
|
||||||
{
|
{
|
||||||
freeTextures.push_back(TexArrayEntry{(u16)(texArrays.size()-1), i});
|
freeTextures.push_back(TexArrayEntry{array, i});
|
||||||
}
|
}
|
||||||
}*/
|
}
|
||||||
|
|
||||||
TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1];
|
TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1];
|
||||||
freeTextures.pop_back();
|
freeTextures.pop_back();
|
||||||
|
|
||||||
TexArray& array = texArrays[storagePlace.TexArrayIdx];
|
|
||||||
//printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor);
|
//printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor);
|
||||||
|
|
||||||
/*UploadBuf.UploadAndCopyTexture(Gfx::EmuCmdBuf, array.Image,
|
glBindTexture(GL_TEXTURE_2D_ARRAY, storagePlace.TextureID);
|
||||||
(u8*)TextureDecodingBuffer,
|
glTexSubImage3D(GL_TEXTURE_2D_ARRAY,
|
||||||
0, 0, width, height,
|
0, 0, 0, storagePlace.Layer,
|
||||||
width*4,
|
width, height, 1,
|
||||||
storagePlace.LayerIdx);*/
|
GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, TextureDecodingBuffer);
|
||||||
|
|
||||||
entry.DescriptorIdx = array.ImageDescriptor;
|
|
||||||
entry.Texture = storagePlace;
|
entry.Texture = storagePlace;
|
||||||
|
|
||||||
return TexCache.emplace(std::make_pair(key, entry)).first->second;
|
return TexCache.emplace(std::make_pair(key, entry)).first->second;
|
||||||
|
@ -828,7 +756,7 @@ ComputeRenderer::TexCacheEntry& ComputeRenderer::GetTexture(u32 texParam, u32 pa
|
||||||
|
|
||||||
struct Variant
|
struct Variant
|
||||||
{
|
{
|
||||||
s16 Texture, Sampler;
|
GLuint Texture, Sampler;
|
||||||
u16 Width, Height;
|
u16 Width, Height;
|
||||||
u8 BlendMode;
|
u8 BlendMode;
|
||||||
|
|
||||||
|
@ -841,11 +769,11 @@ struct Variant
|
||||||
/*
|
/*
|
||||||
Antialiasing
|
Antialiasing
|
||||||
W-Buffer
|
W-Buffer
|
||||||
Mit Textur
|
With Texture
|
||||||
0
|
0
|
||||||
1, 3
|
1, 3
|
||||||
2
|
2
|
||||||
Ohne Textur
|
without Texture
|
||||||
2
|
2
|
||||||
0, 1, 3
|
0, 1, 3
|
||||||
|
|
||||||
|
@ -925,6 +853,18 @@ void ComputeRenderer::RenderFrame()
|
||||||
int numYSpans = 0;
|
int numYSpans = 0;
|
||||||
int numSetupIndices = 0;
|
int numSetupIndices = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Some games really like to spam small textures, often
|
||||||
|
to store the data like PPU tiles. E.g. Shantae
|
||||||
|
or some Mega Man game. Fortunately they are usually kind
|
||||||
|
enough to not vary the texture size all too often (usually
|
||||||
|
they just use 8x8 or 16x for everything).
|
||||||
|
|
||||||
|
This is the reason we have this whole mess where textures of
|
||||||
|
the same size are put into array textures. This allows
|
||||||
|
to increase the batch size.
|
||||||
|
Less variance between each Variant hah!
|
||||||
|
*/
|
||||||
u32 numVariants = 0, prevVariant, prevTexLayer;
|
u32 numVariants = 0, prevVariant, prevTexLayer;
|
||||||
Variant variants[MaxVariants];
|
Variant variants[MaxVariants];
|
||||||
|
|
||||||
|
@ -951,6 +891,8 @@ void ComputeRenderer::RenderFrame()
|
||||||
bool foundVariant = false;
|
bool foundVariant = false;
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
{
|
{
|
||||||
|
// if the whole texture attribute matches
|
||||||
|
// the texture layer will also match
|
||||||
Polygon* prevPolygon = RenderPolygonRAM[i - 1];
|
Polygon* prevPolygon = RenderPolygonRAM[i - 1];
|
||||||
foundVariant = prevPolygon->TexParam == polygon->TexParam
|
foundVariant = prevPolygon->TexParam == polygon->TexParam
|
||||||
&& prevPolygon->TexPalette == polygon->TexPalette
|
&& prevPolygon->TexPalette == polygon->TexPalette
|
||||||
|
@ -964,9 +906,10 @@ void ComputeRenderer::RenderFrame()
|
||||||
{
|
{
|
||||||
Variant variant;
|
Variant variant;
|
||||||
variant.BlendMode = polygon->IsShadowMask ? 4 : ((polygon->Attr >> 4) & 0x3);
|
variant.BlendMode = polygon->IsShadowMask ? 4 : ((polygon->Attr >> 4) & 0x3);
|
||||||
variant.Texture = -1;
|
variant.Texture = 0;
|
||||||
variant.Sampler = -1;
|
variant.Sampler = 0;
|
||||||
TexCacheEntry* texcacheEntry = nullptr;
|
TexCacheEntry* texcacheEntry = nullptr;
|
||||||
|
// we always need to look up the texture to get the layer of the array texture
|
||||||
if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7)
|
if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7)
|
||||||
{
|
{
|
||||||
texcacheEntry = &GetTexture(polygon->TexParam, polygon->TexPalette);
|
texcacheEntry = &GetTexture(polygon->TexParam, polygon->TexPalette);
|
||||||
|
@ -974,9 +917,10 @@ void ComputeRenderer::RenderFrame()
|
||||||
bool wrapT = (polygon->TexParam >> 17) & 1;
|
bool wrapT = (polygon->TexParam >> 17) & 1;
|
||||||
bool mirrorS = (polygon->TexParam >> 18) & 1;
|
bool mirrorS = (polygon->TexParam >> 18) & 1;
|
||||||
bool mirrorT = (polygon->TexParam >> 19) & 1;
|
bool mirrorT = (polygon->TexParam >> 19) & 1;
|
||||||
variant.Sampler = (wrapS ? (mirrorS ? 2 : 1) : 0) + (wrapT ? (mirrorT ? 2 : 1) : 0) * 3;
|
variant.Sampler = Samplers[(wrapS ? (mirrorS ? 2 : 1) : 0) + (wrapT ? (mirrorT ? 2 : 1) : 0) * 3];
|
||||||
variant.Texture = texcacheEntry->DescriptorIdx;
|
variant.Texture = texcacheEntry->Texture.TextureID;
|
||||||
prevTexLayer = texcacheEntry->Texture.LayerIdx;
|
prevTexLayer = texcacheEntry->Texture.Layer;
|
||||||
|
|
||||||
if (texcacheEntry->LastVariant < numVariants && variants[texcacheEntry->LastVariant] == variant)
|
if (texcacheEntry->LastVariant < numVariants && variants[texcacheEntry->LastVariant] == variant)
|
||||||
{
|
{
|
||||||
foundVariant = true;
|
foundVariant = true;
|
||||||
|
@ -1272,7 +1216,8 @@ void ComputeRenderer::RenderFrame()
|
||||||
u32 fogA = (RenderFogColor >> 16) & 0x1F;
|
u32 fogA = (RenderFogColor >> 16) & 0x1F;
|
||||||
meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24);
|
meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24);
|
||||||
}
|
}
|
||||||
meta.XScroll = RenderXPos;
|
meta.XScroll = 0;
|
||||||
|
//meta.XScroll = RenderXPos;
|
||||||
|
|
||||||
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
|
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
|
||||||
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(MetaUniform), &meta);
|
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(MetaUniform), &meta);
|
||||||
|
@ -1312,6 +1257,8 @@ void ComputeRenderer::RenderFrame()
|
||||||
glDispatchComputeIndirect(offsetof(BinResult, SortWorkWorkCount));
|
glDispatchComputeIndirect(offsetof(BinResult, SortWorkWorkCount));
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
||||||
|
|
||||||
|
glActiveTexture(GL_TEXTURE0);
|
||||||
|
|
||||||
// rasterise
|
// rasterise
|
||||||
{
|
{
|
||||||
bool highLightMode = RenderDispCnt & (1<<1);
|
bool highLightMode = RenderDispCnt & (1<<1);
|
||||||
|
@ -1338,7 +1285,7 @@ void ComputeRenderer::RenderFrame()
|
||||||
};
|
};
|
||||||
|
|
||||||
GLuint prevShader = 0;
|
GLuint prevShader = 0;
|
||||||
s32 prevTexture = -1, prevSampler = -1;
|
s32 prevTexture = 0, prevSampler = 0;
|
||||||
for (int i = 0; i < numVariants; i++)
|
for (int i = 0; i < numVariants; i++)
|
||||||
{
|
{
|
||||||
GLuint shader = 0;
|
GLuint shader = 0;
|
||||||
|
@ -1349,11 +1296,14 @@ void ComputeRenderer::RenderFrame()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
shader = shadersUseTexture[variants[i].BlendMode];
|
shader = shadersUseTexture[variants[i].BlendMode];
|
||||||
if (variants[i].Texture != prevTexture || variants[i].Sampler != prevSampler)
|
if (variants[i].Texture != prevTexture)
|
||||||
{
|
{
|
||||||
assert(variants[i].Sampler < 9);
|
glBindTexture(GL_TEXTURE_2D_ARRAY, variants[i].Texture);
|
||||||
glBindTexture(GL_TEXTURE_2D, variants[i].Texture);
|
|
||||||
prevTexture = variants[i].Texture;
|
prevTexture = variants[i].Texture;
|
||||||
|
}
|
||||||
|
if (variants[i].Sampler != prevSampler)
|
||||||
|
{
|
||||||
|
glBindSampler(0, variants[i].Sampler);
|
||||||
prevSampler = variants[i].Sampler;
|
prevSampler = variants[i].Sampler;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1364,7 +1314,7 @@ void ComputeRenderer::RenderFrame()
|
||||||
prevShader = shader;
|
prevShader = shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
glUniform1i(UniformIdxCurVariant, i);
|
glUniform1ui(UniformIdxCurVariant, i);
|
||||||
glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height);
|
glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height);
|
||||||
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
|
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
|
||||||
glDispatchComputeIndirect(offsetof(BinResult, VariantWorkCount) + i*4*4);
|
glDispatchComputeIndirect(offsetof(BinResult, VariantWorkCount) + i*4*4);
|
||||||
|
@ -1378,7 +1328,7 @@ void ComputeRenderer::RenderFrame()
|
||||||
glDispatchCompute(256/8, 192/8, 1);
|
glDispatchCompute(256/8, 192/8, 1);
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
//glBindTexture(GL_TEXTURE_2D, )
|
glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
|
||||||
u32 finalPassShader = 0;
|
u32 finalPassShader = 0;
|
||||||
if (RenderDispCnt & (1<<4))
|
if (RenderDispCnt & (1<<4))
|
||||||
finalPassShader |= 0x4;
|
finalPassShader |= 0x4;
|
||||||
|
@ -1448,4 +1398,9 @@ u32* ComputeRenderer::GetLine(int line)
|
||||||
return DummyLine;
|
return DummyLine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ComputeRenderer::SetupAccelFrame()
|
||||||
|
{
|
||||||
|
glBindTexture(GL_TEXTURE_2D, Framebuffer);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -48,8 +48,7 @@ public:
|
||||||
void RestartFrame() override;
|
void RestartFrame() override;
|
||||||
u32* GetLine(int line) override;
|
u32* GetLine(int line) override;
|
||||||
|
|
||||||
//dk::Fence FrameReady = {};
|
void SetupAccelFrame() override;
|
||||||
//dk::Fence FrameReserveFence = {};
|
|
||||||
private:
|
private:
|
||||||
GLuint ShaderInterpXSpans[2];
|
GLuint ShaderInterpXSpans[2];
|
||||||
GLuint ShaderBinCombined;
|
GLuint ShaderBinCombined;
|
||||||
|
@ -197,18 +196,12 @@ private:
|
||||||
|
|
||||||
struct TexArrayEntry
|
struct TexArrayEntry
|
||||||
{
|
{
|
||||||
u16 TexArrayIdx;
|
GLuint TextureID;
|
||||||
u16 LayerIdx;
|
u32 Layer;
|
||||||
};
|
|
||||||
struct TexArray
|
|
||||||
{
|
|
||||||
GLuint Image;
|
|
||||||
u32 ImageDescriptor;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TexCacheEntry
|
struct TexCacheEntry
|
||||||
{
|
{
|
||||||
u32 DescriptorIdx;
|
|
||||||
u32 LastVariant; // very cheap way to make variant lookup faster
|
u32 LastVariant; // very cheap way to make variant lookup faster
|
||||||
|
|
||||||
u32 TextureRAMStart[2], TextureRAMSize[2];
|
u32 TextureRAMStart[2], TextureRAMSize[2];
|
||||||
|
@ -239,16 +232,15 @@ private:
|
||||||
};
|
};
|
||||||
GLuint MetaUniformMemory;
|
GLuint MetaUniformMemory;
|
||||||
|
|
||||||
static const u32 TexCacheMaxImages = 4096;
|
|
||||||
|
|
||||||
u32 FreeImageDescriptorsCount = 0;
|
|
||||||
u32 FreeImageDescriptors[TexCacheMaxImages];
|
|
||||||
|
|
||||||
std::vector<TexArrayEntry> FreeTextures[8][8];
|
std::vector<TexArrayEntry> FreeTextures[8][8];
|
||||||
std::vector<TexArray> TexArrays[8][8];
|
std::vector<GLuint> TexArrays[8][8];
|
||||||
|
|
||||||
|
GLuint Samplers[9];
|
||||||
|
|
||||||
u32 TextureDecodingBuffer[1024*1024];
|
u32 TextureDecodingBuffer[1024*1024];
|
||||||
|
|
||||||
|
GLuint Framebuffer;
|
||||||
|
|
||||||
TexCacheEntry& GetTexture(u32 textureParam, u32 paletteParam);
|
TexCacheEntry& GetTexture(u32 textureParam, u32 paletteParam);
|
||||||
|
|
||||||
void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);
|
void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);
|
||||||
|
|
|
@ -223,13 +223,8 @@ layout (std140, binding = 0) uniform MetaUniform
|
||||||
uint FogOffset, FogShift, FogColor;
|
uint FogOffset, FogShift, FogColor;
|
||||||
|
|
||||||
int XScroll;
|
int XScroll;
|
||||||
|
|
||||||
// only used/updated for rasteriation
|
|
||||||
uint CurVariant;
|
|
||||||
vec2 InvTextureSize;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#if defined(InterpSpans) || defined(Rasterise)
|
#if defined(InterpSpans) || defined(Rasterise)
|
||||||
uint Umulh(uint a, uint b)
|
uint Umulh(uint a, uint b)
|
||||||
{
|
{
|
||||||
|
@ -920,6 +915,9 @@ layout (local_size_x = TileSize, local_size_y = TileSize) in;
|
||||||
|
|
||||||
layout (binding = 0) uniform usampler2DArray CurrentTexture;
|
layout (binding = 0) uniform usampler2DArray CurrentTexture;
|
||||||
|
|
||||||
|
layout (location = 0) uniform uint CurVariant;
|
||||||
|
layout (location = 1) uniform vec2 InvTextureSize;
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
uvec2 workDesc = SortedWork[SortedWorkOffset[CurVariant] + gl_WorkGroupID.z];
|
uvec2 workDesc = SortedWork[SortedWorkOffset[CurVariant] + gl_WorkGroupID.z];
|
||||||
|
@ -1328,7 +1326,7 @@ const char* FinalPass = R"(
|
||||||
|
|
||||||
layout (local_size_x = 32) in;
|
layout (local_size_x = 32) in;
|
||||||
|
|
||||||
layout (binding = 0, r32ui) writeonly uniform uimage2D FinalFB;
|
layout (binding = 0, rgba8) writeonly uniform image2D FinalFB;
|
||||||
|
|
||||||
uint BlendFog(uint color, uint depth)
|
uint BlendFog(uint color, uint depth)
|
||||||
{
|
{
|
||||||
|
@ -1482,15 +1480,17 @@ void main()
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (bitfieldExtract(color.x, 24, 8) != 0U)
|
// if (bitfieldExtract(color.x, 24, 8) != 0U)
|
||||||
color.x |= 0x40000000U;
|
// color.x |= 0x40000000U;
|
||||||
else
|
// else
|
||||||
color.x = 0U;
|
// color.x = 0U;
|
||||||
|
|
||||||
//if (gl_LocalInvocationID.x == 7 || gl_LocalInvocationID.y == 7)
|
//if (gl_LocalInvocationID.x == 7 || gl_LocalInvocationID.y == 7)
|
||||||
//color.x = 0x1F00001FU | 0x40000000U;
|
//color.x = 0x1F00001FU | 0x40000000U;
|
||||||
|
|
||||||
imageStore(FinalFB, ivec2(gl_GlobalInvocationID.xy), uvec4(color.x, 0, 0, 0));
|
vec4 result = vec4(bitfieldExtract(color.x, 16, 8), bitfieldExtract(color.x, 8, 8), color.x & 0x3FU, bitfieldExtract(color.x, 24, 8));
|
||||||
|
result /= vec4(63.0, 63.0, 63.0, 31.0);
|
||||||
|
imageStore(FinalFB, ivec2(gl_GlobalInvocationID.xy), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
)";
|
)";
|
||||||
|
|
|
@ -37,7 +37,7 @@ public:
|
||||||
virtual void RenderFrame() override;
|
virtual void RenderFrame() override;
|
||||||
virtual u32* GetLine(int line) override;
|
virtual u32* GetLine(int line) override;
|
||||||
|
|
||||||
void SetupAccelFrame();
|
void SetupAccelFrame() override;
|
||||||
void PrepareCaptureFrame();
|
void PrepareCaptureFrame();
|
||||||
|
|
||||||
static std::unique_ptr<GLRenderer> New() noexcept;
|
static std::unique_ptr<GLRenderer> New() noexcept;
|
||||||
|
|
|
@ -213,7 +213,7 @@ void GLCompositor::RenderFrame()
|
||||||
}
|
}
|
||||||
|
|
||||||
glActiveTexture(GL_TEXTURE1);
|
glActiveTexture(GL_TEXTURE1);
|
||||||
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame();
|
GPU3D::CurrentRenderer->SetupAccelFrame();
|
||||||
|
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
|
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
|
||||||
glBindVertexArray(CompVertexArrayID);
|
glBindVertexArray(CompVertexArrayID);
|
||||||
|
|
Loading…
Reference in New Issue