Allow for more modular renderer backends (#990)
* Draft GPU3D renderer modularization * Update sources C++ standard to C++17: The top-level `CMakeLists.txt` is already using the C++17 standard. * Move GLCompositor into class type: Some other misc fixes to push towards better modularity. * Make renderer-implementation types move-only: These types are going to be holding onto handles of GPU-side resources and shouldn't ever be copied around. * Fix OSX: Remove 'register' storage class specifier: `register` has been removed in C++17, and this keyword hasn't done anything in years anyway. OSX builds treat this "warning" as an error, which stops the whole build. * Add RestartFrame to Renderer3D interface * Move Accelerated property to Renderer3D interface: There are points in the code base where we check `renderer != 0` to know whether we are feeding an OpenGL renderer. Instead, we can make this a property of the renderer itself. With this pattern a renderer can just say how it wants its data to come in, rather than having everyone know that they're talking to an OpenGL renderer. * Remove Accelerated flag from GPU * Move 2D_Soft interface into a separate header: Also make the current 2D engine an "owned" unique_ptr. * Update alignment attribute to standard alignas: Uses standardized `alignas` rather than compiler-specific attributes. https://en.cppreference.com/w/cpp/language/alignas * Fix Clang: alignas specifier: Alignment must be specified before the array to align the entire array. https://en.cppreference.com/w/cpp/language/alignas * Converted Renderer3D Accelerated to variable: This flag is checked a lot during scanline rasterization, so rather than paying for an expensive vtable-lookup call in mainline rendering code, it is now a public `const bool` member that is written only once, during Renderer3D initialization.
This commit is contained in:
parent
891427c75c
commit
a7029aebae
|
@ -1,6 +1,6 @@
|
|||
project(core)
|
||||
|
||||
set (CMAKE_CXX_STANDARD 14)
|
||||
set (CMAKE_CXX_STANDARD 17)
|
||||
|
||||
add_library(core STATIC
|
||||
ARCodeFile.cpp
|
||||
|
|
101
src/GPU.cpp
101
src/GPU.cpp
|
@ -21,6 +21,7 @@
|
|||
#include "NDS.h"
|
||||
#include "GPU.h"
|
||||
|
||||
#include "GPU2D_Soft.h"
|
||||
|
||||
namespace GPU
|
||||
{
|
||||
|
@ -79,11 +80,10 @@ u8* VRAMPtr_BOBJ[0x8];
|
|||
|
||||
int FrontBuffer;
|
||||
u32* Framebuffer[2][2];
|
||||
int Renderer;
|
||||
bool Accelerated;
|
||||
int Renderer = 0;
|
||||
|
||||
GPU2D* GPU2D_A;
|
||||
GPU2D* GPU2D_B;
|
||||
std::unique_ptr<GPU2D> GPU2D_A = {};
|
||||
std::unique_ptr<GPU2D> GPU2D_B = {};
|
||||
|
||||
/*
|
||||
VRAM invalidation tracking
|
||||
|
@ -145,25 +145,28 @@ u8 VRAMFlat_TexPal[128*1024];
|
|||
u32 OAMDirty;
|
||||
u32 PaletteDirty;
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
std::unique_ptr<GLCompositor> CurGLCompositor = {};
|
||||
#endif
|
||||
|
||||
bool Init()
|
||||
{
|
||||
GPU2D_A = new GPU2D_Soft(0);
|
||||
GPU2D_B = new GPU2D_Soft(1);
|
||||
GPU2D_A = std::make_unique<GPU2D_Soft>(0);
|
||||
GPU2D_B = std::make_unique<GPU2D_Soft>(1);
|
||||
if (!GPU3D::Init()) return false;
|
||||
|
||||
FrontBuffer = 0;
|
||||
Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL;
|
||||
Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL;
|
||||
Renderer = 0;
|
||||
Accelerated = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DeInit()
|
||||
{
|
||||
delete GPU2D_A;
|
||||
delete GPU2D_B;
|
||||
GPU2D_A.reset();
|
||||
GPU2D_B.reset();
|
||||
GPU3D::DeInit();
|
||||
|
||||
if (Framebuffer[0][0]) delete[] Framebuffer[0][0];
|
||||
|
@ -250,9 +253,12 @@ void Reset()
|
|||
memset(VRAMPtr_BBG, 0, sizeof(VRAMPtr_BBG));
|
||||
memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ));
|
||||
|
||||
int fbsize;
|
||||
if (Accelerated) fbsize = (256*3 + 1) * 192;
|
||||
else fbsize = 256 * 192;
|
||||
size_t fbsize;
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
fbsize = (256*3 + 1) * 192;
|
||||
else
|
||||
fbsize = 256 * 192;
|
||||
|
||||
for (int i = 0; i < fbsize; i++)
|
||||
{
|
||||
Framebuffer[0][0][i] = 0xFFFFFFFF;
|
||||
|
@ -283,17 +289,22 @@ void Reset()
|
|||
void Stop()
|
||||
{
|
||||
int fbsize;
|
||||
if (Accelerated) fbsize = (256*3 + 1) * 192;
|
||||
else fbsize = 256 * 192;
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
fbsize = (256*3 + 1) * 192;
|
||||
else
|
||||
fbsize = 256 * 192;
|
||||
|
||||
memset(Framebuffer[0][0], 0, fbsize*4);
|
||||
memset(Framebuffer[0][1], 0, fbsize*4);
|
||||
memset(Framebuffer[1][0], 0, fbsize*4);
|
||||
memset(Framebuffer[1][1], 0, fbsize*4);
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
if (Accelerated)
|
||||
GLCompositor::Stop();
|
||||
#endif
|
||||
// This needs a better way to know that we're
|
||||
// using the OpenGL renderer specifically
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
CurGLCompositor->Stop();
|
||||
#endif
|
||||
}
|
||||
|
||||
void DoSavestate(Savestate* file)
|
||||
|
@ -382,37 +393,42 @@ void InitRenderer(int renderer)
|
|||
#ifdef OGLRENDERER_ENABLED
|
||||
if (renderer == 1)
|
||||
{
|
||||
if (!GLCompositor::Init())
|
||||
CurGLCompositor = std::make_unique<GLCompositor>();
|
||||
// Create OpenGL renderer
|
||||
if (!CurGLCompositor->Init())
|
||||
{
|
||||
// Fallback on software renderer
|
||||
renderer = 0;
|
||||
GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>();
|
||||
GPU3D::CurrentRenderer->Init();
|
||||
}
|
||||
else if (!GPU3D::GLRenderer::Init())
|
||||
GPU3D::CurrentRenderer = std::make_unique<GPU3D::GLRenderer>();
|
||||
if (!GPU3D::CurrentRenderer->Init())
|
||||
{
|
||||
GLCompositor::DeInit();
|
||||
// Fallback on software renderer
|
||||
CurGLCompositor->DeInit();
|
||||
CurGLCompositor.reset();
|
||||
renderer = 0;
|
||||
GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>();
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
GPU3D::SoftRenderer::Init();
|
||||
GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>();
|
||||
GPU3D::CurrentRenderer->Init();
|
||||
}
|
||||
|
||||
Renderer = renderer;
|
||||
Accelerated = renderer != 0;
|
||||
}
|
||||
|
||||
void DeInitRenderer()
|
||||
{
|
||||
if (Renderer == 0)
|
||||
{
|
||||
GPU3D::SoftRenderer::DeInit();
|
||||
}
|
||||
GPU3D::CurrentRenderer->DeInit();
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
else
|
||||
if (Renderer == 1)
|
||||
{
|
||||
GPU3D::GLRenderer::DeInit();
|
||||
GLCompositor::DeInit();
|
||||
CurGLCompositor->DeInit();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -421,13 +437,13 @@ void ResetRenderer()
|
|||
{
|
||||
if (Renderer == 0)
|
||||
{
|
||||
GPU3D::SoftRenderer::Reset();
|
||||
GPU3D::CurrentRenderer->Reset();
|
||||
}
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
else
|
||||
{
|
||||
GLCompositor::Reset();
|
||||
GPU3D::GLRenderer::Reset();
|
||||
CurGLCompositor->Reset();
|
||||
GPU3D::CurrentRenderer->Reset();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -440,10 +456,12 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
|
|||
InitRenderer(renderer);
|
||||
}
|
||||
|
||||
bool accel = Accelerated;
|
||||
int fbsize;
|
||||
if (accel) fbsize = (256*3 + 1) * 192;
|
||||
else fbsize = 256 * 192;
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
fbsize = (256*3 + 1) * 192;
|
||||
else
|
||||
fbsize = 256 * 192;
|
||||
|
||||
if (Framebuffer[0][0]) { delete[] Framebuffer[0][0]; Framebuffer[0][0] = nullptr; }
|
||||
if (Framebuffer[1][0]) { delete[] Framebuffer[1][0]; Framebuffer[1][0] = nullptr; }
|
||||
if (Framebuffer[0][1]) { delete[] Framebuffer[0][1]; Framebuffer[0][1] = nullptr; }
|
||||
|
@ -461,18 +479,15 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
|
|||
|
||||
AssignFramebuffers();
|
||||
|
||||
GPU2D_A->SetRenderSettings(accel);
|
||||
GPU2D_B->SetRenderSettings(accel);
|
||||
|
||||
if (Renderer == 0)
|
||||
{
|
||||
GPU3D::SoftRenderer::SetRenderSettings(settings);
|
||||
GPU3D::CurrentRenderer->SetRenderSettings(settings);
|
||||
}
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
else
|
||||
{
|
||||
GLCompositor::SetRenderSettings(settings);
|
||||
GPU3D::GLRenderer::SetRenderSettings(settings);
|
||||
CurGLCompositor->SetRenderSettings(settings);
|
||||
GPU3D::CurrentRenderer->SetRenderSettings(settings);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -1149,7 +1164,9 @@ void StartScanline(u32 line)
|
|||
GPU3D::VBlank();
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
if (Accelerated) GLCompositor::RenderFrame();
|
||||
// Need a better way to identify the OpenGL renderer in particular
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
CurGLCompositor->RenderFrame();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
32
src/GPU.h
32
src/GPU.h
|
@ -19,9 +19,15 @@
|
|||
#ifndef GPU_H
|
||||
#define GPU_H
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "GPU2D.h"
|
||||
#include "NonStupidBitfield.h"
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
#include "GPU_OpenGL.h"
|
||||
#endif
|
||||
|
||||
namespace GPU
|
||||
{
|
||||
|
||||
|
@ -69,8 +75,8 @@ extern u8* VRAMPtr_BOBJ[0x8];
|
|||
extern int FrontBuffer;
|
||||
extern u32* Framebuffer[2][2];
|
||||
|
||||
extern GPU2D* GPU2D_A;
|
||||
extern GPU2D* GPU2D_B;
|
||||
extern std::unique_ptr<GPU2D> GPU2D_A;
|
||||
extern std::unique_ptr<GPU2D> GPU2D_B;
|
||||
|
||||
extern int Renderer;
|
||||
|
||||
|
@ -149,6 +155,10 @@ void SyncDirtyFlags();
|
|||
extern u32 OAMDirty;
|
||||
extern u32 PaletteDirty;
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
extern std::unique_ptr<GLCompositor> CurGLCompositor;
|
||||
#endif
|
||||
|
||||
struct RenderSettings
|
||||
{
|
||||
bool Soft_Threaded;
|
||||
|
@ -550,24 +560,6 @@ void DisplayFIFO(u32 x);
|
|||
void SetDispStat(u32 cpu, u16 val);
|
||||
|
||||
void SetVCount(u16 val);
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
namespace GLCompositor
|
||||
{
|
||||
|
||||
bool Init();
|
||||
void DeInit();
|
||||
void Reset();
|
||||
|
||||
void SetRenderSettings(RenderSettings& settings);
|
||||
|
||||
void Stop();
|
||||
void RenderFrame();
|
||||
void BindOutputTexture(int buf);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#include "GPU3D.h"
|
||||
|
|
69
src/GPU2D.h
69
src/GPU2D.h
|
@ -28,13 +28,15 @@ public:
|
|||
GPU2D(u32 num);
|
||||
virtual ~GPU2D() {}
|
||||
|
||||
GPU2D(const GPU2D&) = delete;
|
||||
GPU2D& operator=(const GPU2D&) = delete;
|
||||
|
||||
void Reset();
|
||||
|
||||
void DoSavestate(Savestate* file);
|
||||
|
||||
void SetEnabled(bool enable) { Enabled = enable; }
|
||||
void SetFramebuffer(u32* buf);
|
||||
virtual void SetRenderSettings(bool accel) = 0;
|
||||
|
||||
u8 Read8(u32 addr);
|
||||
u16 Read16(u32 addr);
|
||||
|
@ -115,8 +117,8 @@ protected:
|
|||
|
||||
u16 MasterBrightness;
|
||||
|
||||
u8 WindowMask[256] __attribute__((aligned (8)));
|
||||
u8 OBJWindow[256] __attribute__((aligned (8)));
|
||||
alignas(8) u8 WindowMask[256];
|
||||
alignas(8) u8 OBJWindow[256];
|
||||
|
||||
void UpdateMosaicCounters(u32 line);
|
||||
void CalculateWindowMask(u32 line);
|
||||
|
@ -124,65 +126,4 @@ protected:
|
|||
virtual void MosaicXSizeChanged() = 0;
|
||||
};
|
||||
|
||||
class GPU2D_Soft : public GPU2D
|
||||
{
|
||||
public:
|
||||
GPU2D_Soft(u32 num);
|
||||
~GPU2D_Soft() override {}
|
||||
|
||||
void SetRenderSettings(bool accel) override;
|
||||
|
||||
void DrawScanline(u32 line) override;
|
||||
void DrawSprites(u32 line) override;
|
||||
void VBlankEnd() override;
|
||||
|
||||
protected:
|
||||
void MosaicXSizeChanged() override;
|
||||
|
||||
private:
|
||||
bool Accelerated;
|
||||
|
||||
u32 BGOBJLine[256*3] __attribute__((aligned (8)));
|
||||
u32* _3DLine;
|
||||
|
||||
u32 OBJLine[256] __attribute__((aligned (8)));
|
||||
u8 OBJIndex[256] __attribute__((aligned (8)));
|
||||
|
||||
u32 NumSprites;
|
||||
|
||||
u8 MosaicTable[16][256];
|
||||
u8* CurBGXMosaicTable;
|
||||
u8* CurOBJXMosaicTable;
|
||||
|
||||
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
|
||||
u32 ColorBlend5(u32 val1, u32 val2);
|
||||
u32 ColorBrightnessUp(u32 val, u32 factor);
|
||||
u32 ColorBrightnessDown(u32 val, u32 factor);
|
||||
u32 ColorComposite(int i, u32 val1, u32 val2);
|
||||
|
||||
template<u32 bgmode> void DrawScanlineBGMode(u32 line);
|
||||
void DrawScanlineBGMode6(u32 line);
|
||||
void DrawScanlineBGMode7(u32 line);
|
||||
void DrawScanline_BGOBJ(u32 line);
|
||||
|
||||
static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
|
||||
static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
|
||||
|
||||
typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
|
||||
|
||||
void DrawBG_3D();
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line);
|
||||
|
||||
void ApplySpriteMosaicX();
|
||||
template<DrawPixel drawPixel>
|
||||
void InterleaveSprites(u32 prio);
|
||||
template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
|
||||
template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
|
||||
|
||||
void DoCapture(u32 line, u32 width);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "GPU2D.h"
|
||||
#include "GPU2D_Soft.h"
|
||||
#include "GPU.h"
|
||||
|
||||
GPU2D_Soft::GPU2D_Soft(u32 num)
|
||||
|
@ -15,11 +15,6 @@ GPU2D_Soft::GPU2D_Soft(u32 num)
|
|||
}
|
||||
}
|
||||
|
||||
void GPU2D_Soft::SetRenderSettings(bool accel)
|
||||
{
|
||||
Accelerated = accel;
|
||||
}
|
||||
|
||||
u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb)
|
||||
{
|
||||
u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4;
|
||||
|
@ -152,7 +147,7 @@ u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2)
|
|||
|
||||
void GPU2D_Soft::DrawScanline(u32 line)
|
||||
{
|
||||
int stride = Accelerated ? (256*3 + 1) : 256;
|
||||
int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256;
|
||||
u32* dst = &Framebuffer[stride * line];
|
||||
|
||||
int n3dline = line;
|
||||
|
@ -192,7 +187,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
|
|||
|
||||
if (Num == 0)
|
||||
{
|
||||
if (!Accelerated)
|
||||
if (!GPU3D::CurrentRenderer->Accelerated)
|
||||
_3DLine = GPU3D::GetLine(n3dline);
|
||||
else if (CaptureLatch && (((CaptureCnt >> 29) & 0x3) != 1))
|
||||
{
|
||||
|
@ -206,7 +201,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
|
|||
for (int i = 0; i < 256; i++)
|
||||
dst[i] = 0xFFFFFFFF;
|
||||
|
||||
if (Accelerated)
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
dst[256*3] = 0;
|
||||
}
|
||||
|
@ -296,7 +291,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
|
|||
DoCapture(line, capwidth);
|
||||
}
|
||||
|
||||
if (Accelerated)
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
dst[256*3] = MasterBrightness | (DispCnt & 0x30000);
|
||||
return;
|
||||
|
@ -350,11 +345,11 @@ void GPU2D_Soft::VBlankEnd()
|
|||
GPU2D::VBlankEnd();
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
if (Accelerated)
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1))
|
||||
{
|
||||
GPU3D::GLRenderer::PrepareCaptureFrame();
|
||||
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -372,7 +367,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
|
|||
u16* dst = (u16*)GPU::VRAM[dstvram];
|
||||
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
|
||||
|
||||
// TODO: handle 3D in accelerated mode!!
|
||||
// TODO: handle 3D when the current renderer is accelerated (GPU3D::CurrentRenderer->Accelerated)!!
|
||||
|
||||
u32* srcA;
|
||||
if (CaptureCnt & (1<<24))
|
||||
|
@ -382,9 +377,9 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
|
|||
else
|
||||
{
|
||||
srcA = BGOBJLine;
|
||||
if (Accelerated)
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
// in accelerated mode, compositing is normally done on the GPU
|
||||
// when GPU3D::CurrentRenderer->Accelerated is set, compositing is normally done on the GPU
|
||||
// but when doing display capture, we do need the composited output
|
||||
// so we do it here
|
||||
|
||||
|
@ -586,12 +581,12 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
|
|||
{ \
|
||||
if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \
|
||||
{ \
|
||||
if (Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \
|
||||
if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \
|
||||
else DrawBG_##type<true, DrawPixel_Normal>(line, num); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if (Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \
|
||||
if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \
|
||||
else DrawBG_##type<false, DrawPixel_Normal>(line, num); \
|
||||
} \
|
||||
} while (false)
|
||||
|
@ -601,18 +596,18 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
|
|||
{ \
|
||||
if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \
|
||||
{ \
|
||||
if (Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \
|
||||
if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \
|
||||
else DrawBG_Large<true, DrawPixel_Normal>(line); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if (Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \
|
||||
if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \
|
||||
else DrawBG_Large<false, DrawPixel_Normal>(line); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define DoInterleaveSprites(prio) \
|
||||
if (Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio);
|
||||
if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio);
|
||||
|
||||
template<u32 bgmode>
|
||||
void GPU2D_Soft::DrawScanlineBGMode(u32 line)
|
||||
|
@ -773,7 +768,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line)
|
|||
// color special effects
|
||||
// can likely be optimized
|
||||
|
||||
if (!Accelerated)
|
||||
if (!GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
|
@ -919,7 +914,7 @@ void GPU2D_Soft::DrawBG_3D()
|
|||
{
|
||||
int i = 0;
|
||||
|
||||
if (Accelerated)
|
||||
if (GPU3D::CurrentRenderer->Accelerated)
|
||||
{
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
Copyright 2016-2020 Arisotura
|
||||
|
||||
This file is part of melonDS.
|
||||
|
||||
melonDS is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPU2D.h"
|
||||
|
||||
class GPU2D_Soft : public GPU2D
|
||||
{
|
||||
public:
|
||||
GPU2D_Soft(u32 num);
|
||||
~GPU2D_Soft() override {}
|
||||
|
||||
void DrawScanline(u32 line) override;
|
||||
void DrawSprites(u32 line) override;
|
||||
void VBlankEnd() override;
|
||||
|
||||
protected:
|
||||
void MosaicXSizeChanged() override;
|
||||
|
||||
private:
|
||||
|
||||
alignas(8) u32 BGOBJLine[256*3];
|
||||
u32* _3DLine;
|
||||
|
||||
alignas(8) u32 OBJLine[256];
|
||||
alignas(8) u8 OBJIndex[256];
|
||||
|
||||
u32 NumSprites;
|
||||
|
||||
u8 MosaicTable[16][256];
|
||||
u8* CurBGXMosaicTable;
|
||||
u8* CurOBJXMosaicTable;
|
||||
|
||||
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
|
||||
u32 ColorBlend5(u32 val1, u32 val2);
|
||||
u32 ColorBrightnessUp(u32 val, u32 factor);
|
||||
u32 ColorBrightnessDown(u32 val, u32 factor);
|
||||
u32 ColorComposite(int i, u32 val1, u32 val2);
|
||||
|
||||
template<u32 bgmode> void DrawScanlineBGMode(u32 line);
|
||||
void DrawScanlineBGMode6(u32 line);
|
||||
void DrawScanlineBGMode7(u32 line);
|
||||
void DrawScanline_BGOBJ(u32 line);
|
||||
|
||||
static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
|
||||
static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
|
||||
|
||||
typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
|
||||
|
||||
void DrawBG_3D();
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum);
|
||||
template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line);
|
||||
|
||||
void ApplySpriteMosaicX();
|
||||
template<DrawPixel drawPixel>
|
||||
void InterleaveSprites(u32 prio);
|
||||
template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
|
||||
template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
|
||||
|
||||
void DoCapture(u32 line, u32 width);
|
||||
};
|
|
@ -273,7 +273,7 @@ u32 RenderNumPolygons;
|
|||
u32 FlushRequest;
|
||||
u32 FlushAttributes;
|
||||
|
||||
|
||||
std::unique_ptr<GPU3D::Renderer3D> CurrentRenderer = {};
|
||||
|
||||
bool Init()
|
||||
{
|
||||
|
@ -2497,12 +2497,12 @@ void CheckFIFODMA()
|
|||
|
||||
void VCount144()
|
||||
{
|
||||
if (GPU::Renderer == 0) SoftRenderer::VCount144();
|
||||
CurrentRenderer->VCount144();
|
||||
}
|
||||
|
||||
void RestartFrame()
|
||||
{
|
||||
if (GPU::Renderer == 0) SoftRenderer::SetupRenderThread();
|
||||
CurrentRenderer->RestartFrame();
|
||||
}
|
||||
|
||||
|
||||
|
@ -2597,10 +2597,7 @@ void VBlank()
|
|||
|
||||
void VCount215()
|
||||
{
|
||||
if (GPU::Renderer == 0) SoftRenderer::RenderFrame();
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
else GLRenderer::RenderFrame();
|
||||
#endif
|
||||
CurrentRenderer->RenderFrame();
|
||||
}
|
||||
|
||||
void SetRenderXPos(u16 xpos)
|
||||
|
@ -2614,12 +2611,7 @@ u32 ScrolledLine[256];
|
|||
|
||||
u32* GetLine(int line)
|
||||
{
|
||||
u32* rawline = NULL;
|
||||
|
||||
if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line);
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
else rawline = GLRenderer::GetLine(line);
|
||||
#endif
|
||||
u32* rawline = CurrentRenderer->GetLine(line);
|
||||
|
||||
if (RenderXPos == 0) return rawline;
|
||||
|
||||
|
@ -3055,5 +3047,9 @@ void Write32(u32 addr, u32 val)
|
|||
printf("unknown GPU3D write32 %08X %08X\n", addr, val);
|
||||
}
|
||||
|
||||
Renderer3D::Renderer3D(bool Accelerated)
|
||||
: Accelerated(Accelerated)
|
||||
{ }
|
||||
|
||||
}
|
||||
|
||||
|
|
59
src/GPU3D.h
59
src/GPU3D.h
|
@ -20,6 +20,9 @@
|
|||
#define GPU3D_H
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "GPU.h"
|
||||
#include "Savestate.h"
|
||||
|
||||
namespace GPU3D
|
||||
|
@ -96,8 +99,6 @@ extern u32 RenderNumPolygons;
|
|||
|
||||
extern u64 Timestamp;
|
||||
|
||||
extern int Renderer;
|
||||
|
||||
bool Init();
|
||||
void DeInit();
|
||||
void Reset();
|
||||
|
@ -131,40 +132,42 @@ void Write8(u32 addr, u8 val);
|
|||
void Write16(u32 addr, u16 val);
|
||||
void Write32(u32 addr, u32 val);
|
||||
|
||||
namespace SoftRenderer
|
||||
class Renderer3D
|
||||
{
|
||||
public:
|
||||
Renderer3D(bool Accelerated);
|
||||
virtual ~Renderer3D() {};
|
||||
|
||||
bool Init();
|
||||
void DeInit();
|
||||
void Reset();
|
||||
Renderer3D(const Renderer3D&) = delete;
|
||||
Renderer3D& operator=(const Renderer3D&) = delete;
|
||||
|
||||
void SetRenderSettings(GPU::RenderSettings& settings);
|
||||
void SetupRenderThread();
|
||||
virtual bool Init() = 0;
|
||||
virtual void DeInit() = 0;
|
||||
virtual void Reset() = 0;
|
||||
|
||||
void VCount144();
|
||||
void RenderFrame();
|
||||
u32* GetLine(int line);
|
||||
// This "Accelerated" flag currently communicates if the framebuffer should
|
||||
// be allocated differently and other little misc handlers. Ideally there
|
||||
// are more detailed "traits" that we can ask of the Renderer3D type
|
||||
const bool Accelerated;
|
||||
|
||||
virtual void SetRenderSettings(GPU::RenderSettings& settings) = 0;
|
||||
|
||||
virtual void VCount144() {};
|
||||
|
||||
virtual void RenderFrame() = 0;
|
||||
virtual void RestartFrame() {};
|
||||
virtual u32* GetLine(int line) = 0;
|
||||
};
|
||||
|
||||
extern int Renderer;
|
||||
extern std::unique_ptr<Renderer3D> CurrentRenderer;
|
||||
|
||||
}
|
||||
|
||||
#include "GPU3D_Soft.h"
|
||||
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
namespace GLRenderer
|
||||
{
|
||||
|
||||
bool Init();
|
||||
void DeInit();
|
||||
void Reset();
|
||||
|
||||
void SetRenderSettings(GPU::RenderSettings& settings);
|
||||
|
||||
void RenderFrame();
|
||||
void PrepareCaptureFrame();
|
||||
u32* GetLine(int line);
|
||||
void SetupAccelFrame();
|
||||
|
||||
}
|
||||
#include "GPU3D_OpenGL.h"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -16,118 +16,19 @@
|
|||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#include "GPU3D_OpenGL.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "NDS.h"
|
||||
#include "GPU.h"
|
||||
#include "Config.h"
|
||||
#include "OpenGLSupport.h"
|
||||
#include "GPU3D_OpenGL_shaders.h"
|
||||
|
||||
namespace GPU3D
|
||||
{
|
||||
namespace GLRenderer
|
||||
{
|
||||
|
||||
using namespace OpenGL;
|
||||
|
||||
// GL version requirements
|
||||
// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS)
|
||||
// * UBO: 3.1
|
||||
|
||||
|
||||
enum
|
||||
{
|
||||
RenderFlag_WBuffer = 0x01,
|
||||
RenderFlag_Trans = 0x02,
|
||||
RenderFlag_ShadowMask = 0x04,
|
||||
RenderFlag_Edge = 0x08,
|
||||
};
|
||||
|
||||
|
||||
GLuint ClearShaderPlain[3];
|
||||
|
||||
GLuint RenderShader[16][3];
|
||||
GLuint CurShaderID = -1;
|
||||
|
||||
GLuint FinalPassEdgeShader[3];
|
||||
GLuint FinalPassFogShader[3];
|
||||
|
||||
// std140 compliant structure
|
||||
struct
|
||||
{
|
||||
float uScreenSize[2]; // vec2 0 / 2
|
||||
u32 uDispCnt; // int 2 / 1
|
||||
u32 __pad0;
|
||||
float uToonColors[32][4]; // vec4[32] 4 / 128
|
||||
float uEdgeColors[8][4]; // vec4[8] 132 / 32
|
||||
float uFogColor[4]; // vec4 164 / 4
|
||||
float uFogDensity[34][4]; // float[34] 168 / 136
|
||||
u32 uFogOffset; // int 304 / 1
|
||||
u32 uFogShift; // int 305 / 1
|
||||
u32 _pad1[2]; // int 306 / 2
|
||||
} ShaderConfig;
|
||||
|
||||
GLuint ShaderConfigUBO;
|
||||
|
||||
struct RendererPolygon
|
||||
{
|
||||
Polygon* PolyData;
|
||||
|
||||
u32 NumIndices;
|
||||
u32 IndicesOffset;
|
||||
GLuint PrimType;
|
||||
|
||||
u32 NumEdgeIndices;
|
||||
u32 EdgeIndicesOffset;
|
||||
|
||||
u32 RenderKey;
|
||||
};
|
||||
|
||||
RendererPolygon PolygonList[2048];
|
||||
int NumFinalPolys, NumOpaqueFinalPolys;
|
||||
|
||||
GLuint ClearVertexBufferID, ClearVertexArrayID;
|
||||
GLint ClearUniformLoc[4];
|
||||
|
||||
// vertex buffer
|
||||
// * XYZW: 4x16bit
|
||||
// * RGBA: 4x8bit
|
||||
// * ST: 2x16bit
|
||||
// * polygon data: 3x32bit (polygon/texture attributes)
|
||||
//
|
||||
// polygon attributes:
|
||||
// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR
|
||||
// * bit16-20: Z shift
|
||||
// * bit8: front-facing (?)
|
||||
// * bit9: W-buffering (?)
|
||||
|
||||
GLuint VertexBufferID;
|
||||
u32 VertexBuffer[10240 * 7];
|
||||
u32 NumVertices;
|
||||
|
||||
GLuint VertexArrayID;
|
||||
GLuint IndexBufferID;
|
||||
u16 IndexBuffer[2048 * 40];
|
||||
u32 NumIndices, NumEdgeIndices;
|
||||
|
||||
const u32 EdgeIndicesOffset = 2048 * 30;
|
||||
|
||||
GLuint TexMemID;
|
||||
GLuint TexPalMemID;
|
||||
|
||||
int ScaleFactor;
|
||||
bool BetterPolygons;
|
||||
int ScreenW, ScreenH;
|
||||
|
||||
GLuint FramebufferTex[8];
|
||||
int FrontBuffer;
|
||||
GLuint FramebufferID[4], PixelbufferID;
|
||||
u32 Framebuffer[256*192];
|
||||
|
||||
|
||||
|
||||
bool BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
||||
bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
||||
{
|
||||
char shadername[32];
|
||||
sprintf(shadername, "RenderShader%02X", flags);
|
||||
|
@ -180,7 +81,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
|||
return true;
|
||||
}
|
||||
|
||||
void UseRenderShader(u32 flags)
|
||||
void GLRenderer::UseRenderShader(u32 flags)
|
||||
{
|
||||
if (CurShaderID == flags) return;
|
||||
glUseProgram(RenderShader[flags][2]);
|
||||
|
@ -196,7 +97,12 @@ void SetupDefaultTexParams(GLuint tex)
|
|||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
}
|
||||
|
||||
bool Init()
|
||||
GLRenderer::GLRenderer()
|
||||
: Renderer3D(true)
|
||||
{
|
||||
}
|
||||
|
||||
bool GLRenderer::Init()
|
||||
{
|
||||
GLint uni_id;
|
||||
|
||||
|
@ -382,7 +288,7 @@ bool Init()
|
|||
return true;
|
||||
}
|
||||
|
||||
void DeInit()
|
||||
void GLRenderer::DeInit()
|
||||
{
|
||||
glDeleteTextures(1, &TexMemID);
|
||||
glDeleteTextures(1, &TexPalMemID);
|
||||
|
@ -404,11 +310,11 @@ void DeInit()
|
|||
}
|
||||
}
|
||||
|
||||
void Reset()
|
||||
void GLRenderer::Reset()
|
||||
{
|
||||
}
|
||||
|
||||
void SetRenderSettings(GPU::RenderSettings& settings)
|
||||
void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings)
|
||||
{
|
||||
int scale = settings.GL_ScaleFactor;
|
||||
|
||||
|
@ -462,7 +368,7 @@ void SetRenderSettings(GPU::RenderSettings& settings)
|
|||
}
|
||||
|
||||
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
|
||||
void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon)
|
||||
{
|
||||
rp->PolyData = polygon;
|
||||
|
||||
|
@ -508,7 +414,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
|
|||
}
|
||||
}
|
||||
|
||||
u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
|
||||
u32* GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
|
||||
{
|
||||
u32 z = poly->FinalZ[vid];
|
||||
u32 w = poly->FinalW[vid];
|
||||
|
@ -569,7 +475,7 @@ u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
|
|||
return vptr;
|
||||
}
|
||||
|
||||
void BuildPolygons(RendererPolygon* polygons, int npolys)
|
||||
void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys)
|
||||
{
|
||||
u32* vptr = &VertexBuffer[0];
|
||||
u32 vidx = 0;
|
||||
|
@ -791,7 +697,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
|
|||
NumEdgeIndices = eidx - EdgeIndicesOffset;
|
||||
}
|
||||
|
||||
int RenderSinglePolygon(int i)
|
||||
int GLRenderer::RenderSinglePolygon(int i)
|
||||
{
|
||||
RendererPolygon* rp = &PolygonList[i];
|
||||
|
||||
|
@ -800,7 +706,7 @@ int RenderSinglePolygon(int i)
|
|||
return 1;
|
||||
}
|
||||
|
||||
int RenderPolygonBatch(int i)
|
||||
int GLRenderer::RenderPolygonBatch(int i)
|
||||
{
|
||||
RendererPolygon* rp = &PolygonList[i];
|
||||
GLuint primtype = rp->PrimType;
|
||||
|
@ -822,7 +728,7 @@ int RenderPolygonBatch(int i)
|
|||
return numpolys;
|
||||
}
|
||||
|
||||
int RenderPolygonEdgeBatch(int i)
|
||||
int GLRenderer::RenderPolygonEdgeBatch(int i)
|
||||
{
|
||||
RendererPolygon* rp = &PolygonList[i];
|
||||
u32 key = rp->RenderKey;
|
||||
|
@ -842,7 +748,7 @@ int RenderPolygonEdgeBatch(int i)
|
|||
return numpolys;
|
||||
}
|
||||
|
||||
void RenderSceneChunk(int y, int h)
|
||||
void GLRenderer::RenderSceneChunk(int y, int h)
|
||||
{
|
||||
u32 flags = 0;
|
||||
if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer;
|
||||
|
@ -1206,7 +1112,7 @@ void RenderSceneChunk(int y, int h)
|
|||
}
|
||||
|
||||
|
||||
void RenderFrame()
|
||||
void GLRenderer::RenderFrame()
|
||||
{
|
||||
CurShaderID = -1;
|
||||
|
||||
|
@ -1381,7 +1287,7 @@ void RenderFrame()
|
|||
FrontBuffer = FrontBuffer ? 0 : 1;
|
||||
}
|
||||
|
||||
void PrepareCaptureFrame()
|
||||
void GLRenderer::PrepareCaptureFrame()
|
||||
{
|
||||
// TODO: make sure this picks the right buffer when doing antialiasing
|
||||
int original_fb = FrontBuffer^1;
|
||||
|
@ -1396,7 +1302,7 @@ void PrepareCaptureFrame()
|
|||
glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
|
||||
}
|
||||
|
||||
u32* GetLine(int line)
|
||||
u32* GLRenderer::GetLine(int line)
|
||||
{
|
||||
int stride = 256;
|
||||
|
||||
|
@ -1419,10 +1325,9 @@ u32* GetLine(int line)
|
|||
return &Framebuffer[stride * line];
|
||||
}
|
||||
|
||||
void SetupAccelFrame()
|
||||
void GLRenderer::SetupAccelFrame()
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
Copyright 2016-2020 Arisotura
|
||||
|
||||
This file is part of melonDS.
|
||||
|
||||
melonDS is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPU3D.h"
|
||||
|
||||
#include "OpenGLSupport.h"
|
||||
|
||||
|
||||
namespace GPU3D
|
||||
{
|
||||
class GLRenderer : public Renderer3D
|
||||
{
|
||||
public:
|
||||
GLRenderer();
|
||||
virtual ~GLRenderer() override {};
|
||||
virtual bool Init() override;
|
||||
virtual void DeInit() override;
|
||||
virtual void Reset() override;
|
||||
|
||||
virtual void SetRenderSettings(GPU::RenderSettings& settings) override;
|
||||
|
||||
virtual void VCount144() override {};
|
||||
virtual void RenderFrame() override;
|
||||
virtual u32* GetLine(int line) override;
|
||||
|
||||
void SetupAccelFrame();
|
||||
void PrepareCaptureFrame();
|
||||
private:
|
||||
|
||||
// GL version requirements
|
||||
// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS)
|
||||
// * UBO: 3.1
|
||||
|
||||
struct RendererPolygon
|
||||
{
|
||||
Polygon* PolyData;
|
||||
|
||||
u32 NumIndices;
|
||||
u32 IndicesOffset;
|
||||
GLuint PrimType;
|
||||
|
||||
u32 NumEdgeIndices;
|
||||
u32 EdgeIndicesOffset;
|
||||
|
||||
u32 RenderKey;
|
||||
};
|
||||
|
||||
RendererPolygon PolygonList[2048];
|
||||
|
||||
bool BuildRenderShader(u32 flags, const char* vs, const char* fs);
|
||||
void UseRenderShader(u32 flags);
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
|
||||
u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr);
|
||||
void BuildPolygons(RendererPolygon* polygons, int npolys);
|
||||
int RenderSinglePolygon(int i);
|
||||
int RenderPolygonBatch(int i);
|
||||
int RenderPolygonEdgeBatch(int i);
|
||||
void RenderSceneChunk(int y, int h);
|
||||
|
||||
enum
|
||||
{
|
||||
RenderFlag_WBuffer = 0x01,
|
||||
RenderFlag_Trans = 0x02,
|
||||
RenderFlag_ShadowMask = 0x04,
|
||||
RenderFlag_Edge = 0x08,
|
||||
};
|
||||
|
||||
|
||||
GLuint ClearShaderPlain[3];
|
||||
|
||||
GLuint RenderShader[16][3];
|
||||
GLuint CurShaderID = -1;
|
||||
|
||||
GLuint FinalPassEdgeShader[3];
|
||||
GLuint FinalPassFogShader[3];
|
||||
|
||||
// std140 compliant structure
|
||||
struct
|
||||
{
|
||||
float uScreenSize[2]; // vec2 0 / 2
|
||||
u32 uDispCnt; // int 2 / 1
|
||||
u32 __pad0;
|
||||
float uToonColors[32][4]; // vec4[32] 4 / 128
|
||||
float uEdgeColors[8][4]; // vec4[8] 132 / 32
|
||||
float uFogColor[4]; // vec4 164 / 4
|
||||
float uFogDensity[34][4]; // float[34] 168 / 136
|
||||
u32 uFogOffset; // int 304 / 1
|
||||
u32 uFogShift; // int 305 / 1
|
||||
u32 _pad1[2]; // int 306 / 2
|
||||
} ShaderConfig;
|
||||
|
||||
GLuint ShaderConfigUBO;
|
||||
int NumFinalPolys, NumOpaqueFinalPolys;
|
||||
|
||||
GLuint ClearVertexBufferID, ClearVertexArrayID;
|
||||
GLint ClearUniformLoc[4];
|
||||
|
||||
// vertex buffer
|
||||
// * XYZW: 4x16bit
|
||||
// * RGBA: 4x8bit
|
||||
// * ST: 2x16bit
|
||||
// * polygon data: 3x32bit (polygon/texture attributes)
|
||||
//
|
||||
// polygon attributes:
|
||||
// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR
|
||||
// * bit16-20: Z shift
|
||||
// * bit8: front-facing (?)
|
||||
// * bit9: W-buffering (?)
|
||||
|
||||
GLuint VertexBufferID;
|
||||
u32 VertexBuffer[10240 * 7];
|
||||
u32 NumVertices;
|
||||
|
||||
GLuint VertexArrayID;
|
||||
GLuint IndexBufferID;
|
||||
u16 IndexBuffer[2048 * 40];
|
||||
u32 NumIndices, NumEdgeIndices;
|
||||
|
||||
const u32 EdgeIndicesOffset = 2048 * 30;
|
||||
|
||||
GLuint TexMemID;
|
||||
GLuint TexPalMemID;
|
||||
|
||||
int ScaleFactor;
|
||||
bool BetterPolygons;
|
||||
int ScreenW, ScreenH;
|
||||
|
||||
GLuint FramebufferTex[8];
|
||||
int FrontBuffer;
|
||||
GLuint FramebufferID[4], PixelbufferID;
|
||||
u32 Framebuffer[256*192];
|
||||
|
||||
|
||||
};
|
||||
}
|
|
@ -16,82 +16,43 @@
|
|||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#include "GPU3D_Soft.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "NDS.h"
|
||||
#include "GPU.h"
|
||||
#include "Config.h"
|
||||
#include "Platform.h"
|
||||
|
||||
|
||||
namespace GPU3D
|
||||
{
|
||||
namespace SoftRenderer
|
||||
{
|
||||
|
||||
// buffer dimensions are 258x194 to add an offscreen 1px border
|
||||
// which simplifies edge marking tests
|
||||
// buffer is duplicated to keep track of the two topmost pixels
|
||||
// TODO: check if the hardware can accidentally plot pixels
|
||||
// offscreen in that border
|
||||
|
||||
const int ScanlineWidth = 258;
|
||||
const int NumScanlines = 194;
|
||||
const int BufferSize = ScanlineWidth * NumScanlines;
|
||||
const int FirstPixelOffset = ScanlineWidth + 1;
|
||||
|
||||
u32 ColorBuffer[BufferSize * 2];
|
||||
u32 DepthBuffer[BufferSize * 2];
|
||||
u32 AttrBuffer[BufferSize * 2];
|
||||
|
||||
// attribute buffer:
|
||||
// bit0-3: edge flags (left/right/top/bottom)
|
||||
// bit4: backfacing flag
|
||||
// bit8-12: antialiasing alpha
|
||||
// bit15: fog enable
|
||||
// bit16-21: polygon ID for translucent pixels
|
||||
// bit22: translucent flag
|
||||
// bit24-29: polygon ID for opaque pixels
|
||||
|
||||
u8 StencilBuffer[256*2];
|
||||
bool PrevIsShadowMask;
|
||||
|
||||
bool Enabled;
|
||||
|
||||
bool FrameIdentical;
|
||||
|
||||
// threading
|
||||
|
||||
bool Threaded;
|
||||
Platform::Thread* RenderThread;
|
||||
bool RenderThreadRunning;
|
||||
bool RenderThreadRendering;
|
||||
Platform::Semaphore* Sema_RenderStart;
|
||||
Platform::Semaphore* Sema_RenderDone;
|
||||
Platform::Semaphore* Sema_ScanlineCount;
|
||||
|
||||
void RenderThreadFunc();
|
||||
|
||||
|
||||
void StopRenderThread()
|
||||
void SoftRenderer::StopRenderThread()
|
||||
{
|
||||
if (RenderThreadRunning)
|
||||
{
|
||||
RenderThreadRunning = false;
|
||||
Platform::Semaphore_Post(Sema_RenderStart);
|
||||
Platform::Thread_Wait(RenderThread);
|
||||
Platform::Thread_Free(RenderThread);
|
||||
// Platform::Thread_Wait(RenderThread);
|
||||
// Platform::Thread_Free(RenderThread);
|
||||
RenderThread.join();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SetupRenderThread()
|
||||
void SoftRenderer::SetupRenderThread()
|
||||
{
|
||||
if (Threaded)
|
||||
{
|
||||
if (!RenderThreadRunning)
|
||||
{
|
||||
RenderThreadRunning = true;
|
||||
RenderThread = Platform::Thread_Create(RenderThreadFunc);
|
||||
//RenderThread = Platform::Thread_Create(RenderThreadFunc);
|
||||
RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this);
|
||||
}
|
||||
|
||||
// otherwise more than one frame can be queued up at once
|
||||
|
@ -113,7 +74,13 @@ void SetupRenderThread()
|
|||
}
|
||||
|
||||
|
||||
bool Init()
|
||||
// Constructs the software renderer, passing `false` to the Renderer3D base
// constructor (presumably the "Accelerated" flag -- confirm in GPU3D.h).
// No other member is initialized here.
SoftRenderer::SoftRenderer()
|
||||
: Renderer3D(false)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
bool SoftRenderer::Init()
|
||||
{
|
||||
Sema_RenderStart = Platform::Semaphore_Create();
|
||||
Sema_RenderDone = Platform::Semaphore_Create();
|
||||
|
@ -126,7 +93,7 @@ bool Init()
|
|||
return true;
|
||||
}
|
||||
|
||||
void DeInit()
|
||||
void SoftRenderer::DeInit()
|
||||
{
|
||||
StopRenderThread();
|
||||
|
||||
|
@ -135,7 +102,7 @@ void DeInit()
|
|||
Platform::Semaphore_Free(Sema_ScanlineCount);
|
||||
}
|
||||
|
||||
void Reset()
|
||||
void SoftRenderer::Reset()
|
||||
{
|
||||
memset(ColorBuffer, 0, BufferSize * 2 * 4);
|
||||
memset(DepthBuffer, 0, BufferSize * 2 * 4);
|
||||
|
@ -146,428 +113,13 @@ void Reset()
|
|||
SetupRenderThread();
|
||||
}
|
||||
|
||||
void SetRenderSettings(GPU::RenderSettings& settings)
|
||||
// Applies new render settings: copies the Soft_Threaded option into Threaded
// and then calls SetupRenderThread() so the thread state follows the setting.
void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings)
{
    const bool wantThreaded = settings.Soft_Threaded;
    Threaded = wantThreaded;

    SetupRenderThread();
}
|
||||
|
||||
|
||||
|
||||
// Notes on the interpolator:
|
||||
//
|
||||
// This is a theory on how the DS hardware interpolates values. It matches hardware output
|
||||
// in the tests I did, but the hardware may be doing it differently. You never know.
|
||||
//
|
||||
// Assuming you want to perspective-correctly interpolate a variable named A across two points
|
||||
// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly,
|
||||
// then divide A/W by 1/W to recover the correct A value.
|
||||
//
|
||||
// The DS GPU approximates interpolation by calculating a perspective-correct interpolation
|
||||
// between 0 and 1, then using the result as a factor to linearly interpolate the actual
|
||||
// vertex attributes. The factor has 9 bits of precision when interpolating along Y and
|
||||
// 8 bits along X.
|
||||
//
|
||||
// There's a special path for when the two W values are equal: it directly does linear
|
||||
// interpolation, avoiding precision loss from the aforementioned approximation.
|
||||
// Which is desirable when using the GPU to draw 2D graphics.
|
||||
|
||||
template<int dir>
|
||||
class Interpolator
|
||||
{
|
||||
public:
|
||||
Interpolator() {}
|
||||
Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
Setup(x0, x1, w0, w1);
|
||||
}
|
||||
|
||||
void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->x1 = x1;
|
||||
this->xdiff = x1 - x0;
|
||||
|
||||
// calculate reciprocals for linear mode and Z interpolation
|
||||
// TODO eventually: use a faster reciprocal function?
|
||||
if (this->xdiff != 0)
|
||||
this->xrecip = (1<<30) / this->xdiff;
|
||||
else
|
||||
this->xrecip = 0;
|
||||
this->xrecip_z = this->xrecip >> 8;
|
||||
|
||||
// linear mode is used if both W values are equal and have
|
||||
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
||||
u32 mask = dir ? 0x7E : 0x7F;
|
||||
if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
|
||||
this->linear = true;
|
||||
else
|
||||
this->linear = false;
|
||||
|
||||
if (dir)
|
||||
{
|
||||
// along Y
|
||||
|
||||
if ((w0 & 0x1) && !(w1 & 0x1))
|
||||
{
|
||||
this->w0n = w0 - 1;
|
||||
this->w0d = w0 + 1;
|
||||
this->w1d = w1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->w0n = w0 & 0xFFFE;
|
||||
this->w0d = w0 & 0xFFFE;
|
||||
this->w1d = w1 & 0xFFFE;
|
||||
}
|
||||
|
||||
this->shift = 9;
|
||||
}
|
||||
else
|
||||
{
|
||||
// along X
|
||||
|
||||
this->w0n = w0;
|
||||
this->w0d = w0;
|
||||
this->w1d = w1;
|
||||
|
||||
this->shift = 8;
|
||||
}
|
||||
}
|
||||
|
||||
void SetX(s32 x)
|
||||
{
|
||||
x -= x0;
|
||||
this->x = x;
|
||||
if (xdiff != 0 && !linear)
|
||||
{
|
||||
s64 num = ((s64)x * w0n) << shift;
|
||||
s32 den = (x * w0d) + ((xdiff-x) * w1d);
|
||||
|
||||
// this seems to be a proper division on hardware :/
|
||||
// I haven't been able to find cases that produce imperfect output
|
||||
if (den == 0) yfactor = 0;
|
||||
else yfactor = (s32)(num / den);
|
||||
}
|
||||
}
|
||||
|
||||
s32 Interpolate(s32 y0, s32 y1)
|
||||
{
|
||||
if (xdiff == 0 || y0 == y1) return y0;
|
||||
|
||||
if (!linear)
|
||||
{
|
||||
// perspective-correct approx. interpolation
|
||||
if (y0 < y1)
|
||||
return y0 + (((y1-y0) * yfactor) >> shift);
|
||||
else
|
||||
return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
// linear interpolation
|
||||
// checkme: the rounding bias there (3<<24) is a guess
|
||||
if (y0 < y1)
|
||||
return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
|
||||
else
|
||||
return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
|
||||
}
|
||||
}
|
||||
|
||||
s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer)
|
||||
{
|
||||
if (xdiff == 0 || z0 == z1) return z0;
|
||||
|
||||
if (wbuffer)
|
||||
{
|
||||
// W-buffering: perspective-correct approx. interpolation
|
||||
if (z0 < z1)
|
||||
return z0 + (((s64)(z1-z0) * yfactor) >> shift);
|
||||
else
|
||||
return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Z-buffering: linear interpolation
|
||||
// still doesn't quite match hardware...
|
||||
s32 base, disp, factor;
|
||||
|
||||
if (z0 < z1)
|
||||
{
|
||||
base = z0;
|
||||
disp = z1 - z0;
|
||||
factor = x;
|
||||
}
|
||||
else
|
||||
{
|
||||
base = z1;
|
||||
disp = z0 - z1,
|
||||
factor = xdiff - x;
|
||||
}
|
||||
|
||||
if (dir)
|
||||
{
|
||||
int shift = 0;
|
||||
while (disp > 0x3FF)
|
||||
{
|
||||
disp >>= 1;
|
||||
shift++;
|
||||
}
|
||||
|
||||
return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
disp >>= 9;
|
||||
return base + (((s64)disp * factor * xrecip_z) >> 13);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
s32 x0, x1, xdiff, x;
|
||||
|
||||
int shift;
|
||||
bool linear;
|
||||
|
||||
s32 xrecip, xrecip_z;
|
||||
s32 w0n, w0d, w1d;
|
||||
|
||||
u32 yfactor;
|
||||
};
|
||||
|
||||
|
||||
template<int side>
|
||||
class Slope
|
||||
{
|
||||
public:
|
||||
Slope() {}
|
||||
|
||||
s32 SetupDummy(s32 x0)
|
||||
{
|
||||
if (side)
|
||||
{
|
||||
dx = -0x40000;
|
||||
x0--;
|
||||
}
|
||||
else
|
||||
{
|
||||
dx = 0;
|
||||
}
|
||||
|
||||
this->x0 = x0;
|
||||
this->xmin = x0;
|
||||
this->xmax = x0;
|
||||
|
||||
Increment = 0;
|
||||
XMajor = false;
|
||||
|
||||
Interp.Setup(0, 0, 0, 0);
|
||||
Interp.SetX(0);
|
||||
|
||||
xcov_incr = 0;
|
||||
|
||||
return x0;
|
||||
}
|
||||
|
||||
s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->y = y;
|
||||
|
||||
if (x1 > x0)
|
||||
{
|
||||
this->xmin = x0;
|
||||
this->xmax = x1-1;
|
||||
this->Negative = false;
|
||||
}
|
||||
else if (x1 < x0)
|
||||
{
|
||||
this->xmin = x1;
|
||||
this->xmax = x0-1;
|
||||
this->Negative = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->xmin = x0;
|
||||
if (side) this->xmin--;
|
||||
this->xmax = this->xmin;
|
||||
this->Negative = false;
|
||||
}
|
||||
|
||||
xlen = xmax+1 - xmin;
|
||||
ylen = y1 - y0;
|
||||
|
||||
// slope increment has a 18-bit fractional part
|
||||
// note: for some reason, x/y isn't calculated directly,
|
||||
// instead, 1/y is calculated and then multiplied by x
|
||||
// TODO: this is still not perfect (see for example x=169 y=33)
|
||||
if (ylen == 0)
|
||||
Increment = 0;
|
||||
else if (ylen == xlen)
|
||||
Increment = 0x40000;
|
||||
else
|
||||
{
|
||||
s32 yrecip = (1<<18) / ylen;
|
||||
Increment = (x1-x0) * yrecip;
|
||||
if (Increment < 0) Increment = -Increment;
|
||||
}
|
||||
|
||||
XMajor = (Increment > 0x40000);
|
||||
|
||||
if (side)
|
||||
{
|
||||
// right
|
||||
|
||||
if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000);
|
||||
else if (Increment != 0) dx = Negative ? 0x40000 : 0;
|
||||
else dx = -0x40000;
|
||||
}
|
||||
else
|
||||
{
|
||||
// left
|
||||
|
||||
if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000;
|
||||
else if (Increment != 0) dx = Negative ? 0x40000 : 0;
|
||||
else dx = 0;
|
||||
}
|
||||
|
||||
dx += (y - y0) * Increment;
|
||||
|
||||
s32 x = XVal();
|
||||
|
||||
if (XMajor)
|
||||
{
|
||||
if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
|
||||
else Interp.Setup(x0, x1, w0, w1);
|
||||
Interp.SetX(x);
|
||||
|
||||
// used for calculating AA coverage
|
||||
xcov_incr = (ylen << 10) / xlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
Interp.Setup(y0, y1, w0, w1);
|
||||
Interp.SetX(y);
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
s32 Step()
|
||||
{
|
||||
dx += Increment;
|
||||
y++;
|
||||
|
||||
s32 x = XVal();
|
||||
if (XMajor)
|
||||
{
|
||||
Interp.SetX(x);
|
||||
}
|
||||
else
|
||||
{
|
||||
Interp.SetX(y);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
s32 XVal()
|
||||
{
|
||||
s32 ret;
|
||||
if (Negative) ret = x0 - (dx >> 18);
|
||||
else ret = x0 + (dx >> 18);
|
||||
|
||||
if (ret < xmin) ret = xmin;
|
||||
else if (ret > xmax) ret = xmax;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void EdgeParams_XMajor(s32* length, s32* coverage)
|
||||
{
|
||||
if (side ^ Negative)
|
||||
*length = (dx >> 18) - ((dx-Increment) >> 18);
|
||||
else
|
||||
*length = ((dx+Increment) >> 18) - (dx >> 18);
|
||||
|
||||
// for X-major edges, we return the coverage
|
||||
// for the first pixel, and the increment for
|
||||
// further pixels on the same scanline
|
||||
s32 startx = dx >> 18;
|
||||
if (Negative) startx = xlen - startx;
|
||||
if (side) startx = startx - *length + 1;
|
||||
|
||||
s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen;
|
||||
*coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF);
|
||||
}
|
||||
|
||||
void EdgeParams_YMajor(s32* length, s32* coverage)
|
||||
{
|
||||
*length = 1;
|
||||
|
||||
if (Increment == 0)
|
||||
{
|
||||
*coverage = 31;
|
||||
}
|
||||
else
|
||||
{
|
||||
s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4;
|
||||
if ((cov >> 5) != (dx >> 18)) cov = 31;
|
||||
cov &= 0x1F;
|
||||
if (!(side ^ Negative)) cov = 0x1F - cov;
|
||||
|
||||
*coverage = cov;
|
||||
}
|
||||
}
|
||||
|
||||
void EdgeParams(s32* length, s32* coverage)
|
||||
{
|
||||
if (XMajor)
|
||||
return EdgeParams_XMajor(length, coverage);
|
||||
else
|
||||
return EdgeParams_YMajor(length, coverage);
|
||||
}
|
||||
|
||||
s32 Increment;
|
||||
bool Negative;
|
||||
bool XMajor;
|
||||
Interpolator<1> Interp;
|
||||
|
||||
private:
|
||||
s32 x0, xmin, xmax;
|
||||
s32 xlen, ylen;
|
||||
s32 dx;
|
||||
s32 y;
|
||||
|
||||
s32 xcov_incr;
|
||||
s32 ycoverage, ycov_incr;
|
||||
};
|
||||
|
||||
struct RendererPolygon
|
||||
{
|
||||
Polygon* PolyData;
|
||||
|
||||
Slope<0> SlopeL;
|
||||
Slope<1> SlopeR;
|
||||
s32 XL, XR;
|
||||
u32 CurVL, CurVR;
|
||||
u32 NextVL, NextVR;
|
||||
|
||||
};
|
||||
|
||||
RendererPolygon PolygonList[2048];
|
||||
|
||||
template <typename T>
|
||||
inline T ReadVRAM_Texture(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
|
||||
}
|
||||
template <typename T>
|
||||
inline T ReadVRAM_TexPal(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
|
||||
}
|
||||
|
||||
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
|
||||
void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
|
||||
{
|
||||
u32 vramaddr = (texparam & 0xFFFF) << 3;
|
||||
|
||||
|
@ -873,7 +425,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha)
|
|||
return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24);
|
||||
}
|
||||
|
||||
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
|
||||
u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
|
||||
{
|
||||
u8 r, g, b, a;
|
||||
|
||||
|
@ -981,7 +533,7 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
|
|||
return r | (g << 8) | (b << 16) | (a << 24);
|
||||
}
|
||||
|
||||
void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow)
|
||||
void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow)
|
||||
{
|
||||
u32 dstattr = AttrBuffer[pixeladdr];
|
||||
u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F);
|
||||
|
@ -1020,7 +572,7 @@ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 sha
|
|||
AttrBuffer[pixeladdr] = attr;
|
||||
}
|
||||
|
||||
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y)
|
||||
void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
|
@ -1047,7 +599,7 @@ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y)
|
|||
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
||||
}
|
||||
|
||||
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y)
|
||||
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
|
@ -1074,7 +626,7 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y)
|
|||
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
||||
}
|
||||
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
|
||||
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon)
|
||||
{
|
||||
u32 nverts = polygon->NumVertices;
|
||||
|
||||
|
@ -1127,7 +679,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
|
|||
}
|
||||
}
|
||||
|
||||
void RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
|
||||
void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
|
@ -1340,7 +892,7 @@ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
|
|||
rp->XR = rp->SlopeR.Step();
|
||||
}
|
||||
|
||||
void RenderPolygonScanline(RendererPolygon* rp, s32 y)
|
||||
void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
|
@ -1755,7 +1307,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
|
|||
rp->XR = rp->SlopeR.Step();
|
||||
}
|
||||
|
||||
void RenderScanline(s32 y, int npolys)
|
||||
void SoftRenderer::RenderScanline(s32 y, int npolys)
|
||||
{
|
||||
for (int i = 0; i < npolys; i++)
|
||||
{
|
||||
|
@ -1772,8 +1324,7 @@ void RenderScanline(s32 y, int npolys)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
u32 CalculateFogDensity(u32 pixeladdr)
|
||||
u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
|
||||
{
|
||||
u32 z = DepthBuffer[pixeladdr];
|
||||
u32 densityid, densityfrac;
|
||||
|
@ -1812,7 +1363,7 @@ u32 CalculateFogDensity(u32 pixeladdr)
|
|||
return density;
|
||||
}
|
||||
|
||||
void ScanlineFinalPass(s32 y)
|
||||
void SoftRenderer::ScanlineFinalPass(s32 y)
|
||||
{
|
||||
// to consider:
|
||||
// clearing all polygon fog flags if the master flag isn't set?
|
||||
|
@ -1981,7 +1532,7 @@ void ScanlineFinalPass(s32 y)
|
|||
}
|
||||
}
|
||||
|
||||
void ClearBuffers()
|
||||
void SoftRenderer::ClearBuffers()
|
||||
{
|
||||
u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
|
||||
u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID
|
||||
|
@ -2055,7 +1606,7 @@ void ClearBuffers()
|
|||
u32 a = (RenderClearAttr1 >> 16) & 0x1F;
|
||||
u32 color = r | (g << 8) | (b << 16) | (a << 24);
|
||||
|
||||
polyid |= (RenderClearAttr1 & 0x8000);
|
||||
polyid |= (RenderClearAttr1 & 0x8000);
|
||||
|
||||
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
|
||||
{
|
||||
|
@ -2070,7 +1621,7 @@ void ClearBuffers()
|
|||
}
|
||||
}
|
||||
|
||||
void RenderPolygons(bool threaded, Polygon** polygons, int npolys)
|
||||
void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
|
||||
{
|
||||
int j = 0;
|
||||
for (int i = 0; i < npolys; i++)
|
||||
|
@ -2096,13 +1647,13 @@ void RenderPolygons(bool threaded, Polygon** polygons, int npolys)
|
|||
Platform::Semaphore_Post(Sema_ScanlineCount);
|
||||
}
|
||||
|
||||
void VCount144()
|
||||
void SoftRenderer::VCount144()
|
||||
{
|
||||
if (RenderThreadRunning)
|
||||
Platform::Semaphore_Wait(Sema_RenderDone);
|
||||
}
|
||||
|
||||
void RenderFrame()
|
||||
void SoftRenderer::RenderFrame()
|
||||
{
|
||||
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
|
||||
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
|
||||
|
@ -2123,7 +1674,12 @@ void RenderFrame()
|
|||
}
|
||||
}
|
||||
|
||||
void RenderThreadFunc()
|
||||
// Renderer3D::RestartFrame override: simply re-runs SetupRenderThread().
void SoftRenderer::RestartFrame()
|
||||
{
|
||||
SetupRenderThread();
|
||||
}
|
||||
|
||||
void SoftRenderer::RenderThreadFunc()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
|
@ -2146,7 +1702,7 @@ void RenderThreadFunc()
|
|||
}
|
||||
}
|
||||
|
||||
u32* GetLine(int line)
|
||||
u32* SoftRenderer::GetLine(int line)
|
||||
{
|
||||
if (RenderThreadRunning)
|
||||
{
|
||||
|
@ -2158,4 +1714,3 @@ u32* GetLine(int line)
|
|||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,516 @@
|
|||
/*
|
||||
Copyright 2016-2020 Arisotura
|
||||
|
||||
This file is part of melonDS.
|
||||
|
||||
melonDS is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPU3D.h"
|
||||
#include "Platform.h"
|
||||
#include <thread>
|
||||
|
||||
namespace GPU3D
|
||||
{
|
||||
class SoftRenderer : public Renderer3D
|
||||
{
|
||||
public:
|
||||
SoftRenderer();
|
||||
virtual ~SoftRenderer() override {};
|
||||
virtual bool Init() override;
|
||||
virtual void DeInit() override;
|
||||
virtual void Reset() override;
|
||||
|
||||
virtual void SetRenderSettings(GPU::RenderSettings& settings) override;
|
||||
|
||||
virtual void VCount144() override;
|
||||
virtual void RenderFrame() override;
|
||||
virtual void RestartFrame() override;
|
||||
virtual u32* GetLine(int line) override;
|
||||
|
||||
void SetupRenderThread();
|
||||
void StopRenderThread();
|
||||
private:
|
||||
// Notes on the interpolator:
|
||||
//
|
||||
// This is a theory on how the DS hardware interpolates values. It matches hardware output
|
||||
// in the tests I did, but the hardware may be doing it differently. You never know.
|
||||
//
|
||||
// Assuming you want to perspective-correctly interpolate a variable named A across two points
|
||||
// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly,
|
||||
// then divide A/W by 1/W to recover the correct A value.
|
||||
//
|
||||
// The DS GPU approximates interpolation by calculating a perspective-correct interpolation
|
||||
// between 0 and 1, then using the result as a factor to linearly interpolate the actual
|
||||
// vertex attributes. The factor has 9 bits of precision when interpolating along Y and
|
||||
// 8 bits along X.
|
||||
//
|
||||
// There's a special path for when the two W values are equal: it directly does linear
|
||||
// interpolation, avoiding precision loss from the aforementioned approximation.
|
||||
// Which is desirable when using the GPU to draw 2D graphics.
|
||||
|
||||
template<int dir>
|
||||
class Interpolator
|
||||
{
|
||||
public:
|
||||
Interpolator() {}
|
||||
Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
Setup(x0, x1, w0, w1);
|
||||
}
|
||||
|
||||
void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->x1 = x1;
|
||||
this->xdiff = x1 - x0;
|
||||
|
||||
// calculate reciprocals for linear mode and Z interpolation
|
||||
// TODO eventually: use a faster reciprocal function?
|
||||
if (this->xdiff != 0)
|
||||
this->xrecip = (1<<30) / this->xdiff;
|
||||
else
|
||||
this->xrecip = 0;
|
||||
this->xrecip_z = this->xrecip >> 8;
|
||||
|
||||
// linear mode is used if both W values are equal and have
|
||||
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
||||
u32 mask = dir ? 0x7E : 0x7F;
|
||||
if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
|
||||
this->linear = true;
|
||||
else
|
||||
this->linear = false;
|
||||
|
||||
if (dir)
|
||||
{
|
||||
// along Y
|
||||
|
||||
if ((w0 & 0x1) && !(w1 & 0x1))
|
||||
{
|
||||
this->w0n = w0 - 1;
|
||||
this->w0d = w0 + 1;
|
||||
this->w1d = w1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->w0n = w0 & 0xFFFE;
|
||||
this->w0d = w0 & 0xFFFE;
|
||||
this->w1d = w1 & 0xFFFE;
|
||||
}
|
||||
|
||||
this->shift = 9;
|
||||
}
|
||||
else
|
||||
{
|
||||
// along X
|
||||
|
||||
this->w0n = w0;
|
||||
this->w0d = w0;
|
||||
this->w1d = w1;
|
||||
|
||||
this->shift = 8;
|
||||
}
|
||||
}
|
||||
|
||||
void SetX(s32 x)
|
||||
{
|
||||
x -= x0;
|
||||
this->x = x;
|
||||
if (xdiff != 0 && !linear)
|
||||
{
|
||||
s64 num = ((s64)x * w0n) << shift;
|
||||
s32 den = (x * w0d) + ((xdiff-x) * w1d);
|
||||
|
||||
// this seems to be a proper division on hardware :/
|
||||
// I haven't been able to find cases that produce imperfect output
|
||||
if (den == 0) yfactor = 0;
|
||||
else yfactor = (s32)(num / den);
|
||||
}
|
||||
}
|
||||
|
||||
s32 Interpolate(s32 y0, s32 y1)
|
||||
{
|
||||
if (xdiff == 0 || y0 == y1) return y0;
|
||||
|
||||
if (!linear)
|
||||
{
|
||||
// perspective-correct approx. interpolation
|
||||
if (y0 < y1)
|
||||
return y0 + (((y1-y0) * yfactor) >> shift);
|
||||
else
|
||||
return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
// linear interpolation
|
||||
// checkme: the rounding bias there (3<<24) is a guess
|
||||
if (y0 < y1)
|
||||
return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
|
||||
else
|
||||
return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
|
||||
}
|
||||
}
|
||||
|
||||
s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer)
|
||||
{
|
||||
if (xdiff == 0 || z0 == z1) return z0;
|
||||
|
||||
if (wbuffer)
|
||||
{
|
||||
// W-buffering: perspective-correct approx. interpolation
|
||||
if (z0 < z1)
|
||||
return z0 + (((s64)(z1-z0) * yfactor) >> shift);
|
||||
else
|
||||
return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Z-buffering: linear interpolation
|
||||
// still doesn't quite match hardware...
|
||||
s32 base, disp, factor;
|
||||
|
||||
if (z0 < z1)
|
||||
{
|
||||
base = z0;
|
||||
disp = z1 - z0;
|
||||
factor = x;
|
||||
}
|
||||
else
|
||||
{
|
||||
base = z1;
|
||||
disp = z0 - z1,
|
||||
factor = xdiff - x;
|
||||
}
|
||||
|
||||
if (dir)
|
||||
{
|
||||
int shift = 0;
|
||||
while (disp > 0x3FF)
|
||||
{
|
||||
disp >>= 1;
|
||||
shift++;
|
||||
}
|
||||
|
||||
return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
disp >>= 9;
|
||||
return base + (((s64)disp * factor * xrecip_z) >> 13);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
s32 x0, x1, xdiff, x;
|
||||
|
||||
int shift;
|
||||
bool linear;
|
||||
|
||||
s32 xrecip, xrecip_z;
|
||||
s32 w0n, w0d, w1d;
|
||||
|
||||
u32 yfactor;
|
||||
};
|
||||
|
||||
|
||||
template<int side>
|
||||
class Slope
|
||||
{
|
||||
public:
|
||||
Slope() {}
|
||||
|
||||
s32 SetupDummy(s32 x0)
|
||||
{
|
||||
if (side)
|
||||
{
|
||||
dx = -0x40000;
|
||||
x0--;
|
||||
}
|
||||
else
|
||||
{
|
||||
dx = 0;
|
||||
}
|
||||
|
||||
this->x0 = x0;
|
||||
this->xmin = x0;
|
||||
this->xmax = x0;
|
||||
|
||||
Increment = 0;
|
||||
XMajor = false;
|
||||
|
||||
Interp.Setup(0, 0, 0, 0);
|
||||
Interp.SetX(0);
|
||||
|
||||
xcov_incr = 0;
|
||||
|
||||
return x0;
|
||||
}
|
||||
|
||||
s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->y = y;
|
||||
|
||||
if (x1 > x0)
|
||||
{
|
||||
this->xmin = x0;
|
||||
this->xmax = x1-1;
|
||||
this->Negative = false;
|
||||
}
|
||||
else if (x1 < x0)
|
||||
{
|
||||
this->xmin = x1;
|
||||
this->xmax = x0-1;
|
||||
this->Negative = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->xmin = x0;
|
||||
if (side) this->xmin--;
|
||||
this->xmax = this->xmin;
|
||||
this->Negative = false;
|
||||
}
|
||||
|
||||
xlen = xmax+1 - xmin;
|
||||
ylen = y1 - y0;
|
||||
|
||||
// slope increment has a 18-bit fractional part
|
||||
// note: for some reason, x/y isn't calculated directly,
|
||||
// instead, 1/y is calculated and then multiplied by x
|
||||
// TODO: this is still not perfect (see for example x=169 y=33)
|
||||
if (ylen == 0)
|
||||
Increment = 0;
|
||||
else if (ylen == xlen)
|
||||
Increment = 0x40000;
|
||||
else
|
||||
{
|
||||
s32 yrecip = (1<<18) / ylen;
|
||||
Increment = (x1-x0) * yrecip;
|
||||
if (Increment < 0) Increment = -Increment;
|
||||
}
|
||||
|
||||
XMajor = (Increment > 0x40000);
|
||||
|
||||
if (side)
|
||||
{
|
||||
// right
|
||||
|
||||
if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000);
|
||||
else if (Increment != 0) dx = Negative ? 0x40000 : 0;
|
||||
else dx = -0x40000;
|
||||
}
|
||||
else
|
||||
{
|
||||
// left
|
||||
|
||||
if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000;
|
||||
else if (Increment != 0) dx = Negative ? 0x40000 : 0;
|
||||
else dx = 0;
|
||||
}
|
||||
|
||||
dx += (y - y0) * Increment;
|
||||
|
||||
s32 x = XVal();
|
||||
|
||||
if (XMajor)
|
||||
{
|
||||
if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
|
||||
else Interp.Setup(x0, x1, w0, w1);
|
||||
Interp.SetX(x);
|
||||
|
||||
// used for calculating AA coverage
|
||||
xcov_incr = (ylen << 10) / xlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
Interp.Setup(y0, y1, w0, w1);
|
||||
Interp.SetX(y);
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
s32 Step()
|
||||
{
|
||||
dx += Increment;
|
||||
y++;
|
||||
|
||||
s32 x = XVal();
|
||||
if (XMajor)
|
||||
{
|
||||
Interp.SetX(x);
|
||||
}
|
||||
else
|
||||
{
|
||||
Interp.SetX(y);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
s32 XVal()
|
||||
{
|
||||
s32 ret;
|
||||
if (Negative) ret = x0 - (dx >> 18);
|
||||
else ret = x0 + (dx >> 18);
|
||||
|
||||
if (ret < xmin) ret = xmin;
|
||||
else if (ret > xmax) ret = xmax;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void EdgeParams_XMajor(s32* length, s32* coverage)
|
||||
{
|
||||
if (side ^ Negative)
|
||||
*length = (dx >> 18) - ((dx-Increment) >> 18);
|
||||
else
|
||||
*length = ((dx+Increment) >> 18) - (dx >> 18);
|
||||
|
||||
// for X-major edges, we return the coverage
|
||||
// for the first pixel, and the increment for
|
||||
// further pixels on the same scanline
|
||||
s32 startx = dx >> 18;
|
||||
if (Negative) startx = xlen - startx;
|
||||
if (side) startx = startx - *length + 1;
|
||||
|
||||
s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen;
|
||||
*coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF);
|
||||
}
|
||||
|
||||
void EdgeParams_YMajor(s32* length, s32* coverage)
|
||||
{
|
||||
*length = 1;
|
||||
|
||||
if (Increment == 0)
|
||||
{
|
||||
*coverage = 31;
|
||||
}
|
||||
else
|
||||
{
|
||||
s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4;
|
||||
if ((cov >> 5) != (dx >> 18)) cov = 31;
|
||||
cov &= 0x1F;
|
||||
if (!(side ^ Negative)) cov = 0x1F - cov;
|
||||
|
||||
*coverage = cov;
|
||||
}
|
||||
}
|
||||
|
||||
void EdgeParams(s32* length, s32* coverage)
|
||||
{
|
||||
if (XMajor)
|
||||
return EdgeParams_XMajor(length, coverage);
|
||||
else
|
||||
return EdgeParams_YMajor(length, coverage);
|
||||
}
|
||||
|
||||
s32 Increment;
|
||||
bool Negative;
|
||||
bool XMajor;
|
||||
Interpolator<1> Interp;
|
||||
|
||||
private:
|
||||
s32 x0, xmin, xmax;
|
||||
s32 xlen, ylen;
|
||||
s32 dx;
|
||||
s32 y;
|
||||
|
||||
s32 xcov_incr;
|
||||
s32 ycoverage, ycov_incr;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline T ReadVRAM_Texture(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
|
||||
}
|
||||
template <typename T>
|
||||
inline T ReadVRAM_TexPal(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
|
||||
}
|
||||
|
||||
struct RendererPolygon
|
||||
{
|
||||
Polygon* PolyData;
|
||||
|
||||
Slope<0> SlopeL;
|
||||
Slope<1> SlopeR;
|
||||
s32 XL, XR;
|
||||
u32 CurVL, CurVR;
|
||||
u32 NextVL, NextVR;
|
||||
|
||||
};
|
||||
|
||||
RendererPolygon PolygonList[2048];
|
||||
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
|
||||
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
|
||||
void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
|
||||
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y);
|
||||
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y);
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
|
||||
void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
|
||||
void RenderPolygonScanline(RendererPolygon* rp, s32 y);
|
||||
void RenderScanline(s32 y, int npolys);
|
||||
u32 CalculateFogDensity(u32 pixeladdr);
|
||||
void ScanlineFinalPass(s32 y);
|
||||
void ClearBuffers();
|
||||
void RenderPolygons(bool threaded, Polygon** polygons, int npolys);
|
||||
|
||||
void RenderThreadFunc();
|
||||
|
||||
// buffer dimensions are 258x194 to add an offscreen 1px border
|
||||
// which simplifies edge marking tests
|
||||
// buffer is duplicated to keep track of the two topmost pixels
|
||||
// TODO: check if the hardware can accidentally plot pixels
|
||||
// offscreen in that border
|
||||
|
||||
static constexpr int ScanlineWidth = 258;
|
||||
static constexpr int NumScanlines = 194;
|
||||
static constexpr int BufferSize = ScanlineWidth * NumScanlines;
|
||||
static constexpr int FirstPixelOffset = ScanlineWidth + 1;
|
||||
|
||||
u32 ColorBuffer[BufferSize * 2];
|
||||
u32 DepthBuffer[BufferSize * 2];
|
||||
u32 AttrBuffer[BufferSize * 2];
|
||||
|
||||
// attribute buffer:
|
||||
// bit0-3: edge flags (left/right/top/bottom)
|
||||
// bit4: backfacing flag
|
||||
// bit8-12: antialiasing alpha
|
||||
// bit15: fog enable
|
||||
// bit16-21: polygon ID for translucent pixels
|
||||
// bit22: translucent flag
|
||||
// bit24-29: polygon ID for opaque pixels
|
||||
|
||||
u8 StencilBuffer[256*2];
|
||||
bool PrevIsShadowMask;
|
||||
|
||||
bool Enabled;
|
||||
|
||||
bool FrameIdentical;
|
||||
|
||||
// threading
|
||||
|
||||
bool Threaded;
|
||||
// Platform::Thread* RenderThread;
|
||||
std::thread RenderThread;
|
||||
bool RenderThreadRunning;
|
||||
bool RenderThreadRendering;
|
||||
Platform::Semaphore* Sema_RenderStart;
|
||||
Platform::Semaphore* Sema_RenderDone;
|
||||
Platform::Semaphore* Sema_ScanlineCount;
|
||||
};
|
||||
}
|
|
@ -16,8 +16,11 @@
|
|||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "GPU_OpenGL.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "NDS.h"
|
||||
#include "GPU.h"
|
||||
#include "Config.h"
|
||||
|
@ -26,34 +29,10 @@
|
|||
|
||||
namespace GPU
|
||||
{
|
||||
namespace GLCompositor
|
||||
{
|
||||
|
||||
using namespace OpenGL;
|
||||
|
||||
int Scale;
|
||||
int ScreenH, ScreenW;
|
||||
|
||||
GLuint CompShader[1][3];
|
||||
GLuint CompScaleLoc[1];
|
||||
GLuint Comp3DXPosLoc[1];
|
||||
|
||||
GLuint CompVertexBufferID;
|
||||
GLuint CompVertexArrayID;
|
||||
|
||||
struct CompVertex
|
||||
{
|
||||
float Position[2];
|
||||
float Texcoord[2];
|
||||
};
|
||||
CompVertex CompVertices[2 * 3*2];
|
||||
|
||||
GLuint CompScreenInputTex;
|
||||
GLuint CompScreenOutputTex[2];
|
||||
GLuint CompScreenOutputFB[2];
|
||||
|
||||
|
||||
bool Init()
|
||||
bool GLCompositor::Init()
|
||||
{
|
||||
if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, CompShader[0], "CompositorShader"))
|
||||
//if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Linear, CompShader[0], "CompositorShader"))
|
||||
|
@ -144,7 +123,7 @@ bool Init()
|
|||
return true;
|
||||
}
|
||||
|
||||
void DeInit()
|
||||
void GLCompositor::DeInit()
|
||||
{
|
||||
glDeleteFramebuffers(2, CompScreenOutputFB);
|
||||
glDeleteTextures(1, &CompScreenInputTex);
|
||||
|
@ -157,12 +136,12 @@ void DeInit()
|
|||
OpenGL::DeleteShaderProgram(CompShader[i]);
|
||||
}
|
||||
|
||||
void Reset()
|
||||
void GLCompositor::Reset()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void SetRenderSettings(RenderSettings& settings)
|
||||
void GLCompositor::SetRenderSettings(RenderSettings& settings)
|
||||
{
|
||||
int scale = settings.GL_ScaleFactor;
|
||||
|
||||
|
@ -188,7 +167,7 @@ void SetRenderSettings(RenderSettings& settings)
|
|||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
}
|
||||
|
||||
void Stop()
|
||||
void GLCompositor::Stop()
|
||||
{
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
|
@ -202,7 +181,7 @@ void Stop()
|
|||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
}
|
||||
|
||||
void RenderFrame()
|
||||
void GLCompositor::RenderFrame()
|
||||
{
|
||||
int frontbuf = GPU::FrontBuffer;
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||
|
@ -236,17 +215,16 @@ void RenderFrame()
|
|||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
GPU3D::GLRenderer::SetupAccelFrame();
|
||||
reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame();
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
|
||||
glBindVertexArray(CompVertexArrayID);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 4*3);
|
||||
}
|
||||
|
||||
void BindOutputTexture(int buf)
|
||||
void GLCompositor::BindOutputTexture(int buf)
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
Copyright 2016-2020 Arisotura
|
||||
|
||||
This file is part of melonDS.
|
||||
|
||||
melonDS is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "OpenGLSupport.h"
|
||||
|
||||
namespace GPU
|
||||
{
|
||||
|
||||
struct RenderSettings;
|
||||
|
||||
class GLCompositor
|
||||
{
|
||||
public:
|
||||
GLCompositor() = default;
|
||||
GLCompositor(const GLCompositor&) = delete;
|
||||
GLCompositor& operator=(const GLCompositor&) = delete;
|
||||
|
||||
bool Init();
|
||||
void DeInit();
|
||||
void Reset();
|
||||
|
||||
void SetRenderSettings(RenderSettings& settings);
|
||||
|
||||
void Stop();
|
||||
void RenderFrame();
|
||||
void BindOutputTexture(int buf);
|
||||
private:
|
||||
|
||||
int Scale;
|
||||
int ScreenH, ScreenW;
|
||||
|
||||
GLuint CompShader[1][3];
|
||||
GLuint CompScaleLoc[1];
|
||||
GLuint Comp3DXPosLoc[1];
|
||||
|
||||
GLuint CompVertexBufferID;
|
||||
GLuint CompVertexArrayID;
|
||||
|
||||
struct CompVertex
|
||||
{
|
||||
float Position[2];
|
||||
float Texcoord[2];
|
||||
};
|
||||
CompVertex CompVertices[2 * 3*2];
|
||||
|
||||
GLuint CompScreenInputTex;
|
||||
GLuint CompScreenOutputTex[2];
|
||||
GLuint CompScreenOutputFB[2];
|
||||
};
|
||||
|
||||
}
|
|
@ -1564,7 +1564,7 @@ void RunTimer(u32 tid, s32 cycles)
|
|||
|
||||
void RunTimers(u32 cpu)
|
||||
{
|
||||
register u32 timermask = TimerCheckMask[cpu];
|
||||
u32 timermask = TimerCheckMask[cpu];
|
||||
s32 cycles;
|
||||
|
||||
if (cpu == 0)
|
||||
|
|
|
@ -1013,7 +1013,7 @@ void ScreenPanelGL::paintGL()
|
|||
if (GPU::Renderer != 0)
|
||||
{
|
||||
// hardware-accelerated render
|
||||
GPU::GLCompositor::BindOutputTexture(frontbuf);
|
||||
GPU::CurGLCompositor->BindOutputTexture(frontbuf);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@ -2536,9 +2536,15 @@ int main(int argc, char** argv)
|
|||
|
||||
Config::Load();
|
||||
|
||||
#define SANITIZE(var, min, max) { if (var < min) var = min; else if (var > max) var = max; }
|
||||
#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); }
|
||||
SANITIZE(Config::ConsoleType, 0, 1);
|
||||
SANITIZE(Config::_3DRenderer, 0, 1);
|
||||
SANITIZE(Config::_3DRenderer,
|
||||
0,
|
||||
0 // Minimum, Software renderer
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
+ 1 // OpenGL Renderer
|
||||
#endif
|
||||
);
|
||||
SANITIZE(Config::ScreenVSyncInterval, 1, 20);
|
||||
SANITIZE(Config::GL_ScaleFactor, 1, 16);
|
||||
SANITIZE(Config::AudioVolume, 0, 256);
|
||||
|
|
Loading…
Reference in New Issue