mgba/src/platform/3ds/ctr-gpu.c

420 lines
11 KiB
C

/* Copyright (c) 2015 Yuri Kunde Schlesner
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <3ds.h>
#include <3ds/gpu/gpu.h>
#include <3ds/gpu/gx.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "ctr-gpu.h"
#include "uishader.h"
#include "uishader.shbin.h"
struct ctrUIVertex {
s16 x,y;
s16 u,v;
u32 abgr;
};
#define VRAM_BASE 0x18000000u
#define MAX_NUM_QUADS 1024
#define COMMAND_LIST_LENGTH (16 * 1024)
// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16)))
static struct ctrUIVertex* ctrVertexBuffer = NULL;
static u16* ctrIndexBuffer = NULL;
static u16 ctrNumQuads = 0;
static void* gpuColorBuffer[2] = { NULL, NULL };
static u32* gpuCommandList = NULL;
static void* screenTexture = NULL;
static shaderProgram_s gpuShader;
static DVLB_s* passthroughShader = NULL;
static int pendingEvents = 0;
static const struct ctrTexture* activeTexture = NULL;
void ctrClearPending(int events) {
int toClear = events & pendingEvents;
if (toClear & (1 << GSPGPU_EVENT_PSC0)) {
gspWaitForPSC0();
}
if (toClear & (1 << GSPGPU_EVENT_PPF)) {
gspWaitForPPF();
}
pendingEvents ^= toClear;
}
// Replacements for the limiting GPU_SetViewport function in ctrulib
static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
u32 buf[4];
// Unknown
GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 0x00000001);
GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 0x00000001);
// Set depth/color buffer address and dimensions
buf[0] = depthBuffer >> 3;
buf[1] = colorBuffer >> 3;
buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM, buf[2]);
// Set depth/color buffer pixel format
GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, 0); // Unknown
// Enable color/depth buffers
buf[0] = colorBuffer != 0 ? 0xF : 0x0;
buf[1] = buf[0];
buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
buf[3] = buf[2];
GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, buf, 4);
}
static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
u32 buf[4];
buf[0] = f32tof24(w / 2.0f);
buf[1] = f32tof31(2.0f / w) << 1;
buf[2] = f32tof24(h / 2.0f);
buf[3] = f32tof31(2.0f / h) << 1;
GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, buf, 4);
GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
buf[0] = 0;
buf[1] = 0;
buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
}
static void _setDummyTexEnv(int id) {
GPU_SetTexEnv(id,
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVOPERANDS(0, 0, 0),
GPU_TEVOPERANDS(0, 0, 0),
GPU_REPLACE,
GPU_REPLACE,
0x00000000);
}
Result ctrInitGpu() {
Result res = -1;
// Allocate buffers
gpuColorBuffer[0] = vramAlloc(400 * 240 * 4);
gpuColorBuffer[1] = vramAlloc(320 * 240 * 4);
gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
if (gpuColorBuffer[0] == NULL || gpuColorBuffer[1] == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
res = -1;
goto error_allocs;
}
// Both buffers share the same allocation, index buffer follows the vertex buffer
ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
// Load vertex shader binary
passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
if (passthroughShader == NULL) {
res = -1;
goto error_dvlb;
}
// Create shader
shaderProgramInit(&gpuShader);
res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
if (res < 0) {
goto error_shader;
}
// Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
GPU_Init(NULL);
GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
return 0;
error_shader:
shaderProgramFree(&gpuShader);
error_dvlb:
if (passthroughShader != NULL) {
DVLB_Free(passthroughShader);
passthroughShader = NULL;
}
error_allocs:
if (ctrVertexBuffer != NULL) {
linearFree(ctrVertexBuffer);
ctrVertexBuffer = NULL;
ctrIndexBuffer = NULL;
}
if (gpuCommandList != NULL) {
GPUCMD_SetBuffer(NULL, 0, 0);
linearFree(gpuCommandList);
gpuCommandList = NULL;
}
if (gpuColorBuffer[0] != NULL) {
vramFree(gpuColorBuffer[0]);
gpuColorBuffer[0] = NULL;
}
if (gpuColorBuffer[1] != NULL) {
vramFree(gpuColorBuffer[1]);
gpuColorBuffer[1] = NULL;
}
return res;
}
void ctrDeinitGpu() {
shaderProgramFree(&gpuShader);
DVLB_Free(passthroughShader);
passthroughShader = NULL;
linearFree(screenTexture);
screenTexture = NULL;
linearFree(ctrVertexBuffer);
ctrVertexBuffer = NULL;
ctrIndexBuffer = NULL;
GPUCMD_SetBuffer(NULL, 0, 0);
linearFree(gpuCommandList);
gpuCommandList = NULL;
vramFree(gpuColorBuffer[0]);
gpuColorBuffer[0] = NULL;
vramFree(gpuColorBuffer[1]);
gpuColorBuffer[1] = NULL;
}
void ctrGpuBeginFrame(int screen) {
if (screen > 1) {
return;
}
int fw;
if (screen == 0) {
fw = 400;
} else {
fw = 320;
}
_GPU_SetFramebuffer(osConvertVirtToPhys(gpuColorBuffer[screen]), 0, 240, fw);
}
void ctrGpuBeginDrawing(void) {
shaderProgramUse(&gpuShader);
// Disable depth and stencil testing
GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
GPU_DepthMap(-1.0f, 0.0f);
// Enable alpha blending
GPU_SetAlphaBlending(
GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
GPU_SetBlendingColor(0, 0, 0, 0);
// Disable alpha testing
GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
// Unknown
GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 0x1, 0);
GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, 0);
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
GPU_TEVOPERANDS(0, 0, 0), // RGB
GPU_TEVOPERANDS(0, 0, 0), // Alpha
GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
0x00000000); // Constant color
_setDummyTexEnv(1);
_setDummyTexEnv(2);
_setDummyTexEnv(3);
_setDummyTexEnv(4);
_setDummyTexEnv(5);
// Configure vertex attribute format
u32 bufferOffsets[] = { osConvertVirtToPhys(ctrVertexBuffer) - VRAM_BASE };
u64 arrayTargetAttributes[] = { 0x210 };
u8 numAttributesInArray[] = { 3 };
GPU_SetAttributeBuffers(
3, // Number of attributes
(u32*)VRAM_BASE, // Base address
GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
0xFF8, // Non-fixed vertex inputs
0x210, // Vertex shader input map
1, // Use 1 vertex array
bufferOffsets, arrayTargetAttributes, numAttributesInArray);
}
void ctrGpuEndFrame(int screen, void* outputFramebuffer, int w, int h) {
if (screen > 1) {
return;
}
int fw;
if (screen == 0) {
fw = 400;
} else {
fw = 320;
}
ctrFlushBatch();
void* colorBuffer = (u8*)gpuColorBuffer[screen] + ((fw - w) * 240 * 4);
const u32 GX_CROP_INPUT_LINES = (1 << 2);
ctrClearPending(1 << GSPGPU_EVENT_PSC0);
ctrClearPending(1 << GSPGPU_EVENT_PPF);
GX_DisplayTransfer(
colorBuffer, GX_BUFFER_DIM(240, fw),
outputFramebuffer, GX_BUFFER_DIM(h, w),
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
GX_CROP_INPUT_LINES);
pendingEvents |= (1 << GSPGPU_EVENT_PPF);
}
void ctrGpuEndDrawing(void) {
ctrClearPending(1 << GSPGPU_EVENT_PPF);
gfxSwapBuffersGpu();
gspWaitForEvent(GSPGPU_EVENT_VBlank0, false);
void* gpuColorBuffer0End = (char*)gpuColorBuffer[0] + 240 * 400 * 4;
void* gpuColorBuffer1End = (char*)gpuColorBuffer[1] + 240 * 320 * 4;
GX_MemoryFill(
gpuColorBuffer[0], 0x00000000, gpuColorBuffer0End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
gpuColorBuffer[1], 0x00000000, gpuColorBuffer1End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER);
pendingEvents |= 1 << GSPGPU_EVENT_PSC0;
}
void ctrSetViewportSize(s16 w, s16 h) {
// Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
// bottom-right, taking into account the 3DS' screens' portrait
// orientation.
float projectionMtx[4 * 4] = {
// Rows are in the order w z y x, because ctrulib
1.0f, 0.0f, -2.0f / h, 0.0f,
1.0f, 0.0f, 0.0f, -2.0f / w,
-0.5f, 0.0f, 0.0f, 0.0f,
1.0f, 0.0f, 0.0f, 0.0f,
};
GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
_GPU_SetViewportEx(0, 0, h, w);
}
void ctrActivateTexture(const struct ctrTexture* texture) {
if (activeTexture == texture) {
return;
}
ctrFlushBatch();
GPU_SetTextureEnable(GPU_TEXUNIT0);
GPU_SetTexture(
GPU_TEXUNIT0, (u32*)osConvertVirtToPhys(texture->data),
texture->width, texture->height,
GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
texture->format);
GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
float textureMtx[2 * 4] = {
// Rows are in the order w z y x, because ctrulib
0.0f, 0.0f, 0.0f, 1.0f / texture->width,
0.0f, 0.0f, 1.0f / texture->height, 0.0f,
};
GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
activeTexture = texture;
}
void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
if (ctrNumQuads == MAX_NUM_QUADS) {
ctrFlushBatch();
}
u16 index = ctrNumQuads * 4;
struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
vtx->x = x; vtx->y = y;
vtx->u = u; vtx->v = v;
vtx->abgr = color;
vtx++;
vtx->x = x + w; vtx->y = y;
vtx->u = u + uw; vtx->v = v;
vtx->abgr = color;
vtx++;
vtx->x = x; vtx->y = y + h;
vtx->u = u; vtx->v = v + vh;
vtx->abgr = color;
vtx++;
vtx->x = x + w; vtx->y = y + h;
vtx->u = u + uw; vtx->v = v + vh;
vtx->abgr = color;
u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
ctrNumQuads += 1;
}
void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
ctrAddRectScaled(color,
x, y, w, h,
u, v, w, h);
}
void ctrFlushBatch(void) {
if (ctrNumQuads == 0) {
return;
}
ctrClearPending((1 << GSPGPU_EVENT_PSC0));
GSPGPU_FlushDataCache(ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
GPU_DrawElements(GPU_GEOMETRY_PRIM, (u32*)(osConvertVirtToPhys(ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
GPU_FinishDrawing();
GPUCMD_Finalize();
GSPGPU_FlushDataCache((u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
GPUCMD_FlushAndRun();
gspWaitForP3D();
GPUCMD_SetBufferOffset(0);
ctrNumQuads = 0;
}