* fix a few bugs related to 2D GPU registers (so that at least some of the graphics tests pass)

* less shitty texture mapping (and perspective-correct interpolation in general). As an extra bonus, it also takes fewer divisions.
This commit is contained in:
StapleButter 2017-03-04 14:47:20 +01:00
parent ff4c1c9cf6
commit b6e12e0c18
4 changed files with 34 additions and 13 deletions

View File

@ -125,6 +125,9 @@ u16 GPU2D::Read16(u32 addr)
case 0x00A: return BGCnt[1];
case 0x00C: return BGCnt[2];
case 0x00E: return BGCnt[3];
case 0x064: return CaptureCnt & 0xFFFF;
case 0x066: return CaptureCnt >> 16;
}
printf("unknown GPU read16 %08X\n", addr);
@ -136,6 +139,8 @@ u32 GPU2D::Read32(u32 addr)
switch (addr & 0x00000FFF)
{
case 0x000: return DispCnt;
case 0x064: return CaptureCnt;
}
return Read16(addr) | (Read16(addr+2) << 16);

View File

@ -467,10 +467,10 @@ void RenderPolygon(Polygon* polygon)
s32 wl = vlcur->FinalPosition[3] + (((s64)(vlnext->FinalPosition[3] - vlcur->FinalPosition[3]) * lfactor) >> 12);
s32 wr = vrcur->FinalPosition[3] + (((s64)(vrnext->FinalPosition[3] - vrcur->FinalPosition[3]) * rfactor) >> 12);
s64 perspfactorl1 = ((s64)(0x1000 - lfactor) << 12) / vlcur->FinalPosition[3];
s64 perspfactorl2 = ((s64)lfactor << 12) / vlnext->FinalPosition[3];
s64 perspfactorr1 = ((s64)(0x1000 - rfactor) << 12) / vrcur->FinalPosition[3];
s64 perspfactorr2 = ((s64)rfactor << 12) / vrnext->FinalPosition[3];
s64 perspfactorl1 = ((s64)(0x1000 - lfactor) * vlnext->FinalPosition[3]) >> 12;
s64 perspfactorl2 = ((s64)lfactor * vlcur->FinalPosition[3]) >> 12;
s64 perspfactorr1 = ((s64)(0x1000 - rfactor) * vrnext->FinalPosition[3]) >> 12;
s64 perspfactorr2 = ((s64)rfactor * vrcur->FinalPosition[3]) >> 12;
if (perspfactorl1 + perspfactorl2 == 0)
{
@ -512,8 +512,8 @@ void RenderPolygon(Polygon* polygon)
//z -= 0x1FF;
//if (z < 0) z = 0;
s32 perspfactor1 = ((0x1000 - xfactor) << 12) / wl;
s32 perspfactor2 = (xfactor << 12) / wr;
s32 perspfactor1 = ((s64)(0x1000 - xfactor) * wr) >> 12;
s32 perspfactor2 = ((s64)xfactor * wl) >> 12;
if (perspfactor1 + perspfactor2 == 0)
{
@ -528,8 +528,10 @@ void RenderPolygon(Polygon* polygon)
u32 vg = ((perspfactor1 * gl) + (perspfactor2 * gr)) / (perspfactor1 + perspfactor2);
u32 vb = ((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2);
s16 s = ((perspfactor1 * sl) + (perspfactor2 * sr)) / (perspfactor1 + perspfactor2);
s16 t = ((perspfactor1 * tl) + (perspfactor2 * tr)) / (perspfactor1 + perspfactor2);
s16 s = ((perspfactor1 * (s64)sl) + (perspfactor2 * (s64)sr)) / (perspfactor1 + perspfactor2);
s16 t = ((perspfactor1 * (s64)tl) + (perspfactor2 * (s64)tr)) / (perspfactor1 + perspfactor2);
//printf("y=%d x=%d: s=%04X t=%04X\n", y, x, s, t);
RenderPixel(polygon, x, y, z, vr>>3, vg>>3, vb>>3, s, t);
}

14
NDS.cpp
View File

@ -1296,6 +1296,8 @@ u16 ARM9IORead16(u32 addr)
case 0x04000006: return GPU::VCount;
case 0x04000060: return 0;
case 0x04000064:
case 0x04000066: return GPU::GPU2D_A->Read16(addr);
case 0x040000B8: return DMAs[0]->Cnt & 0xFFFF;
case 0x040000BA: return DMAs[0]->Cnt >> 16;
@ -1343,6 +1345,12 @@ u16 ARM9IORead16(u32 addr)
case 0x04000204: return ExMemCnt[0];
case 0x04000208: return IME[0];
case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8);
case 0x04000242: return GPU::VRAMCNT[2] | (GPU::VRAMCNT[3] << 8);
case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8);
case 0x04000246: return GPU::VRAMCNT[6] | (WRAMCnt << 8);
case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);
case 0x04000280: return DivCnt;
case 0x040002B0: return SqrtCnt;
@ -1374,6 +1382,8 @@ u32 ARM9IORead32(u32 addr)
{
case 0x04000004: return GPU::DispStat[0] | (GPU::VCount << 16);
case 0x04000064: return GPU::GPU2D_A->Read32(addr);
case 0x040000B0: return DMAs[0]->SrcAddr;
case 0x040000B4: return DMAs[0]->DstAddr;
case 0x040000B8: return DMAs[0]->Cnt;
@ -1404,6 +1414,10 @@ u32 ARM9IORead32(u32 addr)
case 0x04000210: return IE[0];
case 0x04000214: return IF[0];
case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8) | (GPU::VRAMCNT[2] << 16) | (GPU::VRAMCNT[3] << 24);
case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24);
case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);
case 0x04000290: return DivNumerator[0];
case 0x04000294: return DivNumerator[1];
case 0x04000298: return DivDenominator[0];

View File

@ -10,7 +10,7 @@
1481161027 c:\documents\sources\melonds\types.h
1488589906 source:c:\documents\sources\melonds\nds.cpp
1488594513 source:c:\documents\sources\melonds\nds.cpp
<stdio.h>
<string.h>
"NDS.h"
@ -87,7 +87,7 @@
"NDS.h"
"SPI.h"
1488590400 source:c:\documents\sources\melonds\gpu2d.cpp
1488594249 source:c:\documents\sources\melonds\gpu2d.cpp
<stdio.h>
<string.h>
"NDS.h"
@ -109,7 +109,7 @@
1486511075 c:\documents\sources\melonds\fifo.h
"types.h"
1488589063 source:c:\documents\sources\melonds\dma.cpp
1488591116 source:c:\documents\sources\melonds\dma.cpp
<stdio.h>
"NDS.h"
"DMA.h"
@ -148,14 +148,14 @@
1488497796 c:\documents\sources\melonds\gpu3d.h
1488497825 source:c:\documents\sources\melonds\gpu3d.cpp
1488594938 source:c:\documents\sources\melonds\gpu3d.cpp
<stdio.h>
<string.h>
"NDS.h"
"GPU.h"
"FIFO.h"
1488415378 source:c:\documents\sources\melonds\gpu3d_soft.cpp
1488634954 source:c:\documents\sources\melonds\gpu3d_soft.cpp
<stdio.h>
<string.h>
"NDS.h"