Texture coordinates are stored in fixed-point format in TEV, which allows overflows to be emulated correctly. Added logic to calculate texture LOD and use the correct mip. Dumping textures will now dump all mip levels. Added line rendering. Changed data stored in vertex from float arrays to vectors for cleaner math.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5178 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
donkopunchstania 2010-03-09 04:38:07 +00:00
parent 5beb6dfd47
commit cc7c6cd35f
16 changed files with 704 additions and 323 deletions

View File

@ -451,7 +451,8 @@ union TexMode0
unsigned mag_filter : 1;
unsigned min_filter : 3;
unsigned diag_lod : 1;
signed lod_bias : 10;
signed lod_bias : 8;
unsigned pad0 : 2;
unsigned max_aniso : 2;
unsigned lod_clamp : 1;
};

View File

@ -90,13 +90,13 @@ namespace Clipper
static inline int CalcClipMask(OutputVertexData *v)
{
int cmask = 0;
float* pos = v->projectedPosition;
if (pos[3] - pos[0] < 0) cmask |= CLIP_POS_X_BIT;
if (pos[0] + pos[3] < 0) cmask |= CLIP_NEG_X_BIT;
if (pos[3] - pos[1] < 0) cmask |= CLIP_POS_Y_BIT;
if (pos[1] + pos[3] < 0) cmask |= CLIP_NEG_Y_BIT;
if (pos[3] * pos[2] > 0) cmask |= CLIP_POS_Z_BIT;
if (pos[2] + pos[3] < 0) cmask |= CLIP_NEG_Z_BIT;
Vec4 pos = v->projectedPosition;
if (pos.w - pos.x < 0) cmask |= CLIP_POS_X_BIT;
if (pos.x + pos.w < 0) cmask |= CLIP_NEG_X_BIT;
if (pos.w - pos.y < 0) cmask |= CLIP_POS_Y_BIT;
if (pos.y + pos.w < 0) cmask |= CLIP_NEG_Y_BIT;
if (pos.w * pos.z > 0) cmask |= CLIP_POS_Z_BIT;
if (pos.z + pos.w < 0) cmask |= CLIP_NEG_Z_BIT;
return cmask;
}
@ -109,7 +109,7 @@ namespace Clipper
#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0))
#define CLIP_DOTPROD(I, A, B, C, D) \
(Vertices[I]->projectedPosition[0] * A + Vertices[I]->projectedPosition[1] * B + Vertices[I]->projectedPosition[2] * C + Vertices[I]->projectedPosition[3] * D)
(Vertices[I]->projectedPosition.x * A + Vertices[I]->projectedPosition.y * B + Vertices[I]->projectedPosition.z * C + Vertices[I]->projectedPosition.w * D)
#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \
{ \
@ -153,6 +153,27 @@ namespace Clipper
} \
}
// LINE_CLIP(PLANE_BIT, A, B, C, D)
// Clips the line Vertices[0] -> Vertices[1] against one plane. CLIP_DOTPROD evaluates
// A*x + B*y + C*z + D*w on a vertex's projectedPosition; a negative result is treated
// as being on the clipped side of the plane.
//
// The macro is not self-contained. It expects the expanding function to provide:
//   mask   - int; the plane is only processed when (mask & PLANE_BIT) is set
//   t0, t1 - float; raised to the largest interpolation fraction found so far for
//            endpoint 0 and endpoint 1 respectively
// and it executes a bare 'return' in the expanding function when both endpoints are
// on the negative side of the plane.
#define LINE_CLIP(PLANE_BIT, A, B, C, D ) \
{ \
if (mask & PLANE_BIT) { \
const float dp0 = CLIP_DOTPROD( 0, A, B, C, D ); \
const float dp1 = CLIP_DOTPROD( 1, A, B, C, D ); \
const bool neg_dp0 = dp0 < 0; \
const bool neg_dp1 = dp1 < 0; \
\
if (neg_dp0 && neg_dp1) \
return; \
\
if (neg_dp1) { \
float t = dp1 / (dp1 - dp0); \
if (t > t1) t1 = t; \
} else if (neg_dp0) { \
float t = dp0 / (dp0 - dp1); \
if (t > t0) t0 = t; \
} \
} \
}
void ClipTriangle(int *indices, int &numIndices)
{
int mask = 0;
@ -202,6 +223,53 @@ namespace Clipper
}
}
// Clips the line Vertices[0] -> Vertices[1] against the six clip planes.
//
// indices - two-entry output. On return each entry is the Vertices slot to use for
//           that end of the line, or SKIP_FLAG in both entries when one of the
//           LINE_CLIP expansions found the line entirely outside a plane.
void ClipLine(int *indices)
{
    int clip_mask[2];
    clip_mask[0] = CalcClipMask(Vertices[0]);
    clip_mask[1] = CalcClipMask(Vertices[1]);

    // LINE_CLIP refers to this local by name
    const int mask = clip_mask[0] | clip_mask[1];

    // nothing to do when neither endpoint is outside any plane
    if (mask == 0)
        return;

    // interpolation fractions for endpoint 0 and endpoint 1; LINE_CLIP raises them
    float t0 = 0;
    float t1 = 0;

    // LINE_CLIP returns straight out of this function when the line is fully
    // clipped, so flag both entries as skipped before running the planes.
    indices[0] = indices[1] = SKIP_FLAG;

    LINE_CLIP(CLIP_POS_X_BIT, -1,  0,  0, 1);
    LINE_CLIP(CLIP_NEG_X_BIT,  1,  0,  0, 1);
    LINE_CLIP(CLIP_POS_Y_BIT,  0, -1,  0, 1);
    LINE_CLIP(CLIP_NEG_Y_BIT,  0,  1,  0, 1);
    LINE_CLIP(CLIP_POS_Z_BIT,  0,  0, -1, 1);
    LINE_CLIP(CLIP_NEG_Z_BIT,  0,  0,  1, 1);

    // Still here, so the line survived: an endpoint that was outside gets a newly
    // interpolated vertex, an endpoint that was inside keeps its original slot.
    int numVertices = 2;

    if (clip_mask[0])
    {
        indices[0] = numVertices;
        AddInterpolatedVertex(t0, 0, 1, numVertices);
    }
    else
    {
        indices[0] = 0;
    }

    if (clip_mask[1])
    {
        indices[1] = numVertices;
        AddInterpolatedVertex(t1, 1, 0, numVertices);
    }
    else
    {
        indices[1] = 1;
    }
}
void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
{
if (stats.thisFrame.numDrawnObjects < g_Config.drawStart || stats.thisFrame.numDrawnObjects >= g_Config.drawEnd )
@ -247,6 +315,75 @@ namespace Clipper
}
}
// Copies the attributes of src into dst and offsets the screen position by (dx, dy).
// ProcessLine uses this to build the corners of the quad that represents a line.
//
// dst     - vertex to fill in
// src     - vertex to copy from
// dx, dy  - offset added to the screen-space x / y position
// sOffset - texture s offset for this corner; not applied yet (see todo below)
void CopyVertex(OutputVertexData *dst, OutputVertexData *src, float dx, float dy, unsigned int sOffset)
{
    // The rasterizer reads projectedPosition.w of every vertex it is handed, so the
    // clip-space position has to travel with the copy; dst is otherwise uninitialized.
    dst->projectedPosition = src->projectedPosition;

    dst->screenPosition.x = src->screenPosition.x + dx;
    dst->screenPosition.y = src->screenPosition.y + dy;
    dst->screenPosition.z = src->screenPosition.z;

    for (int i = 0; i < 3; ++i)
        dst->normal[i] = src->normal[i];

    // copy both color channels, not just the first one
    for (int chan = 0; chan < 2; ++chan)
    {
        for (int comp = 0; comp < 4; ++comp)
            dst->color[chan][comp] = src->color[chan][comp];
    }

    // todo - s offset
    for (int i = 0; i < 8; ++i)
        dst->texCoords[i] = src->texCoords[i];
}
// Clips a line and rasterizes what is left of it as a screen-space quad made of
// two triangles.
//
// lineV0, lineV1 - the two endpoints of the line
void ProcessLine(OutputVertexData *lineV0, OutputVertexData *lineV1)
{
    int indices[4] = { 0, 1, SKIP_FLAG, SKIP_FLAG };

    Vertices[0] = lineV0;
    Vertices[1] = lineV1;

    ClipLine(indices);

    // ClipLine leaves SKIP_FLAG in indices[0] when the whole line was clipped away
    if(indices[0] != SKIP_FLAG)
    {
        OutputVertexData *v0 = Vertices[indices[0]];
        OutputVertexData *v1 = Vertices[indices[1]];

        PerspectiveDivide(v0);
        PerspectiveDivide(v1);

        float dx = v1->screenPosition.x - v0->screenPosition.x;
        float dy = v1->screenPosition.y - v0->screenPosition.y;

        // Take the magnitudes explicitly: an unqualified abs() on a float can bind to
        // the integer overload and silently truncate the value.
        const float absDx = (dx < 0) ? -dx : dx;
        const float absDy = (dy < 0) ? -dy : dy;

        float screenDx = 0;
        float screenDy = 0;

        // Widen the line perpendicular to its major axis.
        // NOTE(review): linesize / 12 is presumably half the line width in pixels - confirm
        // the units of bpmem.lineptwidth.linesize.
        if(absDx > absDy)
        {
            if(dx > 0)
                screenDy = bpmem.lineptwidth.linesize / -12.0f;
            else
                screenDy = bpmem.lineptwidth.linesize / 12.0f;
        }
        else
        {
            if(dy > 0)
                screenDx = bpmem.lineptwidth.linesize / 12.0f;
            else
                screenDx = bpmem.lineptwidth.linesize / -12.0f;
        }

        OutputVertexData triangle[3];

        CopyVertex(&triangle[0], v0, screenDx, screenDy, 0);
        CopyVertex(&triangle[1], v1, screenDx, screenDy, 0);
        CopyVertex(&triangle[2], v1, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);

        // ccw winding
        Rasterizer::DrawTriangleFrontFace(&triangle[2], &triangle[1], &triangle[0]);

        // second half of the quad: reuse slots 0 and 2, replace slot 1 with the
        // remaining corner on the v0 side
        CopyVertex(&triangle[1], v0, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);

        Rasterizer::DrawTriangleFrontFace(&triangle[0], &triangle[1], &triangle[2]);
    }
}
bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface)
{
@ -260,15 +397,15 @@ namespace Clipper
return false;
}
float x0 = v0->projectedPosition[0];
float x1 = v1->projectedPosition[0];
float x2 = v2->projectedPosition[0];
float y1 = v1->projectedPosition[1];
float y0 = v0->projectedPosition[1];
float y2 = v2->projectedPosition[1];
float w0 = v0->projectedPosition[3];
float w1 = v1->projectedPosition[3];
float w2 = v2->projectedPosition[3];
float x0 = v0->projectedPosition.x;
float x1 = v1->projectedPosition.x;
float x2 = v2->projectedPosition.x;
float y1 = v1->projectedPosition.y;
float y0 = v0->projectedPosition.y;
float y2 = v2->projectedPosition.y;
float w0 = v0->projectedPosition.w;
float w1 = v1->projectedPosition.w;
float w2 = v2->projectedPosition.w;
float normalZDir = (x0*w2 - x2*w0)*y1 + (x2*y0 - x0*y2)*w1 + (y2*w0 - y0*w2)*x1;
@ -291,13 +428,13 @@ namespace Clipper
void PerspectiveDivide(OutputVertexData *vertex)
{
float *projected = vertex->projectedPosition;
float *screen = vertex->screenPosition;
Vec4 &projected = vertex->projectedPosition;
Vec3 &screen = vertex->screenPosition;
float wInverse = 1.0f/projected[3];
screen[0] = projected[0] * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
screen[1] = projected[1] * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
screen[2] = projected[2] * wInverse + m_ViewOffset[2];
float wInverse = 1.0f/projected.w;
screen.x = projected.x * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
screen.y = projected.y * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
screen.z = projected.z * wInverse + m_ViewOffset[2];
}
}

View File

@ -31,6 +31,7 @@ namespace Clipper
void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2);
void ProcessLine(OutputVertexData *v0, OutputVertexData *v1);
bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface);

View File

@ -49,36 +49,32 @@ void Init()
}
}
bool SaveTexture(const char* filename, u32 texmap, int width, int height)
{
u8 *data = new u8[width * height * 4];
GetTextureBGRA(data, texmap, width, height);
bool result = SaveTGA(filename, width, height, data);
delete []data;
return result;
}
void SaveTexture(const char* filename, u32 texmap)
void SaveTexture(const char* filename, u32 texmap, s32 mip)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;
TexImage0& ti0 = texUnit.texImage0[subTexmap];
SaveTexture(filename, texmap, ti0.width + 1, ti0.height + 1);
int width = ti0.width + 1;
int height = ti0.height + 1;
u8 *data = new u8[width * height * 4];
GetTextureBGRA(data, texmap, mip, width, height);
bool result = SaveTGA(filename, width, height, data);
delete []data;
}
void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height)
{
u8 sample[4];
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++) {
TextureSampler::Sample((float)x, (float)y, 0, texmap, sample);
TextureSampler::SampleMip(x << 7, y << 7, mip, false, texmap, sample);
// rgba to bgra
*(dst++) = sample[2];
*(dst++) = sample[1];
@ -87,13 +83,32 @@ void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
}
}
// Returns the max_lod setting of the given texture map as a whole mip level.
// max_lod is read as a value with 4 fractional bits; any non-zero fraction rounds
// the result up to the next level.
s32 GetMaxTextureLod(u32 texmap)
{
    // texmap bit 2 picks the texture unit group, bits 0-1 the map inside it
    FourTexUnits& unit = bpmem.tex[(texmap >> 2) & 1];
    const u8 rawMaxLod = unit.texMode1[texmap & 3].max_lod;

    const s32 wholeLevels = rawMaxLod >> 4;
    const bool hasFraction = (rawMaxLod & 0xf) != 0;

    return hasFraction ? wholeLevels + 1 : wholeLevels;
}
void DumpActiveTextures()
{
for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++)
{
u32 texmap = bpmem.tevindref.getTexMap(stageNum);
SaveTexture(StringFromFormat("%star%i_ind%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);
s32 maxLod = GetMaxTextureLod(texmap);
for (s32 mip = 0; mip < maxLod; ++mip)
{
SaveTexture(StringFromFormat("%star%i_ind%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
}
}
for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++)
@ -104,7 +119,11 @@ void DumpActiveTextures()
int texmap = order.getTexMap(stageOdd);
SaveTexture(StringFromFormat("%star%i_stage%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);
s32 maxLod = GetMaxTextureLod(texmap);
for (s32 mip = 0; mip < maxLod; ++mip)
{
SaveTexture(StringFromFormat("%star%i_stage%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
}
}
}

View File

@ -22,7 +22,7 @@ namespace DebugUtil
{
void Init();
void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height);
void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height);
void DumpActiveTextures();

View File

@ -155,7 +155,7 @@ namespace HwRasterizer
int width = texImage0.width;
int height = texImage0.height;
DebugUtil::GetTextureBGRA(temp, 0, width, height);
DebugUtil::GetTextureBGRA(temp, 0, 0, width, height);
glGenTextures(1, (GLuint *)&texture);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);

View File

@ -18,6 +18,8 @@
#ifndef _NATIVEVERTEXFORMAT_H
#define _NATIVEVERTEXFORMAT_H
#include "../../Plugin_VideoDX9/Src/Vec3.h"
#ifdef WIN32
#define LOADERDECL __cdecl
#else
@ -26,25 +28,33 @@
typedef void (LOADERDECL *TPipelineFunction)();
// Plain four-component float vector (x, y, z, w) with no operators of its own.
struct Vec4
{
float x;
float y;
float z;
float w;
};
struct InputVertexData
{
u8 posMtx;
u8 texMtx[8];
float position[4];
float normal[3][3];
Vec3 position;
Vec3 normal[3];
u8 color[2][4];
float texCoords[8][2];
};
struct OutputVertexData
{
float mvPosition[3];
float projectedPosition[4];
float screenPosition[3];
float normal[3][3];
Vec3 mvPosition;
Vec4 projectedPosition;
Vec3 screenPosition;
Vec3 normal[3];
u8 color[2][4];
float texCoords[8][3];
Vec3 texCoords[8];
void Lerp(float t, OutputVertexData *a, OutputVertexData *b)
{
@ -52,17 +62,16 @@ struct OutputVertexData
#define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8)
for (int i = 0; i < 3; ++i)
mvPosition[i] = LINTERP(t, a->mvPosition[i], b->mvPosition[i]);
mvPosition = LINTERP(t, a->mvPosition, b->mvPosition);
for (int i = 0; i < 4; ++i)
projectedPosition[i] = LINTERP(t, a->projectedPosition[i], b->projectedPosition[i]);
projectedPosition.x = LINTERP(t, a->projectedPosition.x, b->projectedPosition.x);
projectedPosition.y = LINTERP(t, a->projectedPosition.y, b->projectedPosition.y);
projectedPosition.z = LINTERP(t, a->projectedPosition.z, b->projectedPosition.z);
projectedPosition.w = LINTERP(t, a->projectedPosition.w, b->projectedPosition.w);
for (int i = 0; i < 3; ++i)
{
normal[i][0] = LINTERP(t, a->normal[i][0], b->normal[i][0]);
normal[i][1] = LINTERP(t, a->normal[i][1], b->normal[i][1]);
normal[i][2] = LINTERP(t, a->normal[i][2], b->normal[i][2]);
normal[i] = LINTERP(t, a->normal[i], b->normal[i]);
}
u16 t_int = (u16)(t * 256);
@ -74,9 +83,7 @@ struct OutputVertexData
for (int i = 0; i < 8; ++i)
{
texCoords[i][0] = LINTERP(t, a->texCoords[i][0], b->texCoords[i][0]);
texCoords[i][1] = LINTERP(t, a->texCoords[i][1], b->texCoords[i][1]);
texCoords[i][2] = LINTERP(t, a->texCoords[i][2], b->texCoords[i][2]);
texCoords[i] = LINTERP(t, a->texCoords[i], b->texCoords[i]);
}
#undef LINTERP

View File

@ -27,8 +27,20 @@
#include "VideoConfig.h"
#define BLOCK_SIZE 8
#define BLOCK_SIZE 2
#define CLAMP(x, a, b) (x>b)?b:(x<a)?a:x
// Approximates log2(f) as a fixed-point number with 4 fractional bits (s28.4).
// The value is read straight out of the float's bit pattern; it is only an
// approximation, but close enough to use for LOD selection.
static inline s32 FixedLog2(float f)
{
    // raw bit pattern of the float
    const u32 bits = *(u32*)&f;

    // exponent field shifted so it lands above the 4 fractional bits, then the
    // exponent bias (127 << 4 == 2032) is removed
    const s32 integerPart = ((bits & 0x7F800000) >> 19) - 2032;

    // the top four mantissa bits serve as the approximate fraction
    const s32 fractionPart = (bits & 0x007fffff) >> 19;

    return integerPart + fractionPart;
}
namespace Rasterizer
{
@ -43,6 +55,7 @@ s32 scissorRight = 0;
s32 scissorBottom = 0;
Tev tev;
RasterBlock rasterBlock;
void Init()
{
@ -91,53 +104,58 @@ void SetTevReg(int reg, int comp, bool konst, s16 color)
tev.SetRegColor(reg, comp, konst, color);
}
inline void Draw(s32 x, s32 y)
inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
{
INCSTAT(stats.thisFrame.rasterizedPixels);
float zFloat = 1.0f + ZSlope.GetValue(x, y);
if(zFloat < 0|| zFloat > 1)
return;
float zFloat = 1.0f + ZSlope.GetValue(x, y);
if (zFloat < 0.0f || zFloat > 1.0f)
return;
u32 z = (u32)(zFloat * 0x00ffffff);
s32 z = (s32)(zFloat * 0x00ffffff);
if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
{
// early z
if (!EfbInterface::ZCompare(x, y, z))
return;
}
if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
{
// early z
if (!EfbInterface::ZCompare(x, y, z))
return;
}
float invW = 1.0f / WSlope.GetValue(x, y);
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
tev.Position[0] = x;
tev.Position[1] = y;
tev.Position[2] = z;
float invW = pixel.InvW;
for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
{
for(int comp = 0; comp < 4; comp++)
tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y);
}
tev.Position[0] = x;
tev.Position[1] = y;
tev.Position[2] = z;
for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
{
if (xfregs.texMtxInfo[i].projection)
{
float q = TexSlopes[i][2].GetValue(x, y) * invW;
float invQ = invW / q;
tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invQ * (bpmem.texcoords[i].s.scale_minus_1 + 1);
tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invQ * (bpmem.texcoords[i].t.scale_minus_1 + 1);
tev.Lod[i] = 0;
}
else
{
tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invW * (bpmem.texcoords[i].s.scale_minus_1 + 1);
tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invW * (bpmem.texcoords[i].t.scale_minus_1 + 1);
tev.Lod[i] = 0;
}
}
// colors
for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
{
for(int comp = 0; comp < 4; comp++)
tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y);
}
// tex coords
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
{
// multiply by 128 because TEV stores stores UVs as s17.7
tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128);
tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128);
}
for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
{
tev.IndirectLod[i] = rasterBlock.IndirectLod[i];
tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i];
}
for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
{
tev.TextureLod[i] = rasterBlock.TextureLod[i];
tev.TextureLinear[i] = rasterBlock.TextureLinear[i];
}
tev.Draw();
}
@ -155,6 +173,109 @@ void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX1
slope->y0 = Y1;
}
// Absolute difference of two floats. Written out by hand so the result never depends
// on which abs() overload an unqualified call would bind to (the integer one truncates).
static inline float AbsDelta(float a, float b)
{
    const float d = a - b;
    return (d < 0.0f) ? -d : d;
}

// Computes the level of detail for one texture lookup of the current raster block.
//
// lod      - out: LOD in s28.4, after bias and clamping to the texture's min/max LOD
// linear   - out: true when the filter selected for this LOD is a linear one
// texmap   - texture map whose mode registers supply diag_lod, bias, filters and clamp
// texcoord - index of the texture coordinate set in rasterBlock.Pixel[..][..].Uv
inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord)
{
    FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    u8 subTexmap = texmap & 3;

    // LOD calculation requires data from the texture mode for bias, etc.
    // it does not seem to use the actual texture size
    TexMode0& tm0 = texUnit.texMode0[subTexmap];
    TexMode1& tm1 = texUnit.texMode1[subTexmap];

    float sDelta, tDelta;
    if (tm0.diag_lod)
    {
        // compare the two pixels on the block's diagonal
        float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
        float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];

        sDelta = AbsDelta(uv0[0], uv1[0]);
        tDelta = AbsDelta(uv0[1], uv1[1]);
    }
    else
    {
        // compare pixel (0,0) with its neighbours along each axis and keep the larger change
        float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
        float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
        float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];

        sDelta = max(AbsDelta(uv0[0], uv1[0]), AbsDelta(uv0[0], uv2[0]));
        tDelta = max(AbsDelta(uv0[1], uv1[1]), AbsDelta(uv0[1], uv2[1]));
    }

    // get LOD in s28.4
    lod = FixedLog2(max(sDelta, tDelta));

    // bias is s2.5; drop one fractional bit so it lines up with the 4 fractional bits of lod
    int bias = tm0.lod_bias;
    bias >>= 1;
    lod += bias;

    // the filter choice is made on the biased LOD, before it is clamped
    linear = ((lod >= 0 && (tm0.min_filter & 4)) || (lod < 0 && tm0.mag_filter));

    // order of checks matters
    // should be:
    // if lod > max then max
    // else if lod < min then min
    lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod);
}
// Fills rasterBlock for the BLOCK_SIZE x BLOCK_SIZE pixel block whose first pixel is
// (blockX, blockY): per-pixel 1/w and texture coordinates first, then one LOD /
// linear-filter decision per indirect stage and per enabled TEV stage.
//
// blockX, blockY - screen position of the block's first pixel
void BuildBlock(s32 blockX, s32 blockY)
{
    for (s32 yi = 0; yi < BLOCK_SIZE; yi++)
    {
        for (s32 xi = 0; xi < BLOCK_SIZE; xi++)
        {
            RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];

            s32 x = xi + blockX;
            s32 y = yi + blockY;

            float invW = 1.0f / WSlope.GetValue(x, y);
            pixel.InvW = invW;

            // tex coords
            for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
            {
                float projection;
                if (xfregs.texMtxInfo[i].projection)
                {
                    // projected texgen: divide by the interpolated third coordinate as well
                    float q = TexSlopes[i][2].GetValue(x, y) * invW;
                    projection = invW / q;
                }
                else
                    projection = invW;

                pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection;
                pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection;
            }
        }
    }

    // Indirect stages. Use the register's own accessors rather than decoding the raw
    // value by hand: the map and coord selectors are wider than two bits, so masking
    // them with 3 would fold maps/coords 4..7 onto 0..3.
    for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
    {
        u32 texmap = bpmem.tevindref.getTexMap(i);
        u32 texcoord = bpmem.tevindref.getTexCoord(i);

        CalculateLOD(rasterBlock.IndirectLod[i], rasterBlock.IndirectLinear[i], texmap, texcoord);
    }

    // Regular TEV stages; two stages share one order register
    for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
    {
        int stageOdd = i&1;
        TwoTevStageOrders &order = bpmem.tevorders[i >> 1];
        if(order.getEnable(stageOdd))
        {
            u32 texmap = order.getTexMap(stageOdd);
            u32 texcoord = order.getTexCoord(stageOdd);

            CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord);
        }
    }
}
void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
{
INCSTAT(stats.thisFrame.numTrianglesDrawn);
@ -217,7 +338,7 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
float fltdy12 = flty1 - v1->screenPosition[1];
float fltdy31 = v2->screenPosition[1] - flty1;
float w[3] = { 1.0f / v0->projectedPosition[3], 1.0f / v1->projectedPosition[3], 1.0f / v2->projectedPosition[3] };
float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w };
InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);
InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);
@ -281,14 +402,16 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
// Skip block when outside an edge
if(a == 0x0 || b == 0x0 || c == 0x0) continue;
BuildBlock(x, y);
// Accept whole block when totally covered
if(a == 0xF && b == 0xF && c == 0xF)
{
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
{
for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
{
Draw(ix, iy + y);
Draw(x + ix, y + iy, ix, iy);
}
}
}
@ -298,17 +421,17 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
for(s32 iy = y; iy < y + BLOCK_SIZE; iy++)
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
{
s32 CX1 = CY1;
s32 CX2 = CY2;
s32 CX3 = CY3;
for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
{
if(CX1 > 0 && CX2 > 0 && CX3 > 0)
{
Draw(ix, iy);
Draw(x + ix, y + iy, ix, iy);
}
CX1 -= FDY12;

View File

@ -39,6 +39,21 @@ namespace Rasterizer
float y0;
float GetValue(s32 x, s32 y) { return f0 + (dfdx * (x - x0)) + (dfdy * (y - y0)); }
};
// Values precomputed for one pixel of a raster block.
struct RasterBlockPixel
{
// reciprocal of the interpolated w at this pixel
float InvW;
// texture coordinates at this pixel: [texgen index][0 = s, 1 = t]
float Uv[8][2];
};
// Data shared by all pixels of the block currently being rasterized.
struct RasterBlock
{
// per-pixel data, indexed [x within block][y within block]
RasterBlockPixel Pixel[2][2];
// LOD and linear-filter flag, one entry per indirect stage
s32 IndirectLod[4];
bool IndirectLinear[4];
// LOD and linear-filter flag, one entry per TEV stage
s32 TextureLod[16];
bool TextureLinear[16];
};
}

View File

@ -134,10 +134,38 @@ void SetupUnit::SetupTriFan()
}
void SetupUnit::SetupLine()
{}
{
if (m_VertexCounter < 1)
{
m_VertexCounter++;
m_VertWritePointer = m_VertPointer[m_VertexCounter];
return;
}
Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
m_VertexCounter = 0;
m_VertWritePointer = m_VertPointer[0];
}
void SetupUnit::SetupLineStrip()
{}
{
if (m_VertexCounter < 1)
{
m_VertexCounter++;
m_VertWritePointer = m_VertPointer[m_VertexCounter];
return;
}
m_VertexCounter++;
Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
m_VertWritePointer = m_VertPointer[0];
m_VertPointer[0] = m_VertPointer[1];
m_VertPointer[1] = &m_Vertices[m_VertexCounter & 1];
}
void SetupUnit::SetupPoint()
{}

View File

@ -439,34 +439,33 @@ static bool AlphaTest(int alpha)
return true;
}
inline float WrapIndirectCoord(float coord, int wrapMode)
inline s32 WrapIndirectCoord(s32 coord, int wrapMode)
{
switch (wrapMode) {
case ITW_OFF:
return coord;
case ITW_256:
return fmod(coord, 256);
case ITW_128:
return fmod(coord, 128);
return (coord % (256 << 7));
case ITW_128:
return (coord % (128 << 7));
case ITW_64:
return fmod(coord, 64);
return (coord % (64 << 7));
case ITW_32:
return fmod(coord, 32);
return (coord % (32 << 7));
case ITW_16:
return fmod(coord, 16);
return (coord % (16 << 7));
case ITW_0:
return 0;
}
return 0;
}
void Tev::Indirect(unsigned int stageNum, float s, float t)
void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
{
TevStageIndirect &indirect = bpmem.tevind[stageNum];
u8 *indmap = IndirectTex[indirect.bt];
float indcoord[3];
s32 indcoord[3];
// alpha bump select
switch (indirect.bs) {
@ -494,32 +493,32 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
// format
switch(indirect.fmt) {
case ITF_8:
indcoord[0] = (float)indmap[ALP_C] + bias[0];
indcoord[1] = (float)indmap[BLU_C] + bias[1];
indcoord[2] = (float)indmap[GRN_C] + bias[2];
indcoord[0] = indmap[ALP_C] + bias[0];
indcoord[1] = indmap[BLU_C] + bias[1];
indcoord[2] = indmap[GRN_C] + bias[2];
AlphaBump = AlphaBump & 0xf8;
break;
case ITF_5:
indcoord[0] = (float)(indmap[ALP_C] & 0x1f) + bias[0];
indcoord[1] = (float)(indmap[BLU_C] & 0x1f) + bias[1];
indcoord[2] = (float)(indmap[GRN_C] & 0x1f) + bias[2];
indcoord[0] = (indmap[ALP_C] & 0x1f) + bias[0];
indcoord[1] = (indmap[BLU_C] & 0x1f) + bias[1];
indcoord[2] = (indmap[GRN_C] & 0x1f) + bias[2];
AlphaBump = AlphaBump & 0xe0;
break;
case ITF_4:
indcoord[0] = (float)(indmap[ALP_C] & 0x0f) + bias[0];
indcoord[1] = (float)(indmap[BLU_C] & 0x0f) + bias[1];
indcoord[2] = (float)(indmap[GRN_C] & 0x0f) + bias[2];
indcoord[0] = (indmap[ALP_C] & 0x0f) + bias[0];
indcoord[1] = (indmap[BLU_C] & 0x0f) + bias[1];
indcoord[2] = (indmap[GRN_C] & 0x0f) + bias[2];
AlphaBump = AlphaBump & 0xf0;
break;
case ITF_3:
indcoord[0] = (float)(indmap[ALP_C] & 0x07) + bias[0];
indcoord[1] = (float)(indmap[BLU_C] & 0x07) + bias[1];
indcoord[2] = (float)(indmap[GRN_C] & 0x07) + bias[2];
indcoord[0] = (indmap[ALP_C] & 0x07) + bias[0];
indcoord[1] = (indmap[BLU_C] & 0x07) + bias[1];
indcoord[2] = (indmap[GRN_C] & 0x07) + bias[2];
AlphaBump = AlphaBump & 0xf8;
break;
}
float indtevtrans[2] = { 0,0 };
s64 indtevtrans[2] = { 0,0 };
// matrix multiply
int indmtxid = indirect.mid & 3;
@ -529,39 +528,40 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
int scale = ((u32)indmtx.col0.s0 << 0) |
((u32)indmtx.col1.s1 << 2) |
((u32)indmtx.col2.s2 << 4);
float fscale = 0.0f;
int shift;
switch (indirect.mid & 12) {
case 0:
fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
case 0:
shift = 3 + (17 - scale);
indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2];
indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2];
break;
case 4: // s matrix
fscale = powf(2.0f, (float)(scale - 17)) / 256;
shift = 8 + (17 - scale);
indtevtrans[0] = s * indcoord[0];
indtevtrans[1] = t * indcoord[0];
break;
case 8: // t matrix
fscale = powf(2.0f, (float)(scale - 17)) / 256;
shift = 8 + (17 - scale);
indtevtrans[0] = s * indcoord[1];
indtevtrans[1] = t * indcoord[1];
break;
}
indtevtrans[0] *= fscale;
indtevtrans[1] *= fscale;
indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
}
if (indirect.fb_addprev)
if (indirect.fb_addprev)
{
TexCoord[0] += WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
TexCoord[1] += WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
}
else
{
TexCoord[0] = WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
TexCoord[1] = WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
}
}
@ -580,10 +580,12 @@ void Tev::Draw()
u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
u32 texmap = bpmem.tevindref.getTexMap(stageNum);
float scaleS = bpmem.texscale[stageNum2].getScaleS(stageOdd);
float scaleT = bpmem.texscale[stageNum2].getScaleT(stageOdd);
const TEXSCALE& texscale = bpmem.texscale[stageNum2];
s32 scaleS = stageOdd ? texscale.ss1:texscale.ss0;
s32 scaleT = stageOdd ? texscale.ts1:texscale.ts0;
TextureSampler::Sample(Uv[texcoordSel][0] * scaleS, Uv[texcoordSel][1] * scaleT, Lod[texcoordSel], texmap, IndirectTex[stageNum]);
TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT,
IndirectLod[stageNum], IndirectLinear[stageNum], texmap, IndirectTex[stageNum]);
#ifdef _DEBUG
if (g_Config.bDumpTevStages)
@ -608,14 +610,14 @@ void Tev::Draw()
int texcoordSel = order.getTexCoord(stageOdd);
int texmap = order.getTexMap(stageOdd);
Indirect(stageNum, Uv[texcoordSel][0], Uv[texcoordSel][1]);
Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);
// sample texture
if (order.getEnable(stageOdd))
{
u8 texel[4];
TextureSampler::Sample(TexCoord[0], TexCoord[1], Lod[texcoordSel], texmap, texel);
TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum], texmap, texel);
int swaptable = ac.tswap * 2;

View File

@ -21,7 +21,20 @@
#include "BPMemLoader.h"
class Tev
{
{
// Four values packed as bit-fields: a, b and c are unsigned 8-bit, d is signed 11-bit.
// NOTE(review): presumably the a/b/c/d inputs of a TEV combiner stage - confirm against its users.
struct InputRegType {
unsigned a : 8;
unsigned b : 8;
unsigned c : 8;
signed d : 11;
};
// Texture coordinate pair stored as signed 24-bit bit-fields, so values are limited to
// 24 bits when stored.
// NOTE(review): the rasterizer appears to store these scaled by 128 (7 fractional bits) - confirm.
struct TextureCoordinateType
{
signed s : 24;
signed t : 24;
};
// color order: RGBA
s16 Reg[4][4];
s16 KonstantColors[4][4];
@ -32,7 +45,7 @@ class Tev
s16 Zero16[4];
u8 AlphaBump;
u8 IndirectTex[4][4];
float TexCoord[2];
TextureCoordinateType TexCoord;
s16 *m_ColorInputLUT[16][3];
s16 *m_AlphaInputLUT[8]; // values must point to RGBA color
@ -49,20 +62,16 @@ class Tev
void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac);
void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac);
void Indirect(unsigned int stageNum, float s, float t);
struct InputRegType {
unsigned a : 8;
unsigned b : 8;
unsigned c : 8;
signed d : 11;
};
void Indirect(unsigned int stageNum, s32 s, s32 t);
public:
s32 Position[3];
s32 Position[3];
u8 Color[2][4];
float Uv[8][2];
float Lod[8];
TextureCoordinateType Uv[8];
s32 IndirectLod[4];
bool IndirectLinear[4];
s32 TextureLod[16];
bool TextureLinear[16];
void Init();

View File

@ -23,29 +23,11 @@
#include <cmath>
#define ALLOW_MIPMAP 1
namespace TextureSampler
{
inline int iround(float x)
{
int t;
#if defined(_WIN32) && !defined(_M_X64)
__asm
{
fld x
fistp t
}
#else
t = (int)x;
if((x - t) >= 0.5)
return t + 1;
#endif
return t;
}
inline void WrapCoord(int &coord, int wrapMode, int imageSize)
{
switch (wrapMode)
@ -85,9 +67,53 @@ inline void AddTexel(u8 *inTexel, u32 *outTexel, u32 fract)
outTexel[3] += inTexel[3] * fract;
}
void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
int baseMip = 0;
bool mipLinear = false;
#if (ALLOW_MIPMAP)
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
TexMode0& tm0 = texUnit.texMode0[texmap & 3];
s32 lodFract = lod & 0xf;
if (lod > 0 && tm0.min_filter & 3)
{
// use mipmap
baseMip = lod >> 4;
mipLinear = (lodFract && tm0.min_filter & 2);
// if using nearest mip filter and lodFract >= 0.5 round up to next mip
baseMip += (lodFract >> 3) & (tm0.min_filter & 1);
}
if (mipLinear)
{
u8 sampledTex[4];
u32 texel[4];
SampleMip(s, t, baseMip, linear, texmap, sampledTex);
SetTexel(sampledTex, texel, (16 - lodFract));
SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex);
AddTexel(sampledTex, texel, lodFract);
sample[0] = (u8)(texel[0] >> 4);
sample[1] = (u8)(texel[1] >> 4);
sample[2] = (u8)(texel[2] >> 4);
sample[3] = (u8)(texel[3] >> 4);
}
else
#endif
{
SampleMip(s, t, baseMip, linear, texmap, sample);
}
}
void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;
TexMode0& tm0 = texUnit.texMode0[subTexmap];
@ -97,59 +123,85 @@ void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;
u8 *imageSrc = g_VideoInitialize.pGetMemoryPointer(imageBase);
bool linear = false;
if ((lod > 0 && tm0.min_filter > 4) || (lod <= 0 && tm0.mag_filter))
linear = true;
int imageWidth = ti0.width;
int imageHeight = ti0.height;
int tlutAddress = texTlut.tmem_offset << 9;
// reduce sample location and texture size to mip level
// move texture pointer to mip location
if (mip)
{
int mipWidth = imageWidth + 1;
int mipHeight = imageHeight + 1;
int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format);
int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format);
int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format);
imageWidth >>= mip;
imageHeight >>= mip;
s >>= mip;
t >>= mip;
while (mip)
{
mipWidth = max(mipWidth, fmtWidth);
mipHeight = max(mipHeight, fmtHeight);
u32 size = (mipWidth * mipHeight * fmtDepth) >> 1;
imageSrc += size;
mipWidth >>= 1;
mipHeight >>= 1;
mip--;
}
}
// integer part of sample location
int imageS = s >> 7;
int imageT = t >> 7;
if (linear)
{
s32 s256 = s32((s - 0.5f) * 256);
s32 t256 = s32((t- 0.5f) * 256);
int imageS = s256 >> 8;
int imageSPlus1 = imageS + 1;
u32 fractS = s256 & 0xff;
fractS += fractS >> 7;
int imageT = t256 >> 8;
// linear sampling
int imageSPlus1 = imageS + 1;
int fractS = s & 0x7f;
int imageTPlus1 = imageT + 1;
u32 fractT = t256 & 0xff;
fractT += fractT >> 7;
int fractT = t & 0x7f;
u8 sampledTex[4];
u32 texel[4];
WrapCoord(imageS, tm0.wrap_s, ti0.width);
WrapCoord(imageT, tm0.wrap_t, ti0.height);
WrapCoord(imageSPlus1, tm0.wrap_s, ti0.width);
WrapCoord(imageTPlus1, tm0.wrap_t, ti0.height);
WrapCoord(imageS, tm0.wrap_s, imageWidth);
WrapCoord(imageT, tm0.wrap_t, imageHeight);
WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
SetTexel(sampledTex, texel, (256 - fractS) * (256 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
AddTexel(sampledTex, texel, (fractS) * (256 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
AddTexel(sampledTex, texel, (256 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
AddTexel(sampledTex, texel, (fractS) * (fractT));
sample[0] = (u8)(texel[0] >> 16);
sample[1] = (u8)(texel[1] >> 16);
sample[2] = (u8)(texel[2] >> 16);
sample[3] = (u8)(texel[3] >> 16);
sample[0] = (u8)(texel[0] >> 14);
sample[1] = (u8)(texel[1] >> 14);
sample[2] = (u8)(texel[2] >> 14);
sample[3] = (u8)(texel[3] >> 14);
}
else
{
int imageS = int(s);
int imageT = int(t);
// nearest neighbor sampling
WrapCoord(imageS, tm0.wrap_s, imageWidth);
WrapCoord(imageT, tm0.wrap_t, imageHeight);
WrapCoord(imageS, tm0.wrap_s, ti0.width);
WrapCoord(imageT, tm0.wrap_t, ti0.height);
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
}
}

View File

@ -23,7 +23,9 @@
namespace TextureSampler
{
void Sample(float s, float t, float lod, u8 texmap, u8 *sample);
void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample);
void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample);
}

View File

@ -22,6 +22,7 @@
#include "TransformUnit.h"
#include "XFMemLoader.h"
#include "CPMemLoader.h"
#include "BPMemLoader.h"
#include "NativeVertexFormat.h"
#include "../../Plugin_VideoDX9/Src/Vec3.h"
@ -30,48 +31,48 @@
namespace TransformUnit
{
void MultiplyVec2Mat24(const float *vec, const float *mat, float *result)
void MultiplyVec2Mat24(const Vec3 &vec, const float *mat, Vec3 &result)
{
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];
result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
}
void MultiplyVec2Mat34(const float *vec, const float *mat, float *result)
void MultiplyVec2Mat34(const Vec3 &vec, const float *mat, Vec3 &result)
{
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] + mat[11];
result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];
result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] + mat[11];
}
void MultiplyVec3Mat33(const float *vec, const float *mat, float *result)
void MultiplyVec3Mat33(const Vec3 &vec, const float *mat, Vec3 &result)
{
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2];
result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2];
result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2];
result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z;
result.y = mat[3] * vec.x + mat[4] * vec.y + mat[5] * vec.z;
result.z = mat[6] * vec.x + mat[7] * vec.y + mat[8] * vec.z;
}
void MultiplyVec3Mat34(const float *vec, const float *mat, float *result)
void MultiplyVec3Mat34(const Vec3 &vec, const float *mat, Vec3 &result)
{
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2] + mat[3];
result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] * vec[2] + mat[7];
result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] * vec[2] + mat[11];
result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z + mat[3];
result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] * vec.z + mat[7];
result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] * vec.z + mat[11];
}
void MultipleVec3Perspective(const float *vec, const float *proj, float *result)
void MultipleVec3Perspective(const Vec3 &vec, const float *proj, Vec4 &result)
{
result[0] = proj[0] * vec[0] + proj[1] * vec[2];
result[1] = proj[2] * vec[1] + proj[3] * vec[2];
//result[2] = (proj[4] * vec[2] + proj[5]);
result[2] = (proj[4] * vec[2] + proj[5]) * (1.0f - (float)1e-7);
result[3] = -vec[2];
result.x = proj[0] * vec.x + proj[1] * vec.z;
result.y = proj[2] * vec.y + proj[3] * vec.z;
//result.z = (proj[4] * vec.z + proj[5]);
result.z = (proj[4] * vec.z + proj[5]) * (1.0f - (float)1e-7);
result.w = -vec.z;
}
void MultipleVec3Ortho(const float *vec, const float *proj, float *result)
void MultipleVec3Ortho(const Vec3 &vec, const float *proj, Vec4 &result)
{
result[0] = proj[0] * vec[0] + proj[1];
result[1] = proj[2] * vec[1] + proj[3];
result[2] = proj[4] * vec[2] + proj[5];
result[3] = 1;
result.x = proj[0] * vec.x + proj[1];
result.y = proj[2] * vec.y + proj[3];
result.z = proj[4] * vec.z + proj[5];
result.w = 1;
}
void TransformPosition(const InputVertexData *src, OutputVertexData *dst)
@ -98,55 +99,53 @@ void TransformNormal(const InputVertexData *src, bool nbt, OutputVertexData *dst
MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
Vec3 *norm0 = (Vec3*)dst->normal[0];
norm0->normalize();
dst->normal[0].normalize();
}
else
{
MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
Vec3 *norm0 = (Vec3*)dst->normal[0];
norm0->normalize();
dst->normal[0].normalize();
}
}
inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bool specialCase, const InputVertexData *srcVertex, OutputVertexData *dstVertex)
{
const float *src;
const Vec3 *src;
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
src = srcVertex->position;
src = &srcVertex->position;
break;
case XF_SRCNORMAL_INROW:
src = srcVertex->normal[0];
src = &srcVertex->normal[0];
break;
case XF_SRCBINORMAL_T_INROW:
src = srcVertex->normal[1];
src = &srcVertex->normal[1];
break;
case XF_SRCBINORMAL_B_INROW:
src = srcVertex->normal[2];
src = &srcVertex->normal[2];
break;
default:
_assert_(texinfo.sourcerow >= XF_SRCTEX0_INROW && texinfo.sourcerow <= XF_SRCTEX7_INROW);
src = srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
src = (Vec3*)srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
break;
}
const float *mat = (const float*)&xfregs.posMatrices[srcVertex->texMtx[coordNum] * 4];
float *dst = dstVertex->texCoords[coordNum];
Vec3 *dst = &dstVertex->texCoords[coordNum];
if (texinfo.inputform == XF_TEXINPUT_AB11)
{
MultiplyVec2Mat34(src, mat, dst);
MultiplyVec2Mat34(*src, mat, *dst);
}
else
{
MultiplyVec3Mat34(src, mat, dst);
MultiplyVec3Mat34(*src, mat, *dst);
}
if (xfregs.dualTexTrans)
{
float tempCoord[3];
Vec3 tempCoord;
// normalize
const PostMtxInfo &postInfo = xfregs.postMtxInfo[coordNum];
@ -157,12 +156,12 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
// no normalization
// q of input is 1
// q of output is unknown
tempCoord[0] = dst[0];
tempCoord[1] = dst[1];
tempCoord.x = dst->x;
tempCoord.y = dst->y;
dst[0] = postMat[0] * tempCoord[0] + postMat[1] * tempCoord[1] + postMat[2] + postMat[3];
dst[1] = postMat[4] * tempCoord[0] + postMat[5] * tempCoord[1] + postMat[6] + postMat[7];
dst[2] = 0.0f;
dst->x = postMat[0] * tempCoord.x + postMat[1] * tempCoord.y + postMat[2] + postMat[3];
dst->y = postMat[4] * tempCoord.x + postMat[5] * tempCoord.y + postMat[6] + postMat[7];
dst->z = 1.0f;
}
else
{
@ -170,18 +169,14 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
{
    // dst is a Vec3*, so the length must be built from its components.
    // Indexing it as dst[0..2] would address three adjacent Vec3 objects
    // and sum their dot products instead.
    float length = sqrtf(dst->x * dst->x + dst->y * dst->y + dst->z * dst->z);
    float invL = 1.0f / length;
    tempCoord = *dst * invL;
}
else
{
    tempCoord = *dst;
}

// tempCoord is a copy, so writing the product into *dst cannot clobber the input.
MultiplyVec3Mat34(tempCoord, postMat, *dst);
}
}
}
@ -220,13 +215,8 @@ inline float SafeDivide(float n, float d)
return (d==0)?(n>0?1:0):n/d;
}
void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
void LightColor(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
{
// must be the size of 3 32bit floats for the light pointer to be valid
_assert_(sizeof(Vec3) == 12);
const Vec3 *pos = (const Vec3*)vertexPos;
const Vec3 *norm0 = (const Vec3*)normal;
const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];
if (!(chan.attnfunc & 1)) {
@ -237,15 +227,15 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
break;
case LIGHTDIF_SIGN:
{
Vec3 ldir = (light->pos - *pos).normalized();
float diffuse = ldir * (*norm0);
Vec3 ldir = (light->pos - pos).normalized();
float diffuse = ldir * normal;
AddScaledIntegerColor(light->color, diffuse, lightCol);
}
break;
case LIGHTDIF_CLAMP:
{
Vec3 ldir = (light->pos - *pos).normalized();
float diffuse = max(0.0f, ldir * (*norm0));
Vec3 ldir = (light->pos - pos).normalized();
float diffuse = max(0.0f, ldir * normal);
AddScaledIntegerColor(light->color, diffuse, lightCol);
}
break;
@ -254,7 +244,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
}
else { // spec and spot
// not sure about divide by zero checks
Vec3 ldir = light->pos - *pos;
Vec3 ldir = light->pos - pos;
float attn;
if (chan.attnfunc == 3) { // spot
@ -269,7 +259,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
}
else if (chan.attnfunc == 1) { // specular
// donko - what is going on here? 655.36 is a guess but seems about right.
attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
ldir.set(1.0f, attn, attn * attn);
float cosAtt = max(0.0f, light->cosatt * ldir);
@ -283,14 +273,14 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
break;
case LIGHTDIF_SIGN:
{
float difAttn = ldir * (*norm0);
float difAttn = ldir * normal;
AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
}
break;
case LIGHTDIF_CLAMP:
{
float difAttn = max(0.0f, ldir * (*norm0));
float difAttn = max(0.0f, ldir * normal);
AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
}
break;
@ -299,13 +289,8 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
}
}
void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, float &lightCol)
void LightAlpha(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, float &lightCol)
{
// must be the size of 3 32bit floats for the light pointer to be valid
_assert_(sizeof(Vec3) == 12);
const Vec3 *pos = (const Vec3*)vertexPos;
const Vec3 *norm0 = (const Vec3*)normal;
const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];
if (!(chan.attnfunc & 1)) {
@ -316,15 +301,15 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
break;
case LIGHTDIF_SIGN:
{
Vec3 ldir = (light->pos - *pos).normalized();
float diffuse = ldir * (*norm0);
Vec3 ldir = (light->pos - pos).normalized();
float diffuse = ldir * normal;
lightCol += light->color[0] * diffuse;
}
break;
case LIGHTDIF_CLAMP:
{
Vec3 ldir = (light->pos - *pos).normalized();
float diffuse = max(0.0f, ldir * (*norm0));
Vec3 ldir = (light->pos - pos).normalized();
float diffuse = max(0.0f, ldir * normal);
lightCol += light->color[0] * diffuse;
}
break;
@ -332,7 +317,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
}
}
else { // spec and spot
Vec3 ldir = light->pos - *pos;
Vec3 ldir = light->pos - pos;
float attn;
if (chan.attnfunc == 3) { // spot
@ -347,7 +332,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
}
else if (chan.attnfunc == 1) { // specular
// donko - what is going on here? 655.36 is a guess but seems about right.
attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
ldir.set(1.0f, attn, attn * attn);
float cosAtt = light->cosatt * ldir;
@ -361,14 +346,14 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
break;
case LIGHTDIF_SIGN:
{
float difAttn = ldir * (*norm0);
float difAttn = ldir * normal;
lightCol += light->color[0] * attn * difAttn;
}
break;
case LIGHTDIF_CLAMP:
{
float difAttn = max(0.0f, ldir * (*norm0));
float difAttn = max(0.0f, ldir * normal);
lightCol += light->color[0] * attn * difAttn;
}
break;
@ -472,14 +457,11 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
break;
case XF_TEXGEN_EMBOSS_MAP:
{
const Vec3 *pos = (const Vec3*)dst->mvPosition;
const Vec3 *norm1 = (const Vec3*)dst->normal[1];
const Vec3 *norm2 = (const Vec3*)dst->normal[2];
const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*texinfo.embosslightshift];
Vec3 ldir = (light->pos - *pos).normalized();
float d1 = ldir * (*norm1);
float d2 = ldir * (*norm2);
Vec3 ldir = (light->pos - dst->mvPosition).normalized();
float d1 = ldir * dst->normal[1];
float d2 = ldir * dst->normal[2];
dst->texCoords[coordNum][0] = dst->texCoords[texinfo.embosssourceshift][0] + d1;
dst->texCoords[coordNum][1] = dst->texCoords[texinfo.embosssourceshift][1] + d2;
@ -503,6 +485,9 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
default:
ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype);
}
dst->texCoords[coordNum][0] *= (bpmem.texcoords[coordNum].s.scale_minus_1 + 1);
dst->texCoords[coordNum][1] *= (bpmem.texcoords[coordNum].t.scale_minus_1 + 1);
}
}

View File

@ -24,32 +24,32 @@ namespace VertexFormatConverter
{
// Loads one normal from three signed bytes at src into dst->normal[0].
// Each byte is divided by 128, mapping the s8 range onto [-1, 1).
void LoadNormal1_Byte(InputVertexData *dst, u8 *src)
{
    dst->normal[0].x = (float)(s8)src[0] / 128;
    dst->normal[0].y = (float)(s8)src[1] / 128;
    dst->normal[0].z = (float)(s8)src[2] / 128;
}
// Loads one normal from three signed 16-bit values at src into dst->normal[0].
// Each value is divided by 32768, mapping the s16 range onto [-1, 1).
// NOTE(review): src is reinterpreted as s16*, so it is assumed to be suitably
// aligned and already in host byte order - confirm against the caller.
void LoadNormal1_Short(InputVertexData *dst, u8 *src)
{
    dst->normal[0].x = (float)((s16*)src)[0] / 32768;
    dst->normal[0].y = (float)((s16*)src)[1] / 32768;
    dst->normal[0].z = (float)((s16*)src)[2] / 32768;
}
// Loads one normal from three floats at src into dst->normal[0], unscaled.
// NOTE(review): src is reinterpreted as float*, so it is assumed to be suitably
// aligned and already in host byte order - confirm against the caller.
void LoadNormal1_Float(InputVertexData *dst, u8 *src)
{
    dst->normal[0].x = ((float*)src)[0];
    dst->normal[0].y = ((float*)src)[1];
    dst->normal[0].z = ((float*)src)[2];
}
// Loads three consecutive 3-component vectors (nine signed bytes at src) into
// dst->normal[0..2]. Each byte is divided by 128, mapping s8 onto [-1, 1).
void LoadNormal3_Byte(InputVertexData *dst, u8 *src)
{
    // i walks the destination vectors, j the matching byte offset in src.
    for (int i = 0, j = 0; i < 3; i++, j+=3)
    {
        dst->normal[i].x = (float)(s8)src[j + 0] / 128;
        dst->normal[i].y = (float)(s8)src[j + 1] / 128;
        dst->normal[i].z = (float)(s8)src[j + 2] / 128;
    }
}
@ -57,9 +57,9 @@ namespace VertexFormatConverter
{
for (int i = 0, j = 0; i < 3; i++, j+=3)
{
dst->normal[i][0] = (float)((s16*)src)[j + 0] / 32768;
dst->normal[i][1] = (float)((s16*)src)[j + 1] / 32768;
dst->normal[i][2] = (float)((s16*)src)[j + 2] / 32768;
dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768;
dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768;
dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768;
}
}
@ -67,9 +67,9 @@ namespace VertexFormatConverter
{
for (int i = 0, j = 0; i < 3; i++, j+=3)
{
dst->normal[i][0] = ((float*)src)[j + 0];
dst->normal[i][1] = ((float*)src)[j + 1];
dst->normal[i][2] = ((float*)src)[j + 2];
dst->normal[i].x = ((float*)src)[j + 0];
dst->normal[i].y = ((float*)src)[j + 1];
dst->normal[i].z = ((float*)src)[j + 2];
}
}
}