Implemented a secondary path for hardware without multiple render target support, so please do a lot of testing, especially people who had problems with the last release.

Corrected depth textures a little; they are still broken, but now at least I have discovered the reason. The really bad news for D3D lovers is that the only correct way to implement depth textures will be to do a first depth-only pass with blending disabled.
This is because blending affects the values stored in the depth texture, so to store the true values, blending must be deactivated.
This will degrade performance, but it is the only "correct" way in D3D 9. The other possibility is DX10, but that's a completely different story ;)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4526 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2009-11-10 12:45:03 +00:00
parent 0b30b23719
commit c2a4e33313
8 changed files with 167 additions and 103 deletions

View File

@ -143,10 +143,10 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
// output is given by .outreg
// tevtemp is set according to swapmodetables and
static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, bool HLSL);
static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL);
static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, bool HLSL);
static bool WriteAlphaTest(char *&p, u32 HLSL);
static void WriteFog(char *&p);
const float epsilon8bit = 1.0f / 255.0f;
@ -382,7 +382,7 @@ static void BuildSwapModeTable()
}
}
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL)
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
@ -452,7 +452,7 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
WRITE(p, "void main(\n");
WRITE(p, " out float4 ocol0 : COLOR0,\n");
if (HLSL)
if (HLSL == 1)
WRITE(p, " out float4 ocol1 : COLOR1,\n");
WRITE(p, " out float depth : DEPTH,\n");
@ -530,12 +530,11 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
WRITE(p, "depth = zCoord;\n");
if(HLSL)
if(HLSL == 1)
{
//WRITE(p, "ocol1 = float4(1.0f/255.0f,2.0f/255.0f,3.0f/255.0f,0.0f);\n");
WRITE(p, "float4 EncodedDepth = frac(zCoord * float4(254.0f*255.0f,255.0f,254.0f/255.0f,254.0f*255.0f*255.0f));\n");
//WRITE(p, "EncodedDepth -= EncodedDepth.aarg * float4(1.0f/255.0f,1.0f/255.0f,1.0f/255.0f,0.0f);\n");
WRITE(p, "ocol1 = EncodedDepth;\n");
WRITE(p, "float4 EncodedDepth = frac(zCoord * float4(254.0f/255.0,254.0f,254.0f*255.0f,0.0f));\n");
WRITE(p, "EncodedDepth -= EncodedDepth.raag * float4(0.0f,1.0f/255.0f,1.0f/255.0f,0.0f);\n");
WRITE(p, "ocol1 = float4(EncodedDepth.rgb,1.0f);\n");
}
//if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n");
@ -613,7 +612,7 @@ static const char *TEVCMPAlphaOPTable[16] =
};
static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
@ -811,7 +810,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
WRITE(p, ");\n\n");
}
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, bool HLSL)
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL)
{
if (texture_mask & (1<<texmap)) {
// non pow 2
@ -870,7 +869,7 @@ static const char *tevAlphaFunclogicTable[] =
" == " // xnor
};
static bool WriteAlphaTest(char *&p, bool HLSL)
static bool WriteAlphaTest(char *&p, u32 HLSL)
{
u32 op = bpmem.alphaFunc.logic;
u32 comp[2] = {bpmem.alphaFunc.comp0,bpmem.alphaFunc.comp1};

View File

@ -100,7 +100,7 @@ public:
}
};
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL = false);
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL = 0);
void GetPixelShaderId(PIXELSHADERUID &, u32 texturemask, u32 dstAlphaEnable);
extern PIXELSHADERUID last_pixel_shader_uid;

View File

@ -123,15 +123,23 @@ void Create()
//Select Zbuffer format supported by hardware.
if (g_ActiveConfig.bEFBAccessEnable)
{
if(D3D::GetCaps().NumSimultaneousRTs > 1)
{
hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, D3DFMT_D24X8,
D3DMULTISAMPLE_NONE, 0, FALSE, &s_efb_depth_surface, NULL);
CHECK(hr,"CreateDepthStencilSurface");
D3DFORMAT *DepthTexFormats = new D3DFORMAT[3];
DepthTexFormats[0] = D3DFMT_R32F;
DepthTexFormats[1] = D3DFMT_A8R8G8B8;
s_efb_depth_surface_Format = D3DFMT_A8R8G8B8;
for(int i = 0;i<2;i++)
{
s_efb_depth_surface_Format = DepthTexFormats[i];
//get the framebuffer Depth texture
HRESULT hr = D3D::dev->CreateTexture(target_width, target_height, 1, D3DUSAGE_RENDERTARGET, s_efb_depth_surface_Format,
hr = D3D::dev->CreateTexture(target_width, target_height, 1, D3DUSAGE_RENDERTARGET, s_efb_depth_surface_Format,
D3DPOOL_DEFAULT, &s_efb_depth_texture, NULL);
if (!FAILED(hr)) break;
}
CHECK(hr,"Depth Color Texture");
//get the Surface
if(s_efb_depth_texture)
@ -149,8 +157,11 @@ void Create()
//create an offscreen surface that we can lock to retrieve the data
hr = D3D::dev->CreateOffscreenPlainSurface(1, 1, s_efb_depth_surface_Format, D3DPOOL_SYSTEMMEM, &s_efb_depth_OffScreenReadBuffer, NULL );
CHECK(hr,"Create Depth offScreen Surface");
/*//depth format in prefered order
delete [] DepthTexFormats;
}
else
{
//depth format in prefered order
D3DFORMAT *DepthTexFormats = new D3DFORMAT[3];
DepthTexFormats[0] = D3DFMT_D32F_LOCKABLE;
DepthTexFormats[1] = D3DFMT_D16_LOCKABLE;
@ -164,8 +175,11 @@ void Create()
}
s_efb_depth_ReadBuffer = s_efb_depth_surface;
s_efb_depth_OffScreenReadBuffer = s_efb_depth_surface;
//ULTRAAAAAAAAAAA ugly hack when no depth textures are supported
s_efb_depthColor_surface = s_efb_color_surface;
CHECK(hr,"CreateDepthStencilSurface");
delete [] DepthTexFormats;*/
delete [] DepthTexFormats;
}
}
else
{
@ -173,6 +187,8 @@ void Create()
hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, s_efb_depth_surface_Format,
D3DMULTISAMPLE_NONE, 0, FALSE, &s_efb_depth_surface, NULL);
CHECK(hr,"CreateDepthStencilSurface");
//ULTRAAAAAAAAAAA ugly hack when no depth textures are supported
s_efb_depthColor_surface = s_efb_color_surface;
}
}

View File

@ -36,12 +36,19 @@ static float lastPSconstants[C_COLORMATRIX+16][4];
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0;
// Returns the pixel shader handle stored in the file-static s_ColorMatrixProgram.
// The static is initialised to 0, so the result may be null if nothing has
// assigned a shader to it yet.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram()
{
return s_ColorMatrixProgram;
}
// Returns the pixel shader handle stored in the file-static s_DepthMatrixProgram.
// The static is initialised to 0, so the result may be null if nothing has
// assigned a shader to it yet.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram()
{
return s_DepthMatrixProgram;
}
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram()
{
return s_ColorCopyProgram;
@ -92,9 +99,22 @@ void PixelShaderCache::Init()
" in float3 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0.xy);\n"
"}\n");
char pdmatrixprog[1024];
sprintf(pdmatrixprog,"uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float3 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"float4 EncodedDepth = frac(texcol.r * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"EncodedDepth -= EncodedDepth.raag * float4(0.0f,1.0f/255.0f,1.0f/255.0f,0.0f);\n"
"texcol = float4(EncodedDepth.rgb,1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram = D3D::CompilePixelShader(pmatrixprog, (int)strlen(pmatrixprog));
s_ColorCopyProgram = D3D::CompilePixelShader(pcopyprog, (int)strlen(pcopyprog));
s_DepthMatrixProgram = D3D::CompilePixelShader(pdmatrixprog, (int)strlen(pdmatrixprog));
Clear();
}
@ -118,6 +138,9 @@ void PixelShaderCache::Shutdown()
if(s_ColorCopyProgram)
s_ColorCopyProgram->Release();
s_ColorCopyProgram=NULL;
if(s_DepthMatrixProgram)
s_DepthMatrixProgram->Release();
s_DepthMatrixProgram = NULL;
Clear();
}
@ -158,7 +181,7 @@ bool PixelShaderCache::SetShader(bool dstAlpha)
return false;
}
const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), dstAlpha, true);
const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), dstAlpha, (D3D::GetCaps().NumSimultaneousRTs > 1)? 1 : 2);
LPDIRECT3DPIXELSHADER9 shader = D3D::CompilePixelShader(code, (int)strlen(code));
// Make an entry in the table

View File

@ -61,6 +61,7 @@ public:
static bool SetShader(bool dstAlpha);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram();
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram();
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram();
#if defined(_DEBUG) || defined(DEBUGFAST)
static std::string GetCurrentShaderCode();
#endif

View File

@ -172,6 +172,7 @@ bool Renderer::Init()
D3D::dev->Clear(0, NULL, D3DCLEAR_TARGET, 0x0, 0, 0);
D3D::dev->SetRenderTarget(0, FBManager::GetEFBColorRTSurface());
if(D3D::GetCaps().NumSimultaneousRTs > 1)
D3D::dev->SetRenderTarget(1, FBManager::GetEFBDepthEncodedSurface());
D3D::dev->SetDepthStencilSurface(FBManager::GetEFBDepthRTSurface());
vp.Width = s_target_width;
@ -274,6 +275,7 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc)
{
// Set the backbuffer as the rendering target
D3D::dev->SetDepthStencilSurface(NULL);
if(D3D::GetCaps().NumSimultaneousRTs > 1)
D3D::dev->SetRenderTarget(1, NULL);
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
@ -331,6 +333,7 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc)
OSD::DrawMessages();
D3D::dev->SetRenderTarget(0, FBManager::GetEFBColorRTSurface());
if(D3D::GetCaps().NumSimultaneousRTs > 1)
D3D::dev->SetRenderTarget(1, FBManager::GetEFBDepthEncodedSurface());
D3D::dev->SetDepthStencilSurface(FBManager::GetEFBDepthRTSurface());
@ -479,6 +482,11 @@ bool Renderer::SetScissorRect()
else
{
WARN_LOG(VIDEO, "Bad scissor rectangle: %i %i %i %i", rc.left, rc.top, rc.right, rc.bottom);
rc.left = 0;
rc.top = 0;
rc.right = GetTargetWidth();
rc.bottom = GetTargetHeight();
D3D::dev->SetScissorRect(&rc);
return false;
}
return true;
@ -510,7 +518,8 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
FBManager::GetEFBDepthRTSurfaceFormat() : FBManager::GetEFBColorRTSurfaceFormat();
D3DLOCKED_RECT drect;
if(!g_ActiveConfig.bEFBAccessEnable || BufferFormat == D3DFMT_D24X8)
return 0;
//Buffer not found alert
if(!pBuffer) {
@ -535,7 +544,8 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
RectToLock.left = targetPixelRc.left;
RectToLock.right = targetPixelRc.right;
RectToLock.top = targetPixelRc.top;
if(BufferFormat != D3DFMT_D32F_LOCKABLE && BufferFormat != D3DFMT_D16_LOCKABLE)
{
hr = D3D::dev->StretchRect(pBuffer,&RectToLock,RBuffer,NULL, D3DTEXF_NONE);
if(FAILED(hr))
{
@ -554,6 +564,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
RectToLock.left = 0;
RectToLock.right = 1;
RectToLock.top = 0;
}
//the surface is good.. lock it
if((hr = pOffScreenBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK)
@ -566,15 +577,32 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
switch(type) {
case PEEK_Z:
{
static float ffrac = 255.0f/254.0f;
switch (BufferFormat)
{
case D3DFMT_D32F_LOCKABLE:
val = ((float *)drect.pBits)[0];
break;
case D3DFMT_D16_LOCKABLE:
val = ((float)((u16 *)drect.pBits)[0])/((float)0xFFFF);
break;
case D3DFMT_R32F:
val = ((float *)drect.pBits)[0] * (255.0f/254.0f);
break;
default:
float ffrac = 1.0f/254.0f;
z = ((u32 *)drect.pBits)[0];
float fvalue = (((float)(z & 0xFF)) / 255.0f) * ffrac;
fvalue += (((float)((z>>8) & 0xFF)) / 255.0f) * (ffrac/255.0f);
fvalue += (((float)((z>>16) & 0xFF)) / 255.0f) * (ffrac/(255.0f*255.0f));
fvalue += (((float)((z>>24) & 0xFF)) / 255.0f) * (ffrac/(255.0f*255.0f*255.0f));
if(fvalue>1.0f)fvalue=1.0f;
if(fvalue<0.0f)fvalue=0.0f;
z = ((u32)(fvalue * 0xffffff));
val = ((float)((z>>16) & 0xFF)) * ffrac;
ffrac*= 1 / 255.0f;
val += ((float)((z>>8) & 0xFF)) * ffrac;
ffrac*= 1 / 255.0f;
val += ((float)(z & 0xFF)) * ffrac;
//ffrac*= 1 / 255.0f;
//val += ((float)((z>>24) & 0xFF)) * ffrac;
break;
};
if(val>1.0f)val=1.0f;
if(val<0.0f)val=0.0f;
z = ((u32)(val * 0xffffff));
}
break;
case POKE_Z:

View File

@ -338,32 +338,29 @@ have_texture:
{
case 0: // Z4
case 1: // Z8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1;
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 255.0f/254.0f;
break;
case 3: // Z16 //?
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1;
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 255.0f/254.0f;
case 11: // Z16 (reverse order)
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1;
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 255.0f/254.0f;
break;
case 6: // Z24X8
colmat[0] = 1;
colmat[5] = 1;
colmat[10] = 1;
colmat[15] = 1;
colmat[2] = colmat[5] = colmat[8] = 255.0f/254.0f;
break;
case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1;
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 255.0f/254.0f;
break;
case 10: // Z8L
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 255.0f/254.0f;
break;
case 12: // Z16L
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1;
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 255.0f/254.0f;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
colmat[2] = colmat[5] = colmat[8] = 255.0f/254.0f;
break;
}
}
@ -478,7 +475,7 @@ have_texture:
sourcerect.right = source_rect.right;
sourcerect.top = source_rect.top;
D3D::drawShadedTexQuad(read_texture,&sourcerect, EFB_WIDTH , EFB_HEIGHT,&destrect,PixelShaderCache::GetColorMatrixProgram(),NULL);
D3D::drawShadedTexQuad(read_texture,&sourcerect, EFB_WIDTH , EFB_HEIGHT,&destrect,(FBManager::GetEFBDepthRTSurfaceFormat() == D3DFMT_R32F && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(),NULL);
D3D::dev->SetRenderTarget(0, FBManager::GetEFBColorRTSurface());
D3D::dev->SetRenderTarget(1, FBManager::GetEFBDepthEncodedSurface());

View File

@ -265,10 +265,10 @@ GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) cons
// Maps an EFB-space rectangle to render-target space. Horizontal coordinates are
// scaled by GetTargetWidth() / EFB_WIDTH; vertical coordinates are scaled by
// GetTargetHeight() / EFB_HEIGHT and then subtracted from the target height.
TargetRectangle FramebufferManager::ConvertEFBRectangle(const EFBRectangle& rc) const
{
TargetRectangle result;
// NOTE(review): this listing looks like a rendered diff, so the next four
// assignments are presumably the pre-change lines and the four after them the
// post-change lines — confirm against the real file.
result.left = rc.left * Renderer::GetTargetWidth() / EFB_WIDTH;
result.top = Renderer::GetTargetHeight() - (rc.top * Renderer::GetTargetHeight() / EFB_HEIGHT);
result.right = rc.right * Renderer::GetTargetWidth() / EFB_WIDTH;
result.bottom = Renderer::GetTargetHeight() - (rc.bottom * Renderer::GetTargetHeight() / EFB_HEIGHT);
// Same expressions as above with an extra +1 on left/right and -1 on top/bottom.
// As written, these overwrite the values stored by the previous four lines.
result.left = rc.left * Renderer::GetTargetWidth() / EFB_WIDTH + 1 ;
result.top = Renderer::GetTargetHeight() - (rc.top * Renderer::GetTargetHeight() / EFB_HEIGHT) - 1;
result.right = rc.right * Renderer::GetTargetWidth() / EFB_WIDTH + 1;
result.bottom = Renderer::GetTargetHeight() - (rc.bottom * Renderer::GetTargetHeight() / EFB_HEIGHT) - 1;
return result;
}