diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 5bd2ff6f4b..faf0c97eff 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -871,12 +871,12 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con static const char *tevAlphaFuncsTable[] = { "(false)", //ALPHACMP_NEVER 0 - "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 - "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 - "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 - "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4 - "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5 - "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6 + "(prev.a <= %s - (0.5f/255.0f))", //ALPHACMP_LESS 1 + "(abs( prev.a - %s ) < (1.0f/255.0f))", //ALPHACMP_EQUAL 2 + "(prev.a < %s + (0.5f/255.0f))", //ALPHACMP_LEQUAL 3 + "(prev.a >= %s + (0.5f/255.0f))", //ALPHACMP_GREATER 4 + "(abs( prev.a - %s ) >= (1.0f/255.0f))", //ALPHACMP_NEQUAL 5 + "(prev.a > %s - (0.5f/255.0f))", //ALPHACMP_GEQUAL 6 "(true)" //ALPHACMP_ALWAYS 7 }; diff --git a/Source/Core/VideoCommon/Src/Render.h b/Source/Core/VideoCommon/Src/Render.h index 6c37f1ac38..8ca5213ae7 100644 --- a/Source/Core/VideoCommon/Src/Render.h +++ b/Source/Core/VideoCommon/Src/Render.h @@ -74,6 +74,8 @@ public: static int GetCustomHeight(); static int GetTargetWidth(); static int GetTargetHeight(); + static int GetFullTargetWidth(); + static int GetFullTargetHeight(); // Multiply any 2D EFB coordinates by these when rendering. static float GetTargetScaleX(); diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index b2b8e0ee23..fe5fd1127e 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -425,9 +425,9 @@ PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt) case GX_TF_C4: return GetPCFormatFromTLUTFormat(tlutfmt); case GX_TF_I4: - return PC_TEX_FMT_I4_AS_I8; + return PC_TEX_FMT_IA8; case GX_TF_I8: // speed critical - return PC_TEX_FMT_I8; + return PC_TEX_FMT_IA8; case GX_TF_C8: return GetPCFormatFromTLUTFormat(tlutfmt); case GX_TF_IA4: @@ -456,6 +456,7 @@ PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt) PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int height,int Pitch, int texformat, int tlutaddr, int tlutfmt) { + int TexLen = height * Pitch; switch (texformat) { case GX_TF_C4: @@ -465,37 +466,81 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 8) for (int x = 0; x < width; x += 8) for (int iy = 0; iy < 8; iy++, src += 4) - decodebytesC4_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + { + //decodebytesC4_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int ix = 0; ix < 4; ix++) + { + int stride = (y + iy) * Pitch + (x + ix * 2) * 4; + if(stride < TexLen) + { + u8 val = src[ix]; + ((u32*)(dst+stride))[0] = decode5A3(Common::swap16(tlut[val >> 4])); + ((u32*)(dst+stride))[1] = decode5A3(Common::swap16(tlut[val & 0xF])); + } + else + break; + } + } } else { for (int y = 0; y < height; y += 8) for (int x = 0; x < width; x += 8) for (int iy = 0; iy < 8; iy++, src += 4) - decodebytesC4_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + { + //decodebytesC4_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + u16* tlut = (u16*)(texMem+tlutaddr); + for (int ix = 0; ix < 4; ix++) + { + int stride = (y + iy) * Pitch + (x + ix * 2) * 2; + if(stride < TexLen) + { + u8 val = src[ix]; + ((u16*)(dst+stride))[0] = Common::swap16(tlut[val >> 4]); + ((u16*)(dst+stride))[1] = Common::swap16(tlut[val & 0xF]); + } + else + break; + + } + } } return GetPCFormatFromTLUTFormat(tlutfmt); case GX_TF_I4: { for (int y = 0; y < height; y += 8) for (int x = 0; x < width; x += 8) - for (int iy = 0; iy < 8; iy++, src += 4) + for (int iy = 0; iy < 8 && (y + iy) < height ; iy++, src += 4) for (int ix = 0; ix < 4; ix++) { int val = src[ix]; - dst[(y + iy) * Pitch + x + ix * 2] = Convert4To8(val >> 4); - dst[(y + iy) * Pitch + x + ix * 2 + 1] = Convert4To8(val & 0xF); + int stride = (y + iy) * Pitch + (x + ix * 2) * 2; + dst[stride] = Convert4To8(val >> 4); + dst[stride + 1] = dst[stride]; + dst[stride + 2] = Convert4To8(val & 0xF); + dst[stride + 3] = dst[stride + 2]; } } - return PC_TEX_FMT_I4_AS_I8; + return PC_TEX_FMT_IA8; case GX_TF_I8: // speed critical { for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 8) - for (int iy = 0; iy < 4; iy++, src += 8) - memcpy((u8*)dst + (y + iy)*Pitch+x, src, 8); + for (int iy = 0; iy < 4 && (y + iy) < height; iy++) + for (int ix = 0; ix < 8; ix++,src++) + { + int stride = (y + iy)*Pitch+(x + ix) * 2; + if(stride < TexLen) + { + dst[stride] = src[0]; + dst[stride + 1] = src[0]; + } + else + break; + } } - return PC_TEX_FMT_I8; + return PC_TEX_FMT_IA8; case GX_TF_C8: if (tlutfmt == 2) { @@ -503,14 +548,42 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 8) for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesC8_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + { + //decodebytesC8_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int ix = 0; ix < 8; ix++) + { + u8 val = src[ix]; + int stride = (y + iy) * Pitch + (x + ix) * 4; + if(stride < TexLen) + { + ((u32*)(dst+stride))[0] = decode5A3(Common::swap16(tlut[val])); + } + else + break; + } + } } else { for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 8) for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesC8_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + { + //decodebytesC8_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src, tlutaddr); + u16* tlut = (u16*)(texMem + tlutaddr); + for (int ix = 0; ix < 8; ix++) + { + u8 val = src[ix]; + int stride = (y + iy) * Pitch + (x + ix) * 2; + if(stride < TexLen) + { + ((u16*)(dst+stride))[0] = Common::swap16(tlut[val]); + } + else + break; + } + } } return GetPCFormatFromTLUTFormat(tlutfmt); case GX_TF_IA4: @@ -518,7 +591,22 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 8) for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesIA4(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src); + { + //decodebytesIA4(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, src); + for (int ix = 0; ix < 8; ix++) + { + int stride = (y + iy) * Pitch + (x + ix) * 2; + if(stride < TexLen) + { + const u8 val = src[ix]; + u8 a = Convert4To8(val >> 4); + u8 l = Convert4To8(val & 0xF); + ((u16*)(dst+stride))[0] = (a << 8) | l; + } + else + break; + } + } } return PC_TEX_FMT_IA4_AS_IA8; case GX_TF_IA8: @@ -527,10 +615,17 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int x = 0; x < width; x += 4) for (int iy = 0; iy < 4; iy++, src += 8) { - u16 *ptr = ((u16 *)((u8*)dst + (y + iy) * Pitch)) + x; u16 *s = (u16 *)src; for(int j = 0; j < 4; j++) - *ptr++ = Common::swap16(*s++); + { + int stride = (y + iy) * Pitch + (x + j) * 2; + if(stride < TexLen) + { + ((u16*)(dst+stride))[0] = Common::swap16(*s++); + } + else + break; + } } } @@ -542,26 +637,62 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 4) for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesC14X2_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, (u16*)src, tlutaddr); + { + //decodebytesC14X2_5A3_To_BGRA32(((u32*)((u8*)dst + (y + iy) * Pitch)) + x, (u16*)src, tlutaddr); + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int ix = 0; ix < 4; ix++) + { + int stride = (y + iy) * Pitch +( x + ix) * 4; + if(stride < TexLen) + { + u16 val = Common::swap16(src[ix]); + ((u32*)(dst+stride))[0] = decode5A3(Common::swap16(tlut[(val & 0x3FFF)])); + } + else + break; + } + } } else { for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 4) for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesC14X2_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, (u16*)src, tlutaddr); + { + //decodebytesC14X2_To_Raw16(((u16*)((u8*)dst + (y + iy) * Pitch)) + x, (u16*)src, tlutaddr); + u16* tlut = (u16*)(texMem + tlutaddr); + for (int ix = 0; ix < 4; ix++) + { + int stride = (y + iy) * Pitch + (x + ix) * 2; + if(stride < TexLen) + { + u16 val = Common::swap16(src[ix]); + ((u16*)(dst+stride))[0] = Common::swap16(tlut[(val & 0x3FFF)]); + } + else + break; + } + } } return GetPCFormatFromTLUTFormat(tlutfmt); case GX_TF_RGB565: { for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 4) - for (int iy = 0; iy < 4; iy++, src += 8) + for (int iy = 0; iy < 4 && (y + iy) < height; iy++, src += 8) { - u16 *ptr = ((u16 *)(dst + (y + iy) * Pitch)) + x; + //u16 *ptr = ((u16 *)(dst + (y + iy) * Pitch)) + x; u16 *s = (u16 *)src; for(int j = 0; j < 4; j++) - *ptr++ = Common::swap16(*s++); + { + int stride = (y + iy) * Pitch + (x + j) * 2; + if(stride < height * Pitch) + { + ((u16*)(dst+stride))[0] = Common::swap16(*s++); + } + else + break; + } } } return PC_TEX_FMT_RGB565; @@ -570,8 +701,14 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 4) for (int iy = 0; iy < 4; iy++, src += 8) - //decodebytesRGB5A3((u32*)dst+(y+iy)*width+x, (u16*)src, 4); - decodebytesRGB5A3(((u32*)((u8*)dst+(y+iy)*Pitch))+x, (u16*)src); + for (int ix = 0; ix < 4; ix++) + { + int stride = (y+iy)*Pitch + (x + ix) * 4; + if(stride < height * Pitch) + ((u32*)(dst+stride))[0] = decode5A3(Common::swap16(((u16 *)src)[ix])); + } + + } return PC_TEX_FMT_BGRA32; case GX_TF_RGBA8: // speed critical @@ -579,8 +716,22 @@ PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int for (int y = 0; y < height; y += 4) for (int x = 0; x < width; x += 4) { - for (int iy = 0; iy < 4; iy++) - decodebytesARGB8_4(((u32*)((u8*)dst + (y+iy)*Pitch)) + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); + for (int iy = 0; iy < 4 && (y + iy) < height; iy++) + { + //decodebytesARGB8_4(((u32*)((u8*)dst + (y+iy)*Pitch)) + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); + u16 *src1 = (u16*)src + 4 * iy; + u16 *src2 = (u16*)src + 4 * iy + 16; + for (int ix = 0; ix < 4; ix++) + { + int stride = (y+iy)*Pitch + (x + ix) * 4; + if(stride < height * Pitch) + { + ((u32*)(dst+ stride))[0] = Common::swap32((src2[ix] << 16) | src1[ix]); + } + else + break; + } + } src += 64; } } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.cpp b/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.cpp index 401b1814a8..7dbd3a56b2 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.cpp @@ -55,10 +55,20 @@ const int MaxSamplerSize = 13; const int MaxSamplerTypes = 15; static bool m_RenderStatesSet[MaxRenderStates]; static DWORD m_RenderStates[MaxRenderStates]; +static bool m_RenderStatesChanged[MaxRenderStates]; + static DWORD m_TextureStageStates[MaxTextureStages][MaxTextureTypes]; +static bool m_TextureStageStatesSet[MaxTextureStages][MaxTextureTypes]; +static bool m_TextureStageStatesChanged[MaxTextureStages][MaxTextureTypes]; + static DWORD m_SamplerStates[MaxSamplerSize][MaxSamplerTypes]; +static bool m_SamplerStatesSet[MaxSamplerSize][MaxSamplerTypes]; +static bool m_SamplerStatesChanged[MaxSamplerSize][MaxSamplerTypes]; + LPDIRECT3DBASETEXTURE9 m_Textures[16]; LPDIRECT3DVERTEXDECLARATION9 m_VtxDecl; +LPDIRECT3DPIXELSHADER9 m_PixelShader; +LPDIRECT3DVERTEXSHADER9 m_VertexShader; void Enumerate(); @@ -66,6 +76,12 @@ int GetNumAdapters() { return numAdapters; } const Adapter &GetAdapter(int i) { return adapters[i]; } const Adapter &GetCurAdapter() { return adapters[cur_adapter]; } +bool IsATIDevice() +{ + return GetCurAdapter().ident.VendorId == VENDOR_ATI; +} + + HRESULT Init() { // Create the D3D object, which is needed to create the D3DDevice. @@ -148,7 +164,7 @@ void Enumerate() a.resolutions.clear(); D3D::D3D->GetAdapterIdentifier(i, 0, &a.ident); bool isNvidia = a.ident.VendorId == VENDOR_NVIDIA; - + // Add multisample modes a.aa_levels.push_back(AALevel("None", D3DMULTISAMPLE_NONE, 0)); @@ -264,7 +280,7 @@ HRESULT Create(int adapter, HWND wnd, bool _fullscreen, int _resolution, int aa_ adapter, D3DDEVTYPE_HAL, wnd, - D3DCREATE_HARDWARE_VERTEXPROCESSING, // | D3DCREATE_PUREDEVICE, doesn't seem to make a difference + D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_PUREDEVICE, //doesn't seem to make a difference &d3dpp, &dev))) { if (FAILED(D3D->CreateDevice( @@ -285,10 +301,18 @@ HRESULT Create(int adapter, HWND wnd, bool _fullscreen, int _resolution, int aa_ dev->GetRenderTarget(0, &back_buffer); if (dev->GetDepthStencilSurface(&back_buffer_z) == D3DERR_NOTFOUND) back_buffer_z = NULL; - dev->SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE ); - - dev->SetRenderState(D3DRS_FILLMODE, g_Config.bWireFrame ? D3DFILL_WIREFRAME : D3DFILL_SOLID); - + D3D::SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE ); + D3D::SetRenderState(D3DRS_FILLMODE, g_Config.bWireFrame ? D3DFILL_WIREFRAME : D3DFILL_SOLID); + memset(m_Textures, 0, sizeof(m_Textures)); + memset(m_TextureStageStatesSet, 0, sizeof(m_TextureStageStatesSet)); + memset(m_RenderStatesSet, 0, sizeof(m_RenderStatesSet)); + memset(m_SamplerStatesSet, 0, sizeof(m_SamplerStatesSet)); + memset(m_TextureStageStatesChanged, 0, sizeof(m_TextureStageStatesChanged)); + memset(m_RenderStatesChanged, 0, sizeof(m_RenderStatesChanged)); + memset(m_SamplerStatesChanged, 0, sizeof(m_SamplerStatesChanged)); + m_VtxDecl = NULL; + m_PixelShader = NULL; + m_VertexShader = NULL; // Device state would normally be set here return S_OK; } @@ -435,7 +459,8 @@ void ApplyCachedState() { for (int type = 0; type < MaxSamplerTypes; type++) { - D3D::dev->SetSamplerState(sampler, (D3DSAMPLERSTATETYPE)type, m_SamplerStates[sampler][type]); + if(m_SamplerStatesSet[sampler][type]) + D3D::dev->SetSamplerState(sampler, (D3DSAMPLERSTATETYPE)type, m_SamplerStates[sampler][type]); } } @@ -448,8 +473,11 @@ void ApplyCachedState() // We don't bother restoring these so let's just wipe the state copy // so no stale state is around. memset(m_Textures, 0, sizeof(m_Textures)); - memset(m_TextureStageStates, 0xFF, sizeof(m_TextureStageStates)); + memset(m_TextureStageStatesSet, 0, sizeof(m_TextureStageStatesSet)); + memset(m_TextureStageStatesChanged, 0, sizeof(m_TextureStageStatesChanged)); m_VtxDecl = NULL; + m_PixelShader = NULL; + m_VertexShader = NULL; } void SetTexture(DWORD Stage, LPDIRECT3DBASETEXTURE9 pTexture) @@ -463,40 +491,101 @@ void SetTexture(DWORD Stage, LPDIRECT3DBASETEXTURE9 pTexture) void RefreshRenderState(D3DRENDERSTATETYPE State) { - D3D::dev->SetRenderState(State, m_RenderStates[State]); + if(m_RenderStatesSet[State] && m_RenderStatesChanged[State]) + { + D3D::dev->SetRenderState(State, m_RenderStates[State]); + m_RenderStatesChanged[State] = false; + } } void SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { - if (m_RenderStates[State] != Value) + if (m_RenderStates[State] != Value || !m_RenderStatesSet[State]) { m_RenderStates[State] = Value; m_RenderStatesSet[State] = true; + m_RenderStatesChanged[State] = false; D3D::dev->SetRenderState(State, Value); } } +void ChangeRenderState(D3DRENDERSTATETYPE State, DWORD Value) +{ + if (m_RenderStates[State] != Value || !m_RenderStatesSet[State]) + { + m_RenderStatesChanged[State] = m_RenderStatesSet[State]; + D3D::dev->SetRenderState(State, Value); + } + else + { + m_RenderStatesChanged[State] = false; + } +} + void SetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value) { - if (m_TextureStageStates[Stage][Type] != Value) + if (m_TextureStageStates[Stage][Type] != Value || !m_TextureStageStatesSet[Stage][Type]) { m_TextureStageStates[Stage][Type] = Value; + m_TextureStageStatesSet[Stage][Type]=true; + m_TextureStageStatesChanged[Stage][Type]=false; D3D::dev->SetTextureStageState(Stage, Type, Value); } } +void RefreshTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type) +{ + if(m_TextureStageStatesSet[Stage][Type] && m_TextureStageStatesChanged[Stage][Type]) + { + D3D::dev->SetTextureStageState(Stage, Type, m_TextureStageStates[Stage][Type]); + m_TextureStageStatesChanged[Stage][Type] = false; + } +} + +void ChangeTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value) +{ + if (m_TextureStageStates[Stage][Type] != Value || !m_TextureStageStatesSet[Stage][Type]) + { + m_TextureStageStatesChanged[Stage][Type] = m_TextureStageStatesSet[Stage][Type]; + D3D::dev->SetTextureStageState(Stage, Type, Value); + } + else + { + m_TextureStageStatesChanged[Stage][Type] = false; + } +} + +void SetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) +{ + if (m_SamplerStates[Sampler][Type] != Value || !m_SamplerStatesSet[Sampler][Type]) + { + m_SamplerStates[Sampler][Type] = Value; + m_SamplerStatesSet[Sampler][Type] = true; + m_SamplerStatesChanged[Sampler][Type] = false; + D3D::dev->SetSamplerState(Sampler, Type, Value); + } +} + void RefreshSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type) { - D3D::dev->SetSamplerState(Sampler, Type, m_SamplerStates[Sampler][Type]); + if(m_SamplerStatesSet[Sampler][Type] && m_SamplerStatesChanged[Sampler][Type]) + { + D3D::dev->SetSamplerState(Sampler, Type, m_SamplerStates[Sampler][Type]); + m_SamplerStatesChanged[Sampler][Type] = false; + } } -void SetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) +void ChangeSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { - if (m_SamplerStates[Sampler][Type] != Value) + if (m_SamplerStates[Sampler][Type] != Value || !m_SamplerStatesSet[Sampler][Type]) { - m_SamplerStates[Sampler][Type] = Value; + m_SamplerStatesChanged[Sampler][Type] = m_SamplerStatesSet[Sampler][Type]; D3D::dev->SetSamplerState(Sampler, Type, Value); } + else + { + m_SamplerStatesChanged[Sampler][Type] = false; + } } void RefreshVertexDeclaration() @@ -520,4 +609,47 @@ void SetVertexDeclaration(LPDIRECT3DVERTEXDECLARATION9 decl) } } +void RefreshVertexShader() +{ + if (m_VertexShader) + { + D3D::dev->SetVertexShader(m_VertexShader); + } +} + +void SetVertexShader(LPDIRECT3DVERTEXSHADER9 shader) +{ + if (!shader) { + m_VertexShader = NULL; + return; + } + if (shader != m_VertexShader) + { + D3D::dev->SetVertexShader(shader); + m_VertexShader = shader; + } +} + +void RefreshPixelShader() +{ + if (m_PixelShader) + { + D3D::dev->SetPixelShader(m_PixelShader); + } +} + +void SetPixelShader(LPDIRECT3DPIXELSHADER9 shader) +{ + if (!shader) { + m_PixelShader = NULL; + return; + } + if (shader != m_PixelShader) + { + D3D::dev->SetPixelShader(shader); + m_PixelShader = shader; + } +} + + } // namespace diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.h b/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.h index 97d4e449d2..e9156e9377 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DBase.h @@ -44,6 +44,7 @@ namespace D3D // Null render target to do Z-only shadow maps: (probably not useful for Dolphin) #define FOURCC_NULL ((D3DFORMAT)(MAKEFOURCC('N','U','L','L'))) +bool IsATIDevice(); HRESULT Init(); HRESULT Create(int adapter, HWND wnd, bool fullscreen, int resolution, int aa_mode, bool auto_depth); void Close(); @@ -76,11 +77,25 @@ void ShowD3DError(HRESULT err); void SetTexture(DWORD Stage, IDirect3DBaseTexture9 *pTexture); void SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); void RefreshRenderState(D3DRENDERSTATETYPE State); +void ChangeRenderState(D3DRENDERSTATETYPE State, DWORD Value); + void SetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); -void RefreshSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type); +void RefreshTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type); +void ChangeTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); + void SetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); +void RefreshSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type); +void ChangeSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); + void RefreshVertexDeclaration(); void SetVertexDeclaration(LPDIRECT3DVERTEXDECLARATION9 decl); + +void RefreshVertexShader(); +void SetVertexShader(LPDIRECT3DVERTEXSHADER9 shader); + +void RefreshPixelShader(); +void SetPixelShader(LPDIRECT3DPIXELSHADER9 shader); + void ApplyCachedState(); // Utility functions for vendor specific hacks. So far, just the one. diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DUtil.cpp b/Source/Plugins/Plugin_VideoDX9/Src/D3DUtil.cpp index 580a4411e1..69b224e16a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DUtil.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DUtil.cpp @@ -187,50 +187,24 @@ const int TS[6][2] = {D3DTSS_ALPHAARG2, D3DTA_DIFFUSE }, }; -static DWORD RS_old[6]; -static DWORD TS_old[6]; -static LPDIRECT3DBASETEXTURE9 texture_old = NULL; static LPDIRECT3DPIXELSHADER9 ps_old = NULL; static LPDIRECT3DVERTEXSHADER9 vs_old = NULL; -void SaveRenderStates() +void RestoreShaders() { - // TODO: Get rid of these Gets so we can potentially switch to Pure Device - for (int i = 0; i < 6; i++) - { - dev->GetRenderState((_D3DRENDERSTATETYPE)RS[i][0], &(RS_old[i])); - dev->GetTextureStageState(0, (_D3DTEXTURESTAGESTATETYPE)int(TS[i][0]), &(TS_old[i])); - } - dev->GetTexture(0, &texture_old); - dev->GetPixelShader(&ps_old); - dev->GetVertexShader(&vs_old); + D3D::SetTexture(0, 0); + D3D::RefreshVertexDeclaration(); + D3D::RefreshPixelShader(); + D3D::RefreshVertexShader(); } void RestoreRenderStates() { - if(texture_old) - { - D3D::SetTexture(0, texture_old); - texture_old->Release(); - texture_old = NULL; - } - if(ps_old) - { - dev->SetPixelShader(ps_old); - ps_old->Release(); - ps_old = NULL; - } - if(vs_old) - { - dev->SetVertexShader(vs_old); - vs_old->Release(); - vs_old = NULL; - } - + RestoreShaders(); for (int i = 0; i < 6; i++) { - D3D::SetRenderState((_D3DRENDERSTATETYPE)RS[i][0], RS_old[i]); - D3D::SetTextureStageState(0, (_D3DTEXTURESTAGESTATETYPE)int(TS[i][0]), TS_old[i]); + D3D::RefreshRenderState((_D3DRENDERSTATETYPE)RS[i][0]); + D3D::RefreshTextureStageState(0, (_D3DTEXTURESTAGESTATETYPE)int(TS[i][0])); } } @@ -241,13 +215,12 @@ void CD3DFont::SetRenderStates() dev->SetPixelShader(0); dev->SetVertexShader(0); - D3D::SetVertexDeclaration(NULL); // throw away cached vtx decl dev->SetFVF(D3DFVF_FONT2DVERTEX); for (int i = 0; i < 6; i++) { - D3D::SetRenderState((_D3DRENDERSTATETYPE)RS[i][0], RS[i][1]); - D3D::SetTextureStageState(0, (_D3DTEXTURESTAGESTATETYPE)int(TS[i][0]), TS[i][1]); + D3D::ChangeRenderState((_D3DRENDERSTATETYPE)RS[i][0], RS[i][1]); + D3D::ChangeTextureStageState(0, (_D3DTEXTURESTAGESTATETYPE)int(TS[i][0]), TS[i][1]); } } @@ -257,7 +230,6 @@ int CD3DFont::DrawTextScaled(float x, float y, float fXScale, float fYScale, flo if (!m_pVB) return 0; - SaveRenderStates(); SetRenderStates(); dev->SetStreamSource(0, m_pVB, 0, sizeof(FONT2DVERTEX)); @@ -369,8 +341,7 @@ int CD3DFont::DrawTextScaled(float x, float y, float fXScale, float fYScale, flo } void quad2d(float x1, float y1, float x2, float y2, u32 color, float u1, float v1, float u2, float v2) -{ - SaveRenderStates(); +{ struct Q2DVertex { float x,y,z,rhw; u32 color; float u, v; } coords[4] = { {x1-0.5f, y1-0.5f, 0, 1, color, u1, v1}, {x2-0.5f, y1-0.5f, 0, 1, color, u2, v1}, @@ -379,13 +350,9 @@ void quad2d(float x1, float y1, float x2, float y2, u32 color, float u1, float v }; dev->SetPixelShader(0); dev->SetVertexShader(0); - dev->SetVertexDeclaration(0); - - D3D::SetVertexDeclaration(NULL); // throw away cached vtx decl dev->SetFVF(D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_TEX1); dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(Q2DVertex)); - - RestoreRenderStates(); + RestoreShaders(); } void drawShadedTexQuad(IDirect3DTexture9 *texture, @@ -396,8 +363,6 @@ void drawShadedTexQuad(IDirect3DTexture9 *texture, IDirect3DPixelShader9 *PShader, IDirect3DVertexShader9 *Vshader) { - SaveRenderStates(); - D3D::SetTexture(0, 0); float u1=((float)rSource->left)/(float) SourceWidth; float u2=((float)rSource->right)/(float) SourceWidth; float v1=((float)rSource->top)/(float) SourceHeight; @@ -409,30 +374,18 @@ void drawShadedTexQuad(IDirect3DTexture9 *texture, {(float)rDest->right- 0.5f, (float)rDest->bottom- 0.5f, 0.0f,1.0f, u2, v2}, {(float)rDest->left- 0.5f, (float)rDest->bottom- 0.5f, 0.0f,1.0f, u1, v2} }; - HRESULT hr = 0; - hr = dev->SetFVF(D3DFVF_XYZRHW | D3DFVF_TEX1); - hr = dev->SetVertexShader(Vshader); - hr = dev->SetPixelShader(PShader); - hr = dev->SetTexture(0, texture); - if(FAILED(hr)) - { - PanicAlert("unable to set pixel shader"); - } - hr = dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(Q2DVertex)); - if(FAILED(hr)) - { - PanicAlert("unable to draw"); - } - D3D::RefreshVertexDeclaration(); - RestoreRenderStates(); - + dev->SetFVF(D3DFVF_XYZRHW | D3DFVF_TEX1); + dev->SetVertexShader(Vshader); + dev->SetPixelShader(PShader); + D3D::SetTexture(0, texture); + dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(Q2DVertex)); + RestoreShaders(); } void drawClearQuad(const RECT *rDest, u32 Color,float z, IDirect3DPixelShader9 *PShader, IDirect3DVertexShader9 *Vshader) { - SaveRenderStates(); struct Q2DVertex { float x,y,z,rhw;u32 Color; } coords[4] = { {(float)rDest->left-0.5f, (float)rDest->top-0.5f, z, 1.0f, Color}, {(float)rDest->right-0.5f, (float)rDest->top-0.5f, z,1.0f, Color}, @@ -442,9 +395,9 @@ void drawClearQuad(const RECT *rDest, u32 Color,float z, dev->SetFVF(D3DFVF_XYZRHW | D3DFVF_DIFFUSE); dev->SetVertexShader(Vshader); dev->SetPixelShader(PShader); + D3D::SetTexture(0, 0); dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(Q2DVertex)); - D3D::RefreshVertexDeclaration(); - RestoreRenderStates(); + RestoreShaders(); } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp b/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp index c79c5dbad1..8f2597e52c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp @@ -212,7 +212,7 @@ struct TabAdvanced : public W32Util::Tab g_Config.Save(FULL_CONFIG_DIR "gfx_dx9.ini"); if( D3D::dev != NULL ) { - D3D::dev->SetRenderState( D3DRS_FILLMODE, g_Config.bWireFrame ? D3DFILL_WIREFRAME : D3DFILL_SOLID ); + D3D::SetRenderState( D3DRS_FILLMODE, g_Config.bWireFrame ? D3DFILL_WIREFRAME : D3DFILL_SOLID ); } } }; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp index 81cc1f1483..7691be44b5 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp @@ -94,8 +94,8 @@ LPDIRECT3DTEXTURE9 GetEFBDepthTexture(const EFBRectangle &sourceRc) void Create() { // Simplest possible setup to start with. - int target_width = Renderer::GetTargetWidth(); - int target_height = Renderer::GetTargetHeight(); + int target_width = Renderer::GetFullTargetWidth(); + int target_height = Renderer::GetFullTargetHeight(); s_efb_color_surface_Format = D3DFMT_A8R8G8B8; //get the framebuffer texture diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index f622b335e0..145eb6e11e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -27,6 +27,7 @@ #include "VertexLoader.h" #include "BPMemory.h" #include "XFMemory.h" +#include "ImageWrite.h" #include "debugger/debugger.h" @@ -190,14 +191,23 @@ bool PixelShaderCache::SetShader(bool dstAlpha) if (entry.shader) { - D3D::dev->SetPixelShader(entry.shader); + D3D::SetPixelShader(entry.shader); return true; } else return false; } - const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), dstAlpha, /*(D3D::GetCaps().NumSimultaneousRTs > 1)? 1 :*/ 2); + const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), dstAlpha, 2); + #if defined(_DEBUG) || defined(DEBUGFAST) + if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { + static int counter = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s/ps_%04i.txt", FULL_DUMP_DIR, counter++); + + SaveData(szTemp, code); + } + #endif LPDIRECT3DPIXELSHADER9 shader = D3D::CompilePixelShader(code, (int)strlen(code)); // Make an entry in the table @@ -214,7 +224,7 @@ bool PixelShaderCache::SetShader(bool dstAlpha) SETSTAT(stats.numPixelShadersAlive, (int)PixelShaders.size()); if (shader) { - D3D::dev->SetPixelShader(shader); + D3D::SetPixelShader(shader); return true; } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 0a8308befd..0afebc7bfb 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -49,12 +49,18 @@ static int s_target_width; static int s_target_height; +static int s_Fulltarget_width; +static int s_Fulltarget_height; + static int s_backbuffer_width; static int s_backbuffer_height; static float xScale; static float yScale; +static int FULL_EFB_WIDTH = EFB_WIDTH; +static int FULL_EFB_HEIGHT = EFB_HEIGHT; + static int s_recordWidth; static int s_recordHeight; @@ -262,10 +268,24 @@ bool Renderer::Init() // TODO: Grab target width from configured resolution? s_target_width = s_backbuffer_width * EFB_WIDTH / 640; - s_target_height = s_backbuffer_height * EFB_HEIGHT / 480; + s_target_height = s_backbuffer_height * EFB_HEIGHT / 480; xScale = (float)s_target_width / (float)EFB_WIDTH; yScale = (float)s_target_height / (float)EFB_HEIGHT; + if(!D3D::IsATIDevice()) + { + FULL_EFB_WIDTH = 2 * EFB_WIDTH; + FULL_EFB_HEIGHT = 2 * EFB_HEIGHT; + s_Fulltarget_width = FULL_EFB_WIDTH * xScale; + s_Fulltarget_height = FULL_EFB_HEIGHT * yScale; + } + else + { + s_Fulltarget_width = s_target_width; + s_Fulltarget_height = s_target_height; + } + + s_LastFrameDumped = false; s_AVIDumping = false; @@ -293,6 +313,8 @@ bool Renderer::Init() D3D::dev->SetRenderTarget(0, FBManager::GetEFBColorRTSurface()); D3D::dev->SetDepthStencilSurface(FBManager::GetEFBDepthRTSurface()); + vp.X = (s_Fulltarget_width - s_target_width) / 2; + vp.Y = (s_Fulltarget_height - s_target_height) / 2; vp.Width = s_target_width; vp.Height = s_target_height; D3D::dev->SetViewport(&vp); @@ -317,6 +339,8 @@ void Renderer::Shutdown() int Renderer::GetTargetWidth() { return s_target_width; } int Renderer::GetTargetHeight() { return s_target_height; } +int Renderer::GetFullTargetWidth() { return s_Fulltarget_width; } +int Renderer::GetFullTargetHeight() { return s_Fulltarget_height; } float Renderer::GetTargetScaleX() { return xScale; } float Renderer::GetTargetScaleY() { return yScale; } @@ -337,11 +361,13 @@ void dumpMatrix(D3DXMATRIX &mtx) TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) { + int Xstride = (s_Fulltarget_width - s_target_width) / 2; + int Ystride = (s_Fulltarget_height - s_target_height) / 2; TargetRectangle result; - result.left = (int)((rc.left) * xScale) ; - result.top = (int)((rc.top) * yScale); - result.right = (int)((rc.right) * xScale) ; - result.bottom = (int)((rc.bottom) * yScale); + result.left = (int)(rc.left * xScale) + Xstride ; + result.top = (int)(rc.top * yScale) + Ystride; + result.right = (int)(rc.right * xScale) + Xstride ; + result.bottom = (int)(rc.bottom * yScale) + Ystride; return result; } @@ -438,7 +464,7 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc) sourcerect.right = src_rect.right; sourcerect.top = src_rect.top; - D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetTargetWidth(),Renderer::GetTargetHeight(),&destinationrect,PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader()); + D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),&destinationrect,PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader()); // Finish up the current frame, print some stats if (g_ActiveConfig.bShowFPS) @@ -575,19 +601,22 @@ bool Renderer::SetScissorRect() rc.right = (int)((float)bpmem.scissorBR.x - xoff - 341); rc.bottom = (int)((float)bpmem.scissorBR.y - yoff - 341); - rc.left = (int)(rc.left * xScale); - rc.top = (int)(rc.top * yScale); - rc.right = (int)(rc.right * xScale); - rc.bottom = (int)(rc.bottom * yScale); + int Xstride = (s_Fulltarget_width - s_target_width) / 2; + int Ystride = (s_Fulltarget_height - s_target_height) / 2; + + rc.left = (int)(rc.left * xScale) + Xstride; + rc.top = (int)(rc.top * yScale) + Ystride; + rc.right = (int)(rc.right * xScale) + Xstride; + rc.bottom = (int)(rc.bottom * yScale) + Ystride; if (rc.left < 0) rc.left = 0; if (rc.right < 0) rc.right = 0; - if (rc.left > s_target_width) rc.left = s_target_width; - if (rc.right > s_target_width) rc.right = s_target_width; + if (rc.left > s_Fulltarget_width) rc.left = s_Fulltarget_width; + if (rc.right > s_Fulltarget_width) rc.right = s_Fulltarget_width; if (rc.top < 0) rc.top = 0; if (rc.bottom < 0) rc.bottom = 0; - if (rc.top > s_target_height) rc.top = s_target_height; - if (rc.bottom > s_target_height) rc.bottom = s_target_height; + if (rc.top > s_Fulltarget_height) rc.top = s_Fulltarget_height; + if (rc.bottom > s_Fulltarget_height) rc.bottom = s_Fulltarget_height; if(rc.left > rc.right) { int temp = rc.right; @@ -609,8 +638,8 @@ bool Renderer::SetScissorRect() else { //WARN_LOG(VIDEO, "Bad scissor rectangle: %i %i %i %i", rc.left, rc.top, rc.right, rc.bottom); - rc.left = 0; - rc.top = 0; + rc.left = Xstride; + rc.top = Ystride; rc.right = GetTargetWidth(); rc.bottom = GetTargetHeight(); D3D::dev->SetScissorRect(&rc); @@ -721,9 +750,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) EFBRectangle source_rect; LPDIRECT3DTEXTURE9 read_texture = FBManager::GetEFBDepthTexture(source_rect); - D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); - D3D::drawShadedTexQuad(read_texture,&RectToLock, Renderer::GetTargetWidth() , Renderer::GetTargetHeight(),&PixelRect,(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram():PixelShaderCache::GetDepthMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); + D3D::drawShadedTexQuad(read_texture,&RectToLock, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(),&PixelRect,(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram():PixelShaderCache::GetDepthMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); @@ -832,38 +861,33 @@ void UpdateViewport() float MValueX = Renderer::GetTargetScaleX(); float MValueY = Renderer::GetTargetScaleY(); + + int Xstride = (s_Fulltarget_width - s_target_width) / 2; + int Ystride = (s_Fulltarget_height - s_target_height) / 2; D3DVIEWPORT9 vp; // Stretch picture with increased internal resolution - int X = (int)(ceil(xfregs.rawViewport[3] - xfregs.rawViewport[0] - (scissorXOff)) * MValueX); - int Y = (int)(ceil(xfregs.rawViewport[4] + xfregs.rawViewport[1] - (scissorYOff)) * MValueY); + int X = (int)(ceil(xfregs.rawViewport[3] - xfregs.rawViewport[0] - (scissorXOff)) * MValueX) + Xstride; + int Y = (int)(ceil(xfregs.rawViewport[4] + xfregs.rawViewport[1] - (scissorYOff)) * MValueY) + Ystride; int Width = (int)ceil((int)(2 * xfregs.rawViewport[0]) * MValueX); int Height = (int)ceil((int)(-2 * xfregs.rawViewport[1]) * MValueY); if(Width < 0) { X += Width; - Width*=-1; - if( X < 0) - { - Width +=X; - } + Width*=-1; } if(Height < 0) { Y += Height; - Height *= -1; - if(Y < 0) - { - Height+=Y; - } + Height *= -1; } - if(X < 0) X = 0; - if(Y < 0) Y = 0; + vp.X = X; vp.Y = Y; vp.Width = Width; vp.Height = Height; + //some games set invalids values for z min and z max so fix them to the max an min alowed and let the shaders do this work vp.MinZ = 0.0f;//(xfregs.rawViewport[5] - xfregs.rawViewport[2]) / 16777216.0f; vp.MaxZ =1.0f;// xfregs.rawViewport[5] / 16777216.0f; @@ -873,11 +897,12 @@ void UpdateViewport() void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { // Update the view port for clearing the picture + D3DVIEWPORT9 vp; vp.X = 0; vp.Y = 0; - vp.Width = Renderer::GetTargetWidth(); - vp.Height = Renderer::GetTargetHeight(); + vp.Width = Renderer::GetFullTargetWidth(); + vp.Height = Renderer::GetFullTargetHeight(); vp.MinZ = 0.0; vp.MaxZ = 1.0; D3D::dev->SetViewport(&vp); @@ -988,27 +1013,25 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) void Renderer::ResetAPIState() { - D3D::SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); - D3D::SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); - D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE); - D3D::SetRenderState(D3DRS_ZENABLE, FALSE); - D3D::SetRenderState(D3DRS_ZWRITEENABLE, FALSE); + D3D::ChangeRenderState(D3DRS_SCISSORTESTENABLE, FALSE); + D3D::ChangeRenderState(D3DRS_CULLMODE, D3DCULL_NONE); + D3D::ChangeRenderState(D3DRS_ALPHABLENDENABLE, FALSE); + D3D::ChangeRenderState(D3DRS_ZENABLE, FALSE); + D3D::ChangeRenderState(D3DRS_ZWRITEENABLE, FALSE); DWORD color_mask = D3DCOLORWRITEENABLE_ALPHA| D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE; - D3D::SetRenderState(D3DRS_COLORWRITEENABLE, color_mask); + D3D::ChangeRenderState(D3DRS_COLORWRITEENABLE, color_mask); } void Renderer::RestoreAPIState() { // Gets us back into a more game-like state. - - UpdateViewport(); - if (bpmem.zmode.testenable) D3D::SetRenderState(D3DRS_ZENABLE, TRUE); - if (bpmem.zmode.updateenable) D3D::SetRenderState(D3DRS_ZWRITEENABLE, TRUE); - - D3D::SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); - SetScissorRect(); - SetColorMask(); - SetLogicOpMode(); + D3D::RefreshRenderState(D3DRS_SCISSORTESTENABLE); + D3D::RefreshRenderState(D3DRS_CULLMODE); + D3D::RefreshRenderState(D3DRS_ALPHABLENDENABLE); + D3D::RefreshRenderState(D3DRS_ZENABLE); + D3D::RefreshRenderState(D3DRS_ZWRITEENABLE); + D3D::RefreshRenderState(D3DRS_COLORWRITEENABLE); + UpdateViewport(); } void Renderer::SetGenerationMode() diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 82db970ec6..1d6d05e80c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -136,162 +136,159 @@ void TextureCache::Cleanup() } } -/*TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) -{ - if (address == 0) - return NULL; - - u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address); - int bsw = TexDecoder_GetBlockWidthInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; - int bsh = TexDecoder_GetBlockHeightInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; - int expandedWidth = (width + bsw) & (~bsw); - int expandedHeight = (height + bsh) & (~bsh); - - u32 hash_value; - u32 texID = address; - u32 texHash; - - if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bDumpTextures) - { - texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); - if (g_ActiveConfig.bSafeTextureCache) - hash_value = texHash; - if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) - { - // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) - // tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower. - // This trick (to change the texID depending on the TLUT addr) is a trick to get around - // an issue with metroid prime's fonts, where it has multiple sets of fonts on top of - // each other stored in a single texture, and uses the palette to make different characters - // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, - // we must make sure that texture with different tluts get different IDs. - u32 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); - texHash ^= tlutHash; - if (g_ActiveConfig.bSafeTextureCache) - texID ^= tlutHash; - } - } - - bool skip_texture_create = false; - TexCache::iterator iter = textures.find(texID); - - if (iter != textures.end()) - { - TCacheEntry &entry = iter->second; - - if (!g_ActiveConfig.bSafeTextureCache) - hash_value = ((u32 *)ptr)[0]; - - if (entry.isRenderTarget || ((address == entry.addr) && (hash_value == entry.hash))) - { - entry.frameCount = frameCount; - D3D::SetTexture(stage, entry.texture); - return &entry; - } - else - { - // Let's reload the new texture data into the same texture, - // instead of destroying it and having to create a new one. - // Might speed up movie playback very, very slightly. - - if (width == entry.w && height==entry.h && tex_format == entry.fmt) - { - skip_texture_create = true; - } - else - { - entry.Destroy(false); - textures.erase(iter); - } - } - } - - //PC_TexFormat pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth, height, tex_format, tlutaddr, tlutfmt); - PC_TexFormat pcfmt = GetPC_TexFormat(tex_format, tlutfmt); - - D3DFORMAT d3d_fmt; - switch (pcfmt) { - case PC_TEX_FMT_BGRA32: - case PC_TEX_FMT_RGBA32: - d3d_fmt = D3DFMT_A8R8G8B8; - break; - case PC_TEX_FMT_RGB565: - d3d_fmt = D3DFMT_R5G6B5; - break; - case PC_TEX_FMT_IA4_AS_IA8: - d3d_fmt = D3DFMT_A8L8; - break; - case PC_TEX_FMT_I8: - case PC_TEX_FMT_I4_AS_I8: - d3d_fmt = D3DFMT_A8L8; - break; - case PC_TEX_FMT_IA8: - d3d_fmt = D3DFMT_A8L8; - break; - case PC_TEX_FMT_DXT1: - d3d_fmt = D3DFMT_DXT1; - break; - } - - //Make an entry in the table - TCacheEntry& entry = textures[texID]; - - entry.oldpixel = ((u32 *)ptr)[0]; - if (g_ActiveConfig.bSafeTextureCache) - entry.hash = hash_value; - else - { - entry.hash = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF); - ((u32 *)ptr)[0] = entry.hash; - } - - entry.addr = address; - entry.size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, tex_format); - entry.isRenderTarget = false; - entry.isNonPow2 = ((width & (width - 1)) || (height & (height - 1))); - if (!skip_texture_create) { - entry.texture = D3D::CreateOnlyTexture2D(width, height, d3d_fmt); - } - D3DLOCKED_RECT Lock; - entry.texture->LockRect(0, &Lock, NULL, 0); - if(pcfmt != PC_TEX_FMT_I8 && pcfmt != PC_TEX_FMT_I4_AS_I8) - TexDecoder_DirectDecode((u8*)Lock.pBits,ptr,expandedWidth,height,Lock.Pitch,tex_format,tlutaddr,tlutfmt); - entry.texture->UnlockRect(0); - entry.frameCount = frameCount; - entry.w = width; - entry.h = height; - entry.fmt = tex_format; - - if (g_ActiveConfig.bDumpTextures) - { - // dump texture to file - char szTemp[MAX_PATH]; - char szDir[MAX_PATH]; - const char* uniqueId = globals->unique_id; - bool bCheckedDumpDir = false; - sprintf(szDir, "%s/%s", FULL_DUMP_TEXTURES_DIR, uniqueId); - if (!bCheckedDumpDir) - { - if (!File::Exists(szDir) || !File::IsDirectory(szDir)) - File::CreateDir(szDir); - - bCheckedDumpDir = true; - } - sprintf(szTemp, "%s/%s_%08x_%i.png", szDir, uniqueId, texHash, tex_format); - //sprintf(szTemp, "%s\\txt_%04i_%i.png", g_Config.texDumpPath.c_str(), counter++, format); <-- Old method - if (!File::Exists(szTemp)) - D3DXSaveTextureToFileA(szTemp,D3DXIFF_BMP,entry.texture,0); - } - - INCSTAT(stats.numTexturesCreated); - SETSTAT(stats.numTexturesAlive, (int)textures.size()); - - //Set the texture! - D3D::SetTexture(stage, entry.texture); - - DEBUGGER_PAUSE_LOG_AT(NEXT_NEW_TEXTURE,true,{printf("A new texture (%d x %d) is loaded", width, height);}); - return &entry; -}*/ +//TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) +//{ +// if (address == 0) +// return NULL; +// +// u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address); +// int bsw = TexDecoder_GetBlockWidthInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; +// int bsh = TexDecoder_GetBlockHeightInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; +// int expandedWidth = (width + bsw) & (~bsw); +// int expandedHeight = (height + bsh) & (~bsh); +// +// u32 hash_value; +// u32 texID = address; +// u32 texHash; +// +// if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bDumpTextures) +// { +// texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); +// if (g_ActiveConfig.bSafeTextureCache) +// hash_value = texHash; +// if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) +// { +// // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) +// // tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower. +// // This trick (to change the texID depending on the TLUT addr) is a trick to get around +// // an issue with metroid prime's fonts, where it has multiple sets of fonts on top of +// // each other stored in a single texture, and uses the palette to make different characters +// // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, +// // we must make sure that texture with different tluts get different IDs. +// u32 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); +// texHash ^= tlutHash; +// if (g_ActiveConfig.bSafeTextureCache) +// texID ^= tlutHash; +// } +// } +// +// bool skip_texture_create = false; +// TexCache::iterator iter = textures.find(texID); +// +// if (iter != textures.end()) +// { +// TCacheEntry &entry = iter->second; +// +// if (!g_ActiveConfig.bSafeTextureCache) +// hash_value = ((u32 *)ptr)[0]; +// +// if (entry.isRenderTarget || ((address == entry.addr) && (hash_value == entry.hash))) +// { +// entry.frameCount = frameCount; +// D3D::SetTexture(stage, entry.texture); +// return &entry; +// } +// else +// { +// // Let's reload the new texture data into the same texture, +// // instead of destroying it and having to create a new one. +// // Might speed up movie playback very, very slightly. +// +// if (width == entry.w && height==entry.h &&(tex_format | (tlutfmt << 16)) == entry.fmt) +// { +// skip_texture_create = true; +// } +// else +// { +// entry.Destroy(false); +// textures.erase(iter); +// } +// } +// } +// +// //PC_TexFormat pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth, height, tex_format, tlutaddr, tlutfmt); +// PC_TexFormat pcfmt = GetPC_TexFormat(tex_format, tlutfmt); +// +// D3DFORMAT d3d_fmt; +// switch (pcfmt) { +// case PC_TEX_FMT_BGRA32: +// case PC_TEX_FMT_RGBA32: +// d3d_fmt = D3DFMT_A8R8G8B8; +// break; +// case PC_TEX_FMT_RGB565: +// d3d_fmt = D3DFMT_R5G6B5; +// break; +// case PC_TEX_FMT_IA4_AS_IA8: +// d3d_fmt = D3DFMT_A8L8; +// break; +// case PC_TEX_FMT_I8: +// case PC_TEX_FMT_I4_AS_I8: +// d3d_fmt = D3DFMT_A8L8; +// break; +// case PC_TEX_FMT_IA8: +// d3d_fmt = D3DFMT_A8L8; +// break; +// case PC_TEX_FMT_DXT1: +// d3d_fmt = D3DFMT_DXT1; +// break; +// } +// +// //Make an entry in the table +// TCacheEntry& entry = textures[texID]; +// entry.addr = address; +// entry.size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, tex_format); +// entry.isRenderTarget = false; +// entry.isNonPow2 = ((width & (width - 1)) || (height & (height - 1))); +// if (!skip_texture_create) { +// entry.texture = D3D::CreateOnlyTexture2D(width, height, d3d_fmt); +// } +// D3DLOCKED_RECT Lock; +// entry.texture->LockRect(0, &Lock, NULL,D3DLOCK_DISCARD); +// TexDecoder_DirectDecode((u8*)Lock.pBits,ptr,expandedWidth,height,Lock.Pitch,tex_format,tlutaddr,tlutfmt); +// entry.texture->UnlockRect(0); +// entry.oldpixel = ((u32 *)ptr)[0]; +// if (g_ActiveConfig.bSafeTextureCache) +// entry.hash = hash_value; +// else +// { +// entry.hash = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF); +// ((u32 *)ptr)[0] = entry.hash; +// } +// entry.frameCount = frameCount; +// entry.w = width; +// entry.h = height; +// entry.fmt = tex_format | (tlutfmt << 16); +// +// if (g_ActiveConfig.bDumpTextures) +// { +// // dump texture to file +// char szTemp[MAX_PATH]; +// char szDir[MAX_PATH]; +// const char* uniqueId = globals->unique_id; +// bool bCheckedDumpDir = false; +// sprintf(szDir, "%s/%s", FULL_DUMP_TEXTURES_DIR, uniqueId); +// if (!bCheckedDumpDir) +// { +// if (!File::Exists(szDir) || !File::IsDirectory(szDir)) +// File::CreateDir(szDir); +// +// bCheckedDumpDir = true; +// } +// sprintf(szTemp, "%s/%s_%08x_%i.png", szDir, uniqueId, texHash, tex_format); +// //sprintf(szTemp, "%s\\txt_%04i_%i.png", g_Config.texDumpPath.c_str(), counter++, format); <-- Old method +// if (!File::Exists(szTemp)) +// D3DXSaveTextureToFileA(szTemp,D3DXIFF_BMP,entry.texture,0); +// } +// +// INCSTAT(stats.numTexturesCreated); +// SETSTAT(stats.numTexturesAlive, (int)textures.size()); +// +// //Set the texture! +// D3D::SetTexture(stage, entry.texture); +// +// DEBUGGER_PAUSE_LOG_AT(NEXT_NEW_TEXTURE,true,{printf("A new texture (%d x %d) is loaded", width, height);}); +// return &entry; +//} TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) { @@ -427,7 +424,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, entry.addr = address; entry.size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, tex_format); entry.isRenderTarget = false; - entry.isNonPow2 = ((width & (width - 1)) || (height & (height - 1))); + entry.isNonPow2 = false;//((width & (width - 1)) || (height & (height - 1))); if (!skip_texture_create) { entry.texture = D3D::CreateTexture2D((BYTE*)temp, width, height, expandedWidth, d3d_fmt); } else { @@ -496,7 +493,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo else { // Remove it and recreate it as a render target - iter->second.texture->Release(); + if(iter->second.texture) + iter->second.texture->Release(); iter->second.texture = 0; textures.erase(iter); } @@ -512,7 +510,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo entry.Scaledw = Scaledtex_w; entry.Scaledh = Scaledtex_h; entry.fmt = copyfmt; - + entry.isNonPow2 = true; D3D::dev->CreateTexture(Scaledtex_w, Scaledtex_h, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &entry.texture, 0); textures[address] = entry; tex = entry.texture; @@ -663,19 +661,18 @@ have_texture: if(bScaleByHalf) { - D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); - D3D::dev->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); } else { - D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); - D3D::dev->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); } D3DFORMAT bformat = FBManager::GetEFBDepthRTSurfaceFormat(); - D3D::drawShadedTexQuad(read_texture,&sourcerect, Renderer::GetTargetWidth() , Renderer::GetTargetHeight(),&destrect,((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); - + D3D::drawShadedTexQuad(read_texture,&sourcerect, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(),&destrect,((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index c3191def13..64047eafde 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -240,11 +240,11 @@ void EncodeToRamUsingShader(LPDIRECT3DPIXELSHADER9 shader, LPDIRECT3DTEXTURE9 sr if (linearFilter) { - D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); } else { - D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); } D3DVIEWPORT9 vp; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 4344a62da3..e7e1a1b2fb 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -243,7 +243,7 @@ void Flush() for (int i = 0; i < 8; i++) { if (usedtextures & (1 << i)) { - Renderer::SetSamplerState(i & 3, i >> 2); + //Renderer::SetSamplerState(i & 3, i >> 2); FourTexUnits &tex = bpmem.tex[i >> 2]; TextureCache::TCacheEntry* tentry = TextureCache::Load(i, (tex.texImage3[i&3].image_base/* & 0x1FFFFF*/) << 5, @@ -297,8 +297,8 @@ void Flush() } // update alpha only - D3D::dev->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA); - D3D::dev->SetRenderState(D3DRS_ALPHABLENDENABLE, false); + D3D::ChangeRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA); + D3D::ChangeRenderState(D3DRS_ALPHABLENDENABLE, false); Draw(stride); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 95ab5fcac8..24d6492863 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -195,7 +195,7 @@ bool VertexShaderCache::SetShader(u32 components) DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE,true); if (entry.shader) { - D3D::dev->SetVertexShader(entry.shader); + D3D::SetVertexShader(entry.shader); return true; } else @@ -218,7 +218,7 @@ bool VertexShaderCache::SetShader(u32 components) SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size()); if (shader) { - D3D::dev->SetVertexShader(shader); + D3D::SetVertexShader(shader); return true; }