From 522746b2c223f37c45569ee7fd4a226b278cb6d9 Mon Sep 17 00:00:00 2001 From: gnick79 Date: Sat, 18 Dec 2010 18:23:22 +0000 Subject: [PATCH] **Changes** * More formats recognition for "Clear Screen" step. Improved the ability handling the alpha channel (% color translucency). * Related to CPUID detecting: - Added support displaying correct SysInfo for recent multicore Intel CPUs x86-64 with HT/SMT (Core i3/i5/i7 and Xeon) based on Nehalem architecture. + minor and passive changes. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6604 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/CPUDetect.cpp | 24 ++++++++++++------- Source/Core/Common/Src/CPUDetect.h | 2 ++ Source/Core/VideoCommon/Src/BPFunctions.cpp | 12 +++++++++- Source/Core/VideoCommon/Src/BPMemory.h | 21 +++++++++------- Source/Core/VideoCommon/Src/BPStructs.cpp | 5 ++-- .../VideoCommon/Src/PixelShaderManager.cpp | 2 +- Source/Core/VideoCommon/Src/RenderBase.h | 2 +- .../Core/VideoCommon/Src/TextureCacheBase.cpp | 6 ++++- .../Plugins/Plugin_VideoDX11/Src/Render.cpp | 2 +- Source/Plugins/Plugin_VideoDX11/Src/Render.h | 2 +- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 2 +- Source/Plugins/Plugin_VideoDX9/Src/Render.h | 2 +- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 2 +- Source/Plugins/Plugin_VideoOGL/Src/Render.h | 2 +- 14 files changed, 56 insertions(+), 30 deletions(-) diff --git a/Source/Core/Common/Src/CPUDetect.cpp b/Source/Core/Common/Src/CPUDetect.cpp index 5fcc7062d9..79aad925e7 100644 --- a/Source/Core/Common/Src/CPUDetect.cpp +++ b/Source/Core/Common/Src/CPUDetect.cpp @@ -133,8 +133,8 @@ void CPUInfo::Detect() strcpy(brand_string, cpu_string); // Detect family and other misc stuff. - bool HTT = false; - int logical_cpu_count = 1; + HTT = false; + logical_cpu_count = 1; if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; @@ -171,11 +171,14 @@ void CPUInfo::Detect() __cpuid(cpu_id, 0x80000008); int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; if (apic_id_core_id_size == 0) { - // Use what AMD calls the "legacy method" to determine # of cores. + // New mechanism for modern CPUs. + num_cores = logical_cpu_count; if (HTT) { - num_cores = logical_cpu_count; - } else { - num_cores = 1; + __cpuid(cpu_id, 0x00000004); + int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; + cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; + num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; + logical_cpu_count /= cores_x_package; } } else { // Use AMD's new method. @@ -193,10 +196,13 @@ std::string CPUInfo::Summarize() { std::string sum; if (num_cores == 1) - sum = StringFromFormat("%s, %i core, ", cpu_string, num_cores); + sum = StringFromFormat("%s, %i core", cpu_string, num_cores); else - sum = StringFromFormat("%s, %i cores, ", cpu_string, num_cores); - if (bSSE) sum += "SSE"; + { + sum = StringFromFormat("%s, %i cores", cpu_string, num_cores); + if (HTT) sum += StringFromFormat(" (%i logical IDs per physical core)", logical_cpu_count); + } + if (bSSE) sum += ", SSE"; if (bSSE2) sum += ", SSE2"; if (bSSE3) sum += ", SSE3"; if (bSSSE3) sum += ", SSSE3"; diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h index 55c6b62b3b..e509b8038d 100644 --- a/Source/Core/Common/Src/CPUDetect.h +++ b/Source/Core/Common/Src/CPUDetect.h @@ -40,6 +40,7 @@ struct CPUInfo bool hyper_threaded; int num_cores; + int logical_cpu_count; bool bSSE; bool bSSE2; @@ -52,6 +53,7 @@ struct CPUInfo bool bSSE4A; bool bLAHFSAHF64; bool bLongMode; + bool HTT; // Call Detect() explicit CPUInfo(); diff --git a/Source/Core/VideoCommon/Src/BPFunctions.cpp b/Source/Core/VideoCommon/Src/BPFunctions.cpp index ca88cc5486..331b7e702f 100644 --- a/Source/Core/VideoCommon/Src/BPFunctions.cpp +++ b/Source/Core/VideoCommon/Src/BPFunctions.cpp @@ -96,8 +96,18 @@ void ClearScreen(const BPCmd &bp, const EFBRectangle &rc) { u32 color = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB; u32 z = bpmem.clearZValue; + + // texture formats logic transposition from "EFB Copy to Texture" to "Copy Clear Screen" concepts. + // this it's a deduction without assurance. Ref. (p.12(Nintendo Co., Ltd. US 2010/0073394 A1)) + UPE_Copy EFB_copy = bpmem.triggerEFBCopy; - g_renderer->ClearScreen(rc, colorEnable, alphaEnable, zEnable, color, z); + // since this is an early implementation and we can't be sure, forward clauses are fairly restrictive. + if (EFB_copy.tp_realFormat() == 6) // RGBA8 + color |= (!EFB_copy.intensity_fmt && z > 0) ? 0xFF000000 : 0x0; + else if (EFB_copy.tp_realFormat() == 7) // A8 + color |= ((!EFB_copy.intensity_fmt && bpmem.zcontrol.pixel_format > 3) || z > 0) ? 0xFF000000 : 0x0; + + g_renderer->ClearScreen(rc, colorEnable, zEnable, color, z); } } diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index eb4fae5d43..abafe201d2 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -815,19 +815,22 @@ union UPE_Copy u32 Hex; struct { - u32 clamp0 : 1; - u32 clamp1 : 1; - u32 : 1; - u32 target_pixel_format : 4; // realformat is (fmt/2)+((fmt&1)*8).... for some reason the msb is the lsb - u32 gamma : 2; - u32 half_scale : 1; // real size should be 2x smaller (run a gauss filter?) "mipmap" - u32 scale_invert : 1; + u32 clamp0 : 1; // if set clamp top + u32 clamp1 : 1; // if set clamp bottom + u32 yuv : 1; // if set, color conversion from RGB to YUV + u32 target_pixel_format : 4; // realformat is (fmt/2)+((fmt&1)*8).... for some reason the msb is the lsb (pattern: cycling right shift) + u32 gamma : 2; // gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved + u32 half_scale : 1; // "mipmap" filter... 0 = no filter (scale 1:1) ; 1 = box filter (scale 2:1) + u32 scale_invert : 1; // if set vertical scaling is on u32 clear : 1; - u32 frame_to_field : 2; + u32 frame_to_field : 2; // 0 progressive ; 1 is reserved ; 2 = interlaced (even lines) ; 3 = interlaced 1 (odd lines) u32 copy_to_xfb : 1; u32 intensity_fmt : 1; // if set, is an intensity format (I4,I8,IA4,IA8) - u32 : 16; // seems to set everything to 1s when target pixel format is invalid + u32 auto_conv : 1; // if 0 automatic color conversion by texture format and pixel type }; + u32 tp_realFormat() { + return target_pixel_format / 2 + (target_pixel_format & 1) * 8; + } }; diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 1683546344..016aa969d3 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -229,6 +229,8 @@ void BPWritten(const BPCmd& bp) EFBRectangle rc; rc.left = (int)bpmem.copyTexSrcXY.x; rc.top = (int)bpmem.copyTexSrcXY.y; + + // Here Width+1 like Height, otherwise some textures are corrupted already since the native resolution. rc.right = (int)(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1); rc.bottom = (int)(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1); @@ -242,8 +244,7 @@ void BPWritten(const BPCmd& bp) CopyEFB(bp, rc, bpmem.copyTexDest << 5, bpmem.zcontrol.pixel_format == PIXELFMT_Z24, - PE_copy.intensity_fmt > 0, - ((PE_copy.target_pixel_format / 2) + ((PE_copy.target_pixel_format & 1) * 8)), + PE_copy.intensity_fmt > 0,PE_copy.tp_realFormat(), PE_copy.half_scale); } else diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index f787ffa3e7..76bc2e7be6 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -210,7 +210,7 @@ void PixelShaderManager::SetConstants() } else { - SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 1.0, 0); + SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); } s_bFogParamChanged = false; } diff --git a/Source/Core/VideoCommon/Src/RenderBase.h b/Source/Core/VideoCommon/Src/RenderBase.h index 0e8111cafb..83238931ce 100644 --- a/Source/Core/VideoCommon/Src/RenderBase.h +++ b/Source/Core/VideoCommon/Src/RenderBase.h @@ -109,7 +109,7 @@ public: virtual void RenderText(const char* pstr, int left, int top, u32 color) = 0; - virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) = 0; + virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z) = 0; static void RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc); virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0; diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp index b9a1ba7f2c..fffde7b7b2 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp @@ -438,6 +438,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, { case 0: // Z4 case 1: // Z8 + case 8: // Z8 colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; break; @@ -477,6 +478,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, { case 0: // Z4 case 1: // Z8 + case 8: // Z8 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f; cbufid = 12; break; @@ -528,12 +530,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, case 1: // I8 case 2: // IA4 case 3: // IA8 + case 8: // I8 // TODO - verify these coefficients colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f; colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f; colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f; - if (copyfmt < 2) + if (copyfmt < 2 || copyfmt == 8) { fConstAdd[3] = 16.0f / 255.0f; colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; @@ -558,6 +561,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, switch (copyfmt) { case 0: // R4 + case 1: // R8 case 8: // R8 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; cbufid = 2; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp index c3ac135643..b97cdd1842 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp @@ -684,7 +684,7 @@ void Renderer::UpdateViewport() D3D::context->RSSetViewports(1, &vp); } -void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) +void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z) { ResetAPIState(); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.h b/Source/Plugins/Plugin_VideoDX11/Src/Render.h index bc4d9df3aa..2bb6084650 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.h @@ -35,7 +35,7 @@ public: void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight, const EFBRectangle& rc); - void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z); + void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z); void UpdateViewport(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 1fa95c2bc8..3703362d68 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -776,7 +776,7 @@ void Renderer::UpdateViewport() D3D::dev->SetViewport(&vp); } -void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) +void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z) { // Reset rendering pipeline while keeping color masks and depth buffer settings ResetAPIState(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.h b/Source/Plugins/Plugin_VideoDX9/Src/Render.h index dc8e30fed1..73e49b4f21 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.h @@ -35,7 +35,7 @@ public: void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight, const EFBRectangle& rc); - void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z); + void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z); void UpdateViewport(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 1a11367236..a796afa306 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -803,7 +803,7 @@ void Renderer::UpdateViewport() glDepthRange(GLNear, GLFar); } -void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) +void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z) { // Update the view port for clearing the picture TargetRectangle targetRc = ConvertEFBRectangle(rc); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.h b/Source/Plugins/Plugin_VideoOGL/Src/Render.h index 3747c38f28..d65e5bc67c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.h @@ -37,7 +37,7 @@ public: void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight, const EFBRectangle& rc); - void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z); + void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool zEnable, u32 color, u32 z); void UpdateViewport();