From 6daeb565502d65cf6e8835a2db51eb3ba4276d6a Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Fri, 3 Jun 2022 01:00:39 +1000 Subject: [PATCH] GS: Support using SW renderer for texture decompression sprites --- .github/workflows/scripts/lint/gamedb/lint.py | 2 + bin/resources/GameIndex.yaml | 116 ++++- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 1 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 72 ++- pcsx2/Config.h | 1 + pcsx2/GS/GS.cpp | 3 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 454 ++++++++++++++++++ pcsx2/GS/Renderers/HW/GSRendererHW.h | 11 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 26 +- pcsx2/GS/Renderers/HW/GSTextureCache.h | 7 +- pcsx2/GS/Renderers/SW/GSRendererSW.cpp | 48 +- pcsx2/GS/Renderers/SW/GSRendererSW.h | 13 +- pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp | 35 ++ pcsx2/GS/Renderers/SW/GSTextureCacheSW.h | 2 + pcsx2/GS/Renderers/SW/GSVertexSW.h | 15 + pcsx2/GameDatabase.cpp | 7 + pcsx2/GameDatabase.h | 1 + pcsx2/Pcsx2Config.cpp | 3 + 18 files changed, 767 insertions(+), 50 deletions(-) diff --git a/.github/workflows/scripts/lint/gamedb/lint.py b/.github/workflows/scripts/lint/gamedb/lint.py index 8fec7cb0a9..aa6bd756ff 100644 --- a/.github/workflows/scripts/lint/gamedb/lint.py +++ b/.github/workflows/scripts/lint/gamedb/lint.py @@ -61,6 +61,7 @@ allowed_gs_hw_fixes = [ "roundSprite", "texturePreloading", "deinterlace", + "cpuSpriteRenderBW", ] gs_hw_fix_ranges = { "mipmap": (0, 2), @@ -70,6 +71,7 @@ gs_hw_fix_ranges = { "halfPixelOffset": (0, 3), "roundSprite": (0, 2), "deinterlace": (0, 7), + "cpuSpriteRenderBW": (1, 10), } allowed_speed_hacks = ["mvuFlagSpeedHack", "InstantVU1SpeedHack", "MTVUSpeedHack"] # Patches are allowed to have a 'default' key or a crc-32 key, followed by diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index 45ec4ddb16..5980c5c5f6 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -9433,6 +9433,8 @@ SLES-50288: name: "Stuntman" region: "PAL-M5" compat: 5 + 
gsHWFixes: + cpuSpriteRenderBW: 4 # Fixes textures. SLES-50296: name: "Gift" region: "PAL-F" @@ -13507,6 +13509,8 @@ SLES-52371: SLES-52372: name: "Spider-Man 2" region: "PAL-M5" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-52373: name: "Champions of Norrath" region: "PAL-E-S" @@ -13653,6 +13657,8 @@ SLES-52446: SLES-52447: name: "Spider-Man 2" region: "PAL-I" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-52448: name: "Knights of the Temple" region: "PAL-M4" @@ -13753,6 +13759,8 @@ SLES-52490: SLES-52493: name: "Spider-Man 2" region: "PAL-E" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-52495: name: "Bujingai - Swordmaster" region: "PAL-M5" @@ -14458,9 +14466,13 @@ SLES-52781: SLES-52782: name: "Call of Duty - Finest Hour" region: "PAL-E" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-52783: name: "Call of Duty - Le Jour de Gloire" region: "PAL-F" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. patches: 30AE5279: content: |- @@ -14470,6 +14482,8 @@ SLES-52783: SLES-52784: name: "Call of Duty - Finest Hour" region: "PAL-G" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-52798: name: "Vietcong - Purple Haze" region: "PAL-M4" @@ -15766,11 +15780,15 @@ SLES-53390: compat: 5 clampModes: vuClampMode: 0 # Fixes Spider-Man's eye texture colour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-53391: name: "Ultimate Spider-Man" region: "PAL-M5" clampModes: vuClampMode: 0 # Fixes Spider-Man's eye texture colour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-53393: name: "Spartan - Total Warrior" region: "PAL-M5" @@ -15849,6 +15867,7 @@ SLES-53415: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. 
SLES-53416: name: "Call of Duty 2 - Big Red One" region: "PAL-M3" @@ -15856,6 +15875,7 @@ SLES-53416: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. SLES-53417: name: "Call of Duty 2 - Big Red One" region: "PAL-G" @@ -15863,6 +15883,7 @@ SLES-53417: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. SLES-53418: name: "Tak - The Great JuJu Challenge" region: "PAL-A" @@ -15870,6 +15891,8 @@ SLES-53418: SLES-53419: name: "LA Rush" region: "PAL-M5" + gsHWFixes: + cpuSpriteRenderBW: 4 # Fixes textures. SLES-53420: name: "Winnie the Pooh's Rumbly Tumbly Adventure" region: "PAL-PL" @@ -16404,15 +16427,23 @@ SLES-53616: region: "PAL-E" clampModes: eeClampMode: 2 # Fixes SPS on highway. + roundModes: + eeRoundMode: 0 # Fixes scene switching in intro. gsHWFixes: - halfPixelOffset: 1 # Fixes ghosting. + cpuSpriteRenderBW: 1 # Fixes textures. + preloadFrameData: 1 # Fixes static text screens. + roundSprite: 1 # Fixes lines in some post-effects. SLES-53618: name: "True Crime - New York City" region: "PAL-S" clampModes: eeClampMode: 2 # Fixes SPS on highway. + roundModes: + eeRoundMode: 0 # Fixes scene switching in intro. gsHWFixes: - halfPixelOffset: 1 # Fixes ghosting. + cpuSpriteRenderBW: 1 # Fixes textures. + preloadFrameData: 1 # Fixes static text screens. + roundSprite: 1 # Fixes lines in some post-effects. SLES-53621: name: "Wallace & Grommit - The Curse of the Were Rabbit" region: "PAL-M5" @@ -16535,6 +16566,8 @@ SLES-53672: region: "PAL-E" clampModes: vuClampMode: 0 # Fixes Fixes Spider-Man's eye texture colour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-53676: name: "WWE SmackDown! vs. 
RAW 2006" region: "PAL-E" @@ -16688,6 +16721,7 @@ SLES-53722: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. SLES-53724: name: "World Series of Poker" region: "PAL-E" @@ -16766,9 +16800,13 @@ SLES-53749: SLES-53751: name: "Shrek Superslam" region: "PAL-M3" + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-53752: name: "Shrek Superslam" region: "PAL-M4" + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-53753: name: "Test Drive Unlimited" region: "PAL-M5" @@ -17667,16 +17705,22 @@ SLES-54166: region: "PAL-E" clampModes: eeClampMode: 3 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-54167: name: "Call of Duty 3" region: "PAL-M3" clampModes: eeClampMode: 3 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-54168: name: "Call of Duty 3" region: "PAL-G" clampModes: eeClampMode: 3 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-54169: name: "Aeon Flux" region: "PAL-M4" @@ -18979,11 +19023,15 @@ SLES-54723: compat: 4 roundModes: eeRoundMode: 0 # Fixes idle camera behaviour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-54724: name: "Spider-Man 3" region: "PAL-M4" roundModes: eeRoundMode: 0 # Fixes idle camera behaviour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-54725: name: "Kirikou and the Wild Beasts" region: "PAL-M5" @@ -19812,6 +19860,8 @@ SLES-55030: SLES-55031: name: "Kung Fu Panda" region: "PAL-F" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-55032: name: "Off Road" region: "PAL-M5" @@ -20211,12 +20261,18 @@ SLES-55233: SLES-55234: name: "Kung Fu Panda" region: "PAL-SW" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-55235: name: "Kung Fu Panda" region: "PAL-I" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLES-55236: name: "Kung Fu Panda" region: "PAL-G-S" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. 
SLES-55237: name: "Naruto - Ultimate Ninja 3" region: "PAL-M5" @@ -20779,6 +20835,8 @@ SLES-55518: SLES-55520: name: "Transformers - Revenge of the Fallen" region: "PAL-M5" + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLES-55522: name: "Disney-Pixar Up" region: "PAL-E" @@ -22270,6 +22328,8 @@ SLKA-25385: region: "NTSC-K" roundModes: eeRoundMode: 0 # Fixes idle camera behaviour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLKA-25388: name: "One Piece - Grand Adventure" region: "NTSC-K" @@ -28394,6 +28454,8 @@ SLPM-66018: SLPM-66019: name: "Stuntman" region: "NTSC-J" + gsHWFixes: + cpuSpriteRenderBW: 4 # Fixes textures. SLPM-66020: name: "Psi-Ops - The Mindgate Conspiracy" region: "NTSC-J" @@ -28865,6 +28927,8 @@ SLPM-66158: SLPM-66159: name: "Call of Duty - Final Hour" region: "NTSC-J" + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLPM-66160: name: "Devil May Cry 3 [Special Edition]" region: "NTSC-J" @@ -29480,6 +29544,7 @@ SLPM-66328: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. SLPM-66329: name: "Mahou Sensei Negima! Kagai Jugyou" region: "NTSC-J" @@ -29721,6 +29786,8 @@ SLPM-66404: region: "NTSC-J" clampModes: vuClampMode: 0 # Fixes Spider-Man's eye texture colour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLPM-66405: name: "Rajirugi - Precious" region: "NTSC-J" @@ -29977,8 +30044,12 @@ SLPM-66473: region: "NTSC-J" clampModes: eeClampMode: 2 # Fixes SPS on highway. + roundModes: + eeRoundMode: 0 # Fixes scene switching in intro. gsHWFixes: - halfPixelOffset: 1 # Fixes ghosting. + cpuSpriteRenderBW: 1 # Fixes textures. + preloadFrameData: 1 # Fixes static text screens. + roundSprite: 1 # Fixes lines in some post-effects. SLPM-66474: name: "Odin Sphere" region: "NTSC-J" @@ -32171,8 +32242,12 @@ SLPM-74243: region: "NTSC-J" clampModes: eeClampMode: 2 # Fixes SPS on highway. 
+ roundModes: + eeRoundMode: 0 # Fixes scene switching in intro. gsHWFixes: - halfPixelOffset: 1 # Fixes ghosting. + cpuSpriteRenderBW: 1 # Fixes textures. + preloadFrameData: 1 # Fixes static text screens. + roundSprite: 1 # Fixes lines in some post-effects. SLPM-74244: name: "Phantasy Star Universe [PlayStation 2 The Best]" region: "NTSC-J" @@ -36377,6 +36452,8 @@ SLPS-25823: region: "NTSC-J" roundModes: eeRoundMode: 0 # Fixes idle camera behaviour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLPS-25825: name: "Zero no Tsukaima [Best Collection]" region: "NTSC-J" @@ -38243,6 +38320,8 @@ SLUS-20250: name: "Stuntman" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 4 # Fixes textures. SLUS-20251: name: "Harvest Moon - Save the Homeland" region: "NTSC-U" @@ -40347,6 +40426,8 @@ SLUS-20725: name: "Call of Duty - Finest Hour" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-20726: name: "ESPN - NBA Basketball" region: "NTSC-U" @@ -40596,6 +40677,8 @@ SLUS-20776: name: "Spider-Man 2" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-20777: name: "Obscure" region: "NTSC-U" @@ -40975,6 +41058,8 @@ SLUS-20870: compat: 5 clampModes: vuClampMode: 0 # Fixes wrong texture colour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLUS-20871: name: "Naval Ops - Commander" region: "NTSC-U" @@ -42144,8 +42229,12 @@ SLUS-21106: compat: 5 clampModes: eeClampMode: 2 # Fixes SPS on highway. + roundModes: + eeRoundMode: 0 # Fixes scene switching in intro. gsHWFixes: - halfPixelOffset: 1 # Fixes ghosting. + cpuSpriteRenderBW: 1 # Fixes textures. + preloadFrameData: 1 # Fixes static text screens. + roundSprite: 1 # Fixes lines in some post-effects. SLUS-21107: name: "X-Men - The Official Game" region: "NTSC-U" @@ -42177,6 +42266,8 @@ SLUS-21112: name: "L.A. Rush" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 4 # Fixes textures. 
SLUS-21113: name: "Atelier Iris - Eternal Mana" region: "NTSC-U" @@ -42566,6 +42657,8 @@ SLUS-21197: name: "Shrek Superslam" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLUS-21198: name: "Batman Begins" region: "NTSC-U" @@ -42724,6 +42817,7 @@ SLUS-21228: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-21229: name: "Motocross Mania 3" region: "NTSC-U" @@ -43061,6 +43155,8 @@ SLUS-21285: region: "NTSC-U" clampModes: vuClampMode: 0 # Fixes Spider-Man's eyes texture. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLUS-21286: name: "WWE SmackDown! vs. RAW 2006" region: "NTSC-U" @@ -43232,7 +43328,7 @@ SLUS-21318: - SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly. gsHWFixes: roundSprite: 1 # Fixes lines in sprites. - cpuFramebufferConversion: 1 # Fixes some textures but most are still massively broken. + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-21319: name: "Flow - Urban Dance Uprising" region: "NTSC-U" @@ -43855,6 +43951,8 @@ SLUS-21426: compat: 5 clampModes: eeClampMode: 3 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-21427: name: "WWE SmackDown! vs. RAW 2007" region: "NTSC-U" @@ -44318,6 +44416,8 @@ SLUS-21552: compat: 4 roundModes: eeRoundMode: 0 # Fixes idle camera behaviour. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. SLUS-21553: name: "Lumines Plus" region: "NTSC-U" @@ -45255,6 +45355,8 @@ SLUS-21757: name: "Kung Fu Panda" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 1 # Fixes textures. SLUS-21758: name: "Rock Band - Track Pack Vol.1" region: "NTSC-U" @@ -45752,6 +45854,8 @@ SLUS-21881: name: "Transformers - Revenge of the Fallen" region: "NTSC-U" compat: 5 + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes textures. 
patches: 137C792E: content: |- diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index 3d6dd0c10f..c5094f3f07 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -184,6 +184,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* // HW Renderer Fixes ////////////////////////////////////////////////////////////////////////// SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index 279a54883b..6d70ab70c3 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -698,14 +698,14 @@ - + Skipdraw Range: - + @@ -723,7 +723,7 @@ - + @@ -783,6 +783,72 @@ + + + + CPU Sprite Render Size: + + + + + + + + 0 (Disabled) + + + + + 1 (64 Max Width) + + + + + 2 (128 Max Width) + + + + + 3 (192 Max Width) + + + + + 4 (256 Max Width) + + + + + 5 (320 Max Width) + + + + + 6 (384 Max Width) + + + + + 7 (448 Max Width) + + + + + 8 (512 Max Width) + + + + + 9 (576 Max Width) + + + + + 10 (640 Max Width) + + + + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index ba7c991321..060a10de26 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -543,6 +543,7 @@ struct Pcsx2Config int UserHacks_RoundSprite{0}; int UserHacks_TCOffsetX{0}; int UserHacks_TCOffsetY{0}; + int 
UserHacks_CPUSpriteRenderBW{0}; TriFiltering UserHacks_TriFilter{TriFiltering::Automatic}; int OverrideTextureBarriers{-1}; int OverrideGeometryShaders{-1}; diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index e2ec76f6b8..1d009a0cd1 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -838,7 +838,8 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config) GSConfig.UserHacks_CPUFBConversion != old_config.UserHacks_CPUFBConversion || GSConfig.UserHacks_DisableDepthSupport != old_config.UserHacks_DisableDepthSupport || GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation || - GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt) + GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt || + GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW) { g_gs_renderer->PurgeTextureCache(); g_gs_renderer->PurgePool(); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 583c9b046e..6c015b39fc 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -17,6 +17,8 @@ #include "GSRendererHW.h" #include "GSTextureReplacements.h" #include "GS/GSGL.h" +#include "GS/Renderers/SW/GSTextureCacheSW.h" +#include "GS/Renderers/SW/GSDrawScanline.h" #include "Host.h" #include "common/StringUtil.h" @@ -1302,6 +1304,13 @@ void GSRendererHW::Draw() const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p; const bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f); + // We trigger the sw prim render here super early, to avoid creating superfluous render targets. 
+ if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender()) + { + GL_CACHE("Possible texture decompression, drawn with SwPrimRender()"); + return; + } + if (m_channel_shuffle) { m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page; @@ -3674,6 +3683,451 @@ bool GSRendererHW::IsDummyTexture() const return g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum && m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_context->FRAME.FBMSK == 0x00FFFFFF); } +bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex) +{ + // Master enable. + if (GSConfig.UserHacks_CPUSpriteRenderBW == 0) + return false; + + // We don't ever want to do this when we have a depth buffer, and only for textured sprites. + if (no_rt || !no_ds || !draw_sprite_tex) + return false; + + // Check the size threshold. Spider-man 2 uses a FBW of 32 for some silly reason... + if (m_context->FRAME.FBW > static_cast<u32>(GSConfig.UserHacks_CPUSpriteRenderBW) && m_context->FRAME.FBW != 32) + return false; + + // We shouldn't be using mipmapping, and this shouldn't be a blended draw. + // TODO: Jak 3 builds textures semi-procedurally using blending, and would be a good candidate here. + if (IsMipMapActive() || !IsOpaque()) + return false; + + // Make sure this isn't something we've actually rendered to (e.g. a texture shuffle). + // We do this by checking the texture block width against the target's block width, as all the decompression draws + // will use a much smaller block size than the framebuffer. + GSTextureCache::Target* src_target = m_tc->GetTargetWithSharedBits(m_context->TEX0.TBP0, m_context->TEX0.PSM); + if (src_target && src_target->m_TEX0.TBW == m_context->TEX0.TBW) + return false; + + // We can use the sw prim render path!
+ return true; +} + +bool GSRendererHW::SwPrimRender() +{ + const GSDrawingContext* context = m_context; + const GSDrawingEnvironment& env = m_env; + const GS_PRIM_CLASS primclass = m_vt.m_primclass; + + GSDrawScanline::SharedData data; + GSScanlineGlobalData& gd = data.global; + + u32 clut_storage[256]; + GSVector4i dimx_storage[8]; + + m_sw_vertex_buffer.resize(((m_vertex.next + 1) & ~1)); + + data.primclass = m_vt.m_primclass; + data.buff = nullptr; + data.vertex = m_sw_vertex_buffer.data(); + data.vertex_count = m_vertex.next; + data.index = m_index.buff; + data.index_count = m_index.tail; + data.scanmsk_value = m_env.SCANMSK.MSK; + + // Skip per pixel division if q is constant. + // Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q. + // If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depend on the first part of the 'OR'. + const u32 q_div = ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS)); + GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, data.vertex, m_vertex.buff, m_vertex.next); + + GSVector4i scissor = GSVector4i(m_context->scissor.in); + GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); + + // Points and lines may have zero area bbox (single line: 0, 0 - 256, 0) + + if (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) + { + if (bbox.x == bbox.z) + bbox.z++; + if (bbox.y == bbox.w) + bbox.w++; + } + + GSVector4i r = bbox.rintersect(scissor); + + scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour + + data.scissor = scissor; + data.bbox = bbox; + data.frame = g_perfmon.GetFrame(); + + gd.vm = m_mem.m_vm8; + + gd.fbo = context->offset.fb; + gd.zbo = context->offset.zb; + gd.fzbr = context->offset.fzb4->row; + gd.fzbc = context->offset.fzb4->col; + + gd.sel.key = 0; +
gd.sel.fpsm = 3; + gd.sel.zpsm = 3; + gd.sel.atst = ATST_ALWAYS; + gd.sel.tfx = TFX_NONE; + gd.sel.ababcd = 0xff; + gd.sel.prim = primclass; + + u32 fm = context->FRAME.FBMSK; + u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; + const u32 fm_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; + + // When the format is 24bit (Z or C), DATE ceases to function. + // It was believed that in 24bit mode all pixels pass because alpha doesn't exist + // however after testing this on a PS2 it turns out nothing passes, it ignores the draw. + if ((m_context->FRAME.PSM & 0xF) == PSM_PSMCT24 && m_context->TEST.DATE) + { + //DevCon.Warning("DATE on a 24bit format, Frame PSM %x", m_context->FRAME.PSM); + return false; + } + + if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) + { + fm = 0xffffffff; + zm = 0xffffffff; + } + + if (PRIM->TME) + { + if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) + { + m_mem.m_clut.Read32(context->TEX0, env.TEXA); + } + } + + if (context->TEST.ATE) + { + if (!TryAlphaTest(fm, fm_mask, zm)) + { + gd.sel.atst = context->TEST.ATST; + gd.sel.afail = context->TEST.AFAIL; + + gd.aref = GSVector4i((int)context->TEST.AREF); + + switch (gd.sel.atst) + { + case ATST_LESS: + gd.sel.atst = ATST_LEQUAL; + gd.aref -= GSVector4i::x00000001(); + break; + case ATST_GREATER: + gd.sel.atst = ATST_GEQUAL; + gd.aref += GSVector4i::x00000001(); + break; + } + } + } + + bool fwrite = (fm & fm_mask) != fm_mask; + bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + + bool zwrite = zm != 0xffffffff; + bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS; + if (!fwrite && !zwrite) + return false; + + gd.sel.fwrite = fwrite; + gd.sel.ftest = ftest; + + if (fwrite || ftest) + { + gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; + + if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) + { + gd.sel.iip = PRIM->IIP; + } + + if 
(PRIM->TME) + { + gd.sel.tfx = context->TEX0.TFX; + gd.sel.tcc = context->TEX0.TCC; + gd.sel.fst = PRIM->FST; + gd.sel.ltf = m_vt.IsLinear(); + + if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) + { + gd.sel.tlu = 1; + + gd.clut = clut_storage; // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats + + memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal); + } + + gd.sel.wms = context->CLAMP.WMS; + gd.sel.wmt = context->CLAMP.WMT; + + if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) + { + // modulate does not do anything when vertex color is 0x80 + + gd.sel.tfx = TFX_DECAL; + } + + GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), false); + + GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage; + + if (!m_sw_texture) + m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA); + else + m_sw_texture->Reset(0, TEX0, env.TEXA); + + m_sw_texture->Update(r); + gd.tex[0] = m_sw_texture->m_buff; + + gd.sel.tw = m_sw_texture->m_tw - 3; + + { + // skip per pixel division if q is constant. Sprite uses flat + // q, so it's always constant by primitive. + // Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer + gd.sel.fst |= (m_vt.m_eq.q || primclass == GS_SPRITE_CLASS); + + if (gd.sel.ltf && gd.sel.fst) + { + // if q is constant we can do the half pel shift for bilinear sampling on the vertices + + // TODO: but not when mipmapping is used!!!
+ + GSVector4 half(0x8000, 0x8000); + + GSVertexSW* RESTRICT v = data.vertex; + + for (int i = 0, j = data.vertex_count; i < j; i++) + { + GSVector4 t = v[i].t; + + v[i].t = (t - half).xyzw(t); + } + } + } + + u16 tw = 1u << TEX0.TW; + u16 th = 1u << TEX0.TH; + + switch (context->CLAMP.WMS) + { + case CLAMP_REPEAT: + gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1; + gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0; + gd.t.mask.U32[0] = 0xffffffff; + break; + case CLAMP_CLAMP: + gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0; + gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1; + gd.t.mask.U32[0] = 0; + break; + case CLAMP_REGION_CLAMP: + gd.t.min.U16[0] = gd.t.minmax.U16[0] = std::min<u16>(context->CLAMP.MINU, tw - 1); + gd.t.max.U16[0] = gd.t.minmax.U16[2] = std::min<u16>(context->CLAMP.MAXU, tw - 1); + gd.t.mask.U32[0] = 0; + break; + case CLAMP_REGION_REPEAT: + gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1); + gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU & (tw - 1); + gd.t.mask.U32[0] = 0xffffffff; + break; + default: + __assume(0); + } + + switch (context->CLAMP.WMT) + { + case CLAMP_REPEAT: + gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1; + gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0; + gd.t.mask.U32[2] = 0xffffffff; + break; + case CLAMP_CLAMP: + gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0; + gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1; + gd.t.mask.U32[2] = 0; + break; + case CLAMP_REGION_CLAMP: + gd.t.min.U16[4] = gd.t.minmax.U16[1] = std::min<u16>(context->CLAMP.MINV, th - 1); + gd.t.max.U16[4] = gd.t.minmax.U16[3] = std::min<u16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) + gd.t.mask.U32[2] = 0; + break; + case CLAMP_REGION_REPEAT: + gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 + gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV & (th - 1); + gd.t.mask.U32[2] = 0xffffffff; + break; + default: + __assume(0); + } +
+ gd.t.min = gd.t.min.xxxxlh(); + gd.t.max = gd.t.max.xxxxlh(); + gd.t.mask = gd.t.mask.xxzz(); + gd.t.invmask = ~gd.t.mask; + } + + if (PRIM->FGE) + { + gd.sel.fge = 1; + + gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff; + gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff; + } + + if (context->FRAME.PSM != PSM_PSMCT24) + { + gd.sel.date = context->TEST.DATE; + gd.sel.datm = context->TEST.DATM; + } + + if (!IsOpaque()) + { + gd.sel.abe = PRIM->ABE; + gd.sel.ababcd = context->ALPHA.U32[0]; + + if (env.PABE.PABE) + { + gd.sel.pabe = 1; + } + + if (GSConfig.AA1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) + { + gd.sel.aa1 = 1; + } + + gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh(); + } + + const u32 masked_fm = fm & fm_mask; + if (gd.sel.date + || gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1 + || gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY + || gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask + || gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask + || gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask) + { + gd.sel.rfb = 1; + } + + gd.sel.colclamp = env.COLCLAMP.CLAMP; + gd.sel.fba = context->FBA.FBA; + + if (env.DTHE.DTHE) + { + gd.sel.dthe = 1; + + gd.dimx = dimx_storage; + + memcpy(gd.dimx, env.dimx, sizeof(env.dimx)); + } + } + + gd.sel.zwrite = zwrite; + gd.sel.ztest = ztest; + + if (zwrite || ztest) + { + u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8); + + gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt; + gd.sel.ztst = ztest ? 
context->TEST.ZTST : (int)ZTST_ALWAYS; + gd.sel.zequal = !!m_vt.m_eq.z; + gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U; + gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max; + } + +#if _M_SSE >= 0x501 + + gd.fm = fm; + gd.zm = zm; + + if (gd.sel.fpsm == 1) + { + gd.fm |= 0xff000000; + } + else if (gd.sel.fpsm == 2) + { + u32 rb = gd.fm & 0x00f800f8; + u32 ga = gd.fm & 0x8000f800; + + gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000; + } + + if (gd.sel.zpsm == 1) + { + gd.zm |= 0xff000000; + } + else if (gd.sel.zpsm == 2) + { + gd.zm |= 0xffff0000; + } + +#else + + gd.fm = GSVector4i(fm); + gd.zm = GSVector4i(zm); + + if (gd.sel.fpsm == 1) + { + gd.fm |= GSVector4i::xff000000(); + } + else if (gd.sel.fpsm == 2) + { + GSVector4i rb = gd.fm & 0x00f800f8; + GSVector4i ga = gd.fm & 0x8000f800; + + gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000(); + } + + if (gd.sel.zpsm == 1) + { + gd.zm |= GSVector4i::xff000000(); + } + else if (gd.sel.zpsm == 2) + { + gd.zm |= GSVector4i::xffff0000(); + } + +#endif + + if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only + { + gd.sel.notest = 1; + + u32 ofx = context->XYOFFSET.OFX; + + for (int i = 0, j = m_vertex.tail; i < j; i++) + { +#if _M_SSE >= 0x501 + if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 +#else + if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 +#endif + { + gd.sel.notest = 0; + + break; + } + } + } + + if (!m_sw_rasterizer) + m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1); + + m_sw_rasterizer->Draw(&data); + + m_tc->InvalidateVideoMem(context->offset.fb, bbox); + return true; +} + // hacks GSRendererHW::Hacks::Hacks() diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 4e6bd247a0..77644777cf 100644 ---
a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -18,8 +18,11 @@ #include "GSTextureCache.h" #include "GS/Renderers/Common/GSFunctionMap.h" #include "GS/Renderers/Common/GSRenderer.h" +#include "GS/Renderers/SW/GSTextureCacheSW.h" #include "GS/GSState.h" +class GSRasterizer; + class GSRendererHW : public GSRenderer { public: @@ -130,6 +133,9 @@ private: void SwSpriteRender(); bool CanUseSwSpriteRender(); + bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex); + bool SwPrimRender(); + template <bool linear> void RoundSpriteOffset(); @@ -160,6 +166,11 @@ private: GSHWDrawConfig m_conf; + // software sprite renderer state + std::vector<GSVertexSW> m_sw_vertex_buffer; + std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture; + std::unique_ptr<GSRasterizer> m_sw_rasterizer; + public: GSRendererHW(); virtual ~GSRendererHW() override; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 683deb02d9..82687e9c89 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -23,6 +23,8 @@ #include "common/Align.h" #include "common/HashCombine.h" +//#define DISABLE_HW_TEXTURE_CACHE 1 + #define XXH_STATIC_LINKING_ONLY 1 #define XXH_INLINE_ALL 1 #include "xxhash.h" @@ -931,11 +933,17 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r u32 rowsize = bw * 8192; u32 offset = (u32)((t->m_TEX0.TBP0 - bp) * 256); - if (rowsize > 0 && offset % rowsize == 0) + // This grossness is needed to fix incorrect invalidations in True Crime: New York City. + // Because it's writing tiny texture blocks (which are later decompressed) over previous targets, + // we need to ensure said targets are invalidated, otherwise the SW prim render path won't be + // triggered. This whole thing needs rewriting anyway, because it can't handle non-page-aligned + // writes, but for now we'll just use the unsafer logic when the TC hack is enabled.
+ const bool start_of_page = rowsize > 0 && (offset % rowsize == 0); + if (start_of_page || (rowsize > 0 && GSConfig.UserHacks_CPUSpriteRenderBW != 0)) { int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - if (r.bottom > y) + if (r.bottom > y && (start_of_page || r.top >= y)) { GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type), t->m_texture ? t->m_texture->GetID() : 0, @@ -1212,6 +1220,20 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, u32 PSM) return nullptr; } +GSTextureCache::Target* GSTextureCache::GetTargetWithSharedBits(u32 BP, u32 PSM) const +{ + auto& rts = m_dst[GSLocalMemory::m_psm[PSM].depth ? DepthStencil : RenderTarget]; + for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. + { + Target* t = *it; + u32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; + if (GSUtil::HasSharedBits(BP, PSM, t->m_TEX0.TBP0, t_psm)) + return t; + } + + return nullptr; +} + // Hack: remove Target that are strictly included in current rt. Typically uses for FMV // For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800 // FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 7d044fdce3..63fe66cee9 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -286,9 +286,6 @@ protected: Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, const GSVector2i* lod = nullptr, const GSVector4i* src_range = nullptr); Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear); - /// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly. 
- Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const; - HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod); static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level); @@ -314,6 +311,10 @@ public: Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_h = 0); Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_h); + /// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly. + Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const; + Target* GetTargetWithSharedBits(u32 BP, u32 PSM) const; + void InvalidateVideoMemType(int type, u32 bp); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index 01dd532f2e..e794e2c8a4 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -22,9 +22,9 @@ static FILE* s_fp = LOG ? 
fopen("c:\\temp1\\_.txt", "w") : NULL; -CONSTINIT const GSVector4 GSRendererSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); +CONSTINIT const GSVector4 GSVertexSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #if _M_SSE >= 0x501 -CONSTINIT const GSVector8 GSRendererSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); +CONSTINIT const GSVector8 GSVertexSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #endif GSRendererSW::GSRendererSW(int threads) @@ -40,21 +40,6 @@ GSRendererSW::GSRendererSW(int threads) std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0); std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0); - #define InitCVB2(P, Q) \ - m_cvb[P][0][0][Q] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][0][1][Q] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][1][0][Q] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][1][1][Q] = &GSRendererSW::ConvertVertexBuffer; - - #define InitCVB(P) \ - InitCVB2(P, 0) \ - InitCVB2(P, 1) - - InitCVB(GS_POINT_CLASS); - InitCVB(GS_LINE_CLASS); - InitCVB(GS_TRIANGLE_CLASS); - InitCVB(GS_SPRITE_CLASS); - m_dump_root = root_sw; } @@ -195,15 +180,15 @@ GSTexture* GSRendererSW::GetFeedbackOutput() template -void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) +void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) { // FIXME q_div wasn't added to AVX2 code path. 
- GSVector4i off = (GSVector4i)m_context->XYOFFSET; - GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); - GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); + GSVector4i off = (GSVector4i)ctx->XYOFFSET; + GSVector4 tsize = GSVector4(0x10000 << ctx->TEX0.TW, 0x10000 << ctx->TEX0.TH, 1, 0); + GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[ctx->ZBUF.PSM].fmt * 8); - for (int i = (int)m_vertex.next; i > 0; i--, src++, dst++) + for (int i = (int)count; i > 0; i--, src++, dst++) { GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q @@ -266,6 +251,23 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* } } +// clang-format off +GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2] = { +#define InitCVB3(P, T, F) { &GSVertexSW::ConvertVertexBuffer, &GSVertexSW::ConvertVertexBuffer } +#define InitCVB2(P, T) { InitCVB3(P, T, 0), InitCVB3(P, T, 1) } +#define InitCVB(P) { InitCVB2(static_cast(P), 0), InitCVB2(static_cast(P), 1) } + + InitCVB(GS_POINT_CLASS), + InitCVB(GS_LINE_CLASS), + InitCVB(GS_TRIANGLE_CLASS), + InitCVB(GS_SPRITE_CLASS) + +#undef InitCVB +#undef InitCVB2 +#undef InitCVB3 +}; +// clang-format on + void GSRendererSW::Draw() { const GSDrawingContext* context = m_context; @@ -304,7 +306,7 @@ void GSRendererSW::Draw() // If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR' u32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS)); - (this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div])(sd->vertex, m_vertex.buff, m_vertex.next); + GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next); memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail); diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.h 
b/pcsx2/GS/Renderers/SW/GSRendererSW.h index a5ff728910..85c63b99c3 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.h +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.h @@ -21,11 +21,7 @@ class GSRendererSW final : public GSRenderer { - static const GSVector4 m_pos_scale; -#if _M_SSE >= 0x501 - static const GSVector8 m_pos_scale2; -#endif - +public: class SharedData : public GSDrawScanline::SharedData { struct alignas(16) TextureLevel @@ -59,13 +55,6 @@ class GSRendererSW final : public GSRenderer void UpdateSource(); }; - typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - - ConvertVertexBufferPtr m_cvb[4][2][2][2]; - - template - void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - protected: std::unique_ptr m_rl; std::unique_ptr m_tc; diff --git a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp index 917df9f567..46c2cd16df 100644 --- a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.cpp @@ -172,6 +172,41 @@ GSTextureCacheSW::Texture::~Texture() } } +void GSTextureCacheSW::Texture::Reset(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) +{ + if (m_buff && (m_TEX0.TW != TEX0.TW || m_TEX0.TH != TEX0.TH)) + { + _aligned_free(m_buff); + m_buff = NULL; + } + + m_tw = tw0; + m_age = 0; + m_complete = false; + m_p2t = NULL; + m_TEX0 = TEX0; + m_TEXA = TEXA; + + if (m_tw == 0) + { + m_tw = std::max(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 
3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff + } + + memset(m_valid, 0, sizeof(m_valid)); + + m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM); + + m_offset = g_gs_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); + m_pages = m_offset.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); + + m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower + + if (m_repeating) + { + m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0); + } +} + bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) { if (m_complete) diff --git a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.h b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.h index 196657e711..1d12efc4cd 100644 --- a/pcsx2/GS/Renderers/SW/GSTextureCacheSW.h +++ b/pcsx2/GS/Renderers/SW/GSTextureCacheSW.h @@ -46,6 +46,8 @@ public: Texture(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); virtual ~Texture(); + void Reset(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + bool Update(const GSVector4i& r); bool Save(const std::string& fn, bool dds = false) const; }; diff --git a/pcsx2/GS/Renderers/SW/GSVertexSW.h b/pcsx2/GS/Renderers/SW/GSVertexSW.h index 7358f2e41c..028ecbc381 100644 --- a/pcsx2/GS/Renderers/SW/GSVertexSW.h +++ b/pcsx2/GS/Renderers/SW/GSVertexSW.h @@ -17,6 +17,9 @@ #include "GS/GSVector.h" +class GSDrawingContext; +struct GSVertex; + struct alignas(32) GSVertexSW { // When drawing sprites: @@ -242,6 +245,18 @@ struct alignas(32) GSVertexSW #endif } + + typedef void (*ConvertVertexBufferPtr)(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); + + static ConvertVertexBufferPtr s_cvb[4][2][2][2]; + + template + static void ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); + + static const GSVector4 m_pos_scale; +#if _M_SSE >= 0x501 + static const GSVector8 
m_pos_scale2; +#endif }; #if _M_SSE >= 0x501 diff --git a/pcsx2/GameDatabase.cpp b/pcsx2/GameDatabase.cpp index 9d7add8a19..d3aa690302 100644 --- a/pcsx2/GameDatabase.cpp +++ b/pcsx2/GameDatabase.cpp @@ -290,6 +290,7 @@ static const char* s_gs_hw_fix_names[] = { "roundSprite", "texturePreloading", "deinterlace", + "cpuSpriteRenderBW", }; static_assert(std::size(s_gs_hw_fix_names) == static_cast(GameDatabaseSchema::GSHWFixId::Count), "HW fix name lookup is correct size"); @@ -556,6 +557,7 @@ u32 GameDatabaseSchema::GameEntry::applyGSHardwareFixes(Pcsx2Config::GSOptions& break; case GSHWFixId::Deinterlace: + { if (value >= 0 && value <= static_cast(GSInterlaceMode::Automatic)) { if (config.InterlaceMode == GSInterlaceMode::Automatic) @@ -563,8 +565,13 @@ u32 GameDatabaseSchema::GameEntry::applyGSHardwareFixes(Pcsx2Config::GSOptions& else Console.Warning("[GameDB] Game requires different deinterlace mode but it has been overridden by user setting."); } + } break; + case GSHWFixId::CPUSpriteRenderBW: + config.UserHacks_CPUSpriteRenderBW = value; + break; + default: break; } diff --git a/pcsx2/GameDatabase.h b/pcsx2/GameDatabase.h index 9f441078cf..d24ecafefb 100644 --- a/pcsx2/GameDatabase.h +++ b/pcsx2/GameDatabase.h @@ -82,6 +82,7 @@ namespace GameDatabaseSchema RoundSprite, TexturePreloading, Deinterlace, + CPUSpriteRenderBW, Count }; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index de70d50af4..c3641b1479 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -411,6 +411,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(UserHacks_RoundSprite) && OpEqu(UserHacks_TCOffsetX) && OpEqu(UserHacks_TCOffsetY) && + OpEqu(UserHacks_CPUSpriteRenderBW) && OpEqu(UserHacks_TriFilter) && OpEqu(OverrideTextureBarriers) && OpEqu(OverrideGeometryShaders) && @@ -597,6 +598,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingIntEx(UserHacks_RoundSprite, "UserHacks_round_sprite_offset"); 
GSSettingIntEx(UserHacks_TCOffsetX, "UserHacks_TCOffsetX"); GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY"); + GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW"); GSSettingIntEnumEx(UserHacks_TriFilter, "UserHacks_TriFilter"); GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers"); GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders"); @@ -643,6 +645,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks() UserHacks_TextureInsideRt = false; UserHacks_TCOffsetX = 0; UserHacks_TCOffsetY = 0; + UserHacks_CPUSpriteRenderBW = 0; SkipDrawStart = 0; SkipDrawEnd = 0;