From 68dd818029de8c63dd34245d5d42003932d8633a Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Mon, 1 Jun 2020 14:00:22 +0200 Subject: [PATCH] [GPU] Add workaround for occlusion queries. --- src/xenia/gpu/command_processor.cc | 21 ++++++++++++++++++--- src/xenia/gpu/gpu_flags.cc | 7 +++++++ src/xenia/gpu/gpu_flags.h | 2 ++ src/xenia/gpu/xenos.h | 16 +++++++++++++++- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index d2be4ded5..679c572fd 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -1145,9 +1145,24 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader, // Writeback initiator. WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); - // TODO: Flag the backend CP to write out zpass counters to - // REG_RB_SAMPLE_COUNT_ADDR (probably # pixels passed depth test). - // This applies to the last draw, I believe. + // Occlusion queries: + // This command is sent on query begin and end. + // As a workaround report some fixed amount of passed samples. + auto fake_sample_count = cvars::query_occlusion_fake_sample_count; + if (fake_sample_count >= 0) { + auto* pSampleCounts = + memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>( + register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); + // 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END + // and used to detect a finished query. 
+ bool isEnd = pSampleCounts->ZPass_A == xe::byte_swap(0xFFFFFEED) && + pSampleCounts->ZPass_B == xe::byte_swap(0xFFFFFEED); + std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts)); + if (isEnd) { + pSampleCounts->ZPass_A = fake_sample_count; + pSampleCounts->Total_A = fake_sample_count; + } + } return true; } diff --git a/src/xenia/gpu/gpu_flags.cc b/src/xenia/gpu/gpu_flags.cc index 7652fd711..b456ac59e 100644 --- a/src/xenia/gpu/gpu_flags.cc +++ b/src/xenia/gpu/gpu_flags.cc @@ -27,3 +27,10 @@ DEFINE_bool( "may be used to bypass fetch constant type errors in certain games until " "the real reason why they're invalid is found.", "GPU"); + +DEFINE_int32(query_occlusion_fake_sample_count, 1000, + "If set to -1 no sample counts are written, games may hang. Else, " + "the sample count of every tile will be incremented on every " + "EVENT_WRITE_ZPD by this number. Setting this to 0 means " + "everything is reported as occluded.", + "GPU"); diff --git a/src/xenia/gpu/gpu_flags.h b/src/xenia/gpu/gpu_flags.h index 19b786e35..fe344dc4f 100644 --- a/src/xenia/gpu/gpu_flags.h +++ b/src/xenia/gpu/gpu_flags.h @@ -20,4 +20,6 @@ DECLARE_bool(vsync); DECLARE_bool(gpu_allow_invalid_fetch_constants); +DECLARE_int32(query_occlusion_fake_sample_count); + #endif // XENIA_GPU_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 41169e0ff..ee91308b0 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -849,6 +849,20 @@ XEPACKEDUNION(xe_gpu_memexport_stream_t, { }); }); +XEPACKEDSTRUCT(xe_gpu_depth_sample_counts, { + // This is little endian as it is swapped in D3D code. + // Corresponding A and B values are summed up by D3D. + // Occlusion there is calculated by subtracting the begin struct from the end struct. + uint32_t Total_A; + uint32_t Total_B; + uint32_t ZFail_A; + uint32_t ZFail_B; + uint32_t ZPass_A; + uint32_t ZPass_B; + uint32_t StencilFail_A; + uint32_t StencilFail_B; +}); + // Enum of event values used for VGT_EVENT_INITIATOR enum Event { VS_DEALLOC = 0,