From 4a2f4d9cfe28847039ccf939f68a0d486796398b Mon Sep 17 00:00:00 2001 From: Shoegzer Date: Thu, 29 Dec 2022 16:22:52 -0500 Subject: [PATCH 1/5] Add include to fix compiling --- src/xenia/ui/imgui_drawer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/ui/imgui_drawer.h b/src/xenia/ui/imgui_drawer.h index 6009ebdf1..f5057f403 100644 --- a/src/xenia/ui/imgui_drawer.h +++ b/src/xenia/ui/imgui_drawer.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "xenia/ui/immediate_drawer.h" From f357f26eaef52c6d4c9aa69a9f069c00139da9fb Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 5 Feb 2023 15:14:48 -0800 Subject: [PATCH 2/5] [Build] Add parallel PPC test generation Utilizes `multiprocessing` to allow for multiple power-pc assembly tests to be generated in parallel. Some results on my i9-11900k(8c/16t): Before: ``` Measure-Command {.\xb gentests} Days : 0 Hours : 0 Minutes : 0 Seconds : 11 Milliseconds : 200 Ticks : 112007585 TotalDays : 0.000129638408564815 TotalHours : 0.00311132180555556 TotalMinutes : 0.186679308333333 TotalSeconds : 11.2007585 TotalMilliseconds : 11200.7585 ``` After: ``` Measure-Command {.\xb gentests} Days : 0 Hours : 0 Minutes : 0 Seconds : 5 Milliseconds : 426 Ticks : 54265895 TotalDays : 6.28077488425926E-05 TotalHours : 0.00150738597222222 TotalMinutes : 0.0904431583333333 TotalSeconds : 5.4265895 TotalMilliseconds : 5426.5895 ``` This is an over **x2** speedup! --- xenia-build | 117 ++++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/xenia-build b/xenia-build index 8985ad9de..130032323 100755 --- a/xenia-build +++ b/xenia-build @@ -8,6 +8,8 @@ Run with --help or no arguments for possible commands. """ from __future__ import print_function from datetime import datetime +from multiprocessing import Pool +from functools import partial import argparse import json import os @@ -1206,6 +1208,62 @@ class GenTestsCommand(Command): ''', *args, **kwargs) + def process_src_file(test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm, src_file): + print('- %s' % src_file) + + def make_unix_path(p): + """Forces a unix path separator style, as required by binutils. + """ + return p.replace(os.sep, '/') + + src_name = os.path.splitext(os.path.basename(src_file))[0] + obj_file = os.path.join(test_bin, src_name) + '.o' + shell_call([ + ppc_as, + '-a32', + '-be', + '-mregnames', + '-mpower7', + '-maltivec', + '-mvsx', + '-mvmx128', + '-R', + '-o%s' % (make_unix_path(obj_file)), + make_unix_path(src_file), + ]) + dis_file = os.path.join(test_bin, src_name) + '.dis' + shell_call([ + ppc_objdump, + '--adjust-vma=0x100000', + '-Mpower7', + '-Mvmx128', + '-D', + '-EB', + make_unix_path(obj_file), + ], stdout_path=dis_file) + # Eat the first 4 lines to kill the file path that'll differ across machines. + with open(dis_file) as f: + dis_file_lines = f.readlines() + with open(dis_file, 'w') as f: + f.writelines(dis_file_lines[4:]) + shell_call([ + ppc_ld, + '-A powerpc:common32', + '-melf32ppc', + '-EB', + '-nostdlib', + '--oformat=binary', + '-Ttext=0x80000000', + '-e0x80000000', + '-o%s' % (make_unix_path(os.path.join(test_bin, src_name) + '.bin')), + make_unix_path(obj_file), + ]) + shell_call([ + ppc_nm, + '--numeric-sort', + make_unix_path(obj_file), + ], stdout_path=os.path.join(test_bin, src_name) + '.map') + def execute(self, args, pass_args, cwd): print('Generating test binaries...') print('') @@ -1229,61 +1287,12 @@ class GenTestsCommand(Command): if (name.startswith('instr_') or name.startswith('seq_')) and name.endswith(('.s'))] - def make_unix_path(p): - """Forces a unix path separator style, as required by binutils. - """ - return p.replace(os.sep, '/') - any_errors = False - for src_file in src_files: - print('- %s' % src_file) - src_name = os.path.splitext(os.path.basename(src_file))[0] - obj_file = os.path.join(test_bin, src_name) + '.o' - shell_call([ - ppc_as, - '-a32', - '-be', - '-mregnames', - '-mpower7', - '-maltivec', - '-mvsx', - '-mvmx128', - '-R', - '-o%s' % (make_unix_path(obj_file)), - make_unix_path(src_file), - ]) - dis_file = os.path.join(test_bin, src_name) + '.dis' - shell_call([ - ppc_objdump, - '--adjust-vma=0x100000', - '-Mpower7', - '-Mvmx128', - '-D', - '-EB', - make_unix_path(obj_file), - ], stdout_path=dis_file) - # Eat the first 4 lines to kill the file path that'll differ across machines. - with open(dis_file) as f: - dis_file_lines = f.readlines() - with open(dis_file, 'w') as f: - f.writelines(dis_file_lines[4:]) - shell_call([ - ppc_ld, - '-A powerpc:common32', - '-melf32ppc', - '-EB', - '-nostdlib', - '--oformat=binary', - '-Ttext=0x80000000', - '-e0x80000000', - '-o%s' % (make_unix_path(os.path.join(test_bin, src_name) + '.bin')), - make_unix_path(obj_file), - ]) - shell_call([ - ppc_nm, - '--numeric-sort', - make_unix_path(obj_file), - ], stdout_path=os.path.join(test_bin, src_name) + '.map') + + pool_func = partial(GenTestsCommand.process_src_file, test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm) + with Pool() as pool: + pool.map(pool_func, src_files) + if any_errors: print('ERROR: failed to build one or more tests.') From c238d8af550c457ac61a4ebef6888b3895802dbb Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 30 Mar 2023 22:28:56 +0300 Subject: [PATCH 3/5] [Vulkan] Fix FragStencilRef store type --- .../gpu/vulkan/vulkan_render_target_cache.cc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 8113827e5..ecd4b7fd1 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -3174,7 +3174,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( source_stencil[0] != spv::NoResult) { // For the depth -> depth case, write the stencil directly to the output. assert_true(mode.output == TransferOutput::kDepth); - builder.createStore(source_stencil[0], output_fragment_stencil_ref); + builder.createStore(builder.createUnaryOp(spv::OpBitcast, type_int, + source_stencil[0]), + output_fragment_stencil_ref); } if (dest_is_64bpp) { @@ -3518,13 +3520,15 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( if (output_fragment_stencil_ref != spv::NoResult) { builder.createStore( builder.createUnaryOp( - spv::OpConvertFToU, type_uint, - builder.createBinOp( - spv::OpFAdd, type_float, - builder.createBinOp(spv::OpFMul, type_float, - source_color[0][0], - unorm_scale), - unorm_round_offset)), + spv::OpBitcast, type_int, + builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[0][0], + unorm_scale), + unorm_round_offset))), output_fragment_stencil_ref); } } From baa2ff78d8f44883b0d70ab89fd9b4ee53a45e80 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 30 Mar 2023 22:40:40 +0300 Subject: [PATCH 4/5] [Vulkan] Add missing stencil reference unpack in RT transfer + formatting fix --- .../gpu/vulkan/vulkan_render_target_cache.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index ecd4b7fd1..fc9a7bb79 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -3174,9 +3174,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( source_stencil[0] != spv::NoResult) { // For the depth -> depth case, write the stencil directly to the output. assert_true(mode.output == TransferOutput::kDepth); - builder.createStore(builder.createUnaryOp(spv::OpBitcast, type_int, - source_stencil[0]), - output_fragment_stencil_ref); + builder.createStore( + builder.createUnaryOp(spv::OpBitcast, type_int, source_stencil[0]), + output_fragment_stencil_ref); } if (dest_is_64bpp) { @@ -4335,6 +4335,17 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( builder.createOp(spv::OpPhi, type_float, id_vector_temp); } builder.createStore(fragment_depth32, output_fragment_depth); + // Unpack the stencil into the stencil reference output if needed and + // not already written. + if (!packed_only_depth && + output_fragment_stencil_ref != spv::NoResult) { + builder.createStore( + builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, + builder.makeUintConstant(UINT8_MAX))), + output_fragment_stencil_ref); + } } } break; case TransferOutput::kStencilBit: { From 88c645d8182d517364d6f3884c1d4739f0664320 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 9 Apr 2023 18:07:44 +0300 Subject: [PATCH 5/5] [D3D12] Don't use emit_then_cut due to RDNA 3 crash --- src/xenia/gpu/d3d12/pipeline_cache.cc | 19 ++++++++++--------- src/xenia/gpu/dxbc.h | 2 ++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 91ac56a91..83b195c1e 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -2390,6 +2390,10 @@ void PipelineCache::CreateDxbcGeometryShader( // to again. // Also, FXC generates only movs (from statically or dynamically indexed // v[#][#], from r#, or from a literal) to o# for some reason. + // emit_then_cut_stream must not be used - it crashes the shader compiler of + // AMD Software: Adrenalin Edition 23.3.2 on RDNA 3 if it's conditional (after + // a `retc` or inside an `if`), and it doesn't seem to be generated by FXC or + // DXC at all. // Discard the whole primitive if any vertex has a NaN position (may also be // set to NaN for emulation of vertex killing with the OR operator). @@ -2535,11 +2539,9 @@ void PipelineCache::CreateDxbcGeometryShader( dxbc::Src::V2D( 0, input_register_clip_and_cull_distances + (j >> 2))); } - if (i < 3) { - a.OpEmitStream(stream); - } + a.OpEmitStream(stream); } - a.OpEmitThenCutStream(stream); + a.OpCutStream(stream); } break; case PipelineGeometryShader::kRectangleList: { @@ -2685,7 +2687,8 @@ void PipelineCache::CreateDxbcGeometryShader( clip_distance_mask), dxbc::Src::R(1)); } - a.OpEmitThenCutStream(stream); + a.OpEmitStream(stream); + a.OpCutStream(stream); } break; case PipelineGeometryShader::kQuadList: { @@ -2716,11 +2719,9 @@ void PipelineCache::CreateDxbcGeometryShader( input_vertex_index, input_register_clip_and_cull_distances + (j >> 2))); } - if (i < 3) { - a.OpEmitStream(stream); - } + a.OpEmitStream(stream); } - a.OpEmitThenCutStream(stream); + a.OpCutStream(stream); } break; default: diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index 2c9f5eeab..42d8d89d8 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -2196,6 +2196,8 @@ class Assembler { ++stat_.instruction_count; ++stat_.cut_instruction_count; } + // Don't use emit_then_cut_stream - crashes AMD Software: Adrenalin Edition + // 23.3.2 shader compiler on RDNA 3 if used conditionally. void OpEmitThenCutStream(const Dest& stream) { uint32_t operands_length = stream.GetLength(); code_.reserve(code_.size() + 1 + operands_length);