diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 29501b299..6defad83a 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -2394,6 +2394,10 @@ void PipelineCache::CreateDxbcGeometryShader( // to again. // Also, FXC generates only movs (from statically or dynamically indexed // v[#][#], from r#, or from a literal) to o# for some reason. + // emit_then_cut_stream must not be used - it crashes the shader compiler of + // AMD Software: Adrenalin Edition 23.3.2 on RDNA 3 if it's conditional (after + // a `retc` or inside an `if`), and it doesn't seem to be generated by FXC or + // DXC at all. // Discard the whole primitive if any vertex has a NaN position (may also be // set to NaN for emulation of vertex killing with the OR operator). @@ -2539,11 +2543,9 @@ void PipelineCache::CreateDxbcGeometryShader( dxbc::Src::V2D( 0, input_register_clip_and_cull_distances + (j >> 2))); } - if (i < 3) { - a.OpEmitStream(stream); - } + a.OpEmitStream(stream); } - a.OpEmitThenCutStream(stream); + a.OpCutStream(stream); } break; case PipelineGeometryShader::kRectangleList: { @@ -2689,7 +2691,8 @@ void PipelineCache::CreateDxbcGeometryShader( clip_distance_mask), dxbc::Src::R(1)); } - a.OpEmitThenCutStream(stream); + a.OpEmitStream(stream); + a.OpCutStream(stream); } break; case PipelineGeometryShader::kQuadList: { @@ -2720,11 +2723,9 @@ void PipelineCache::CreateDxbcGeometryShader( input_vertex_index, input_register_clip_and_cull_distances + (j >> 2))); } - if (i < 3) { - a.OpEmitStream(stream); - } + a.OpEmitStream(stream); } - a.OpEmitThenCutStream(stream); + a.OpCutStream(stream); } break; default: diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index a39989645..ea44abe46 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -2197,6 +2197,8 @@ class Assembler { ++stat_.instruction_count; ++stat_.cut_instruction_count; } + // Don't use emit_then_cut_stream - crashes AMD Software: Adrenalin Edition + // 23.3.2 shader compiler on RDNA 3 if used conditionally. void OpEmitThenCutStream(const Dest& stream) { uint32_t operands_length = stream.GetLength(); code_.reserve(code_.size() + 1 + operands_length); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 75aac0dcb..5fe2181b0 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -3174,7 +3174,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( source_stencil[0] != spv::NoResult) { // For the depth -> depth case, write the stencil directly to the output. assert_true(mode.output == TransferOutput::kDepth); - builder.createStore(source_stencil[0], output_fragment_stencil_ref); + builder.createStore( + builder.createUnaryOp(spv::OpBitcast, type_int, source_stencil[0]), + output_fragment_stencil_ref); } if (dest_is_64bpp) { @@ -3518,13 +3520,15 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( if (output_fragment_stencil_ref != spv::NoResult) { builder.createStore( builder.createUnaryOp( - spv::OpConvertFToU, type_uint, - builder.createBinOp( - spv::OpFAdd, type_float, - builder.createBinOp(spv::OpFMul, type_float, - source_color[0][0], - unorm_scale), - unorm_round_offset)), + spv::OpBitcast, type_int, + builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[0][0], + unorm_scale), + unorm_round_offset))), output_fragment_stencil_ref); } } @@ -4331,6 +4335,17 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( builder.createOp(spv::OpPhi, type_float, id_vector_temp); } builder.createStore(fragment_depth32, output_fragment_depth); + // Unpack the stencil into the stencil reference output if needed and + // not already written. + if (!packed_only_depth && + output_fragment_stencil_ref != spv::NoResult) { + builder.createStore( + builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, + builder.makeUintConstant(UINT8_MAX))), + output_fragment_stencil_ref); + } } } break; case TransferOutput::kStencilBit: { diff --git a/src/xenia/ui/imgui_drawer.h b/src/xenia/ui/imgui_drawer.h index 3cabf91cf..550a4c515 100644 --- a/src/xenia/ui/imgui_drawer.h +++ b/src/xenia/ui/imgui_drawer.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "third_party/imgui/imgui.h" diff --git a/xenia-build b/xenia-build index 178e1008d..0bdbd2004 100755 --- a/xenia-build +++ b/xenia-build @@ -8,6 +8,8 @@ Run with --help or no arguments for possible commands. """ from __future__ import print_function from datetime import datetime +from multiprocessing import Pool +from functools import partial import argparse import json import os @@ -1209,6 +1211,62 @@ class GenTestsCommand(Command): ''', *args, **kwargs) + def process_src_file(test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm, src_file): + print('- %s' % src_file) + + def make_unix_path(p): + """Forces a unix path separator style, as required by binutils. + """ + return p.replace(os.sep, '/') + + src_name = os.path.splitext(os.path.basename(src_file))[0] + obj_file = os.path.join(test_bin, src_name) + '.o' + shell_call([ + ppc_as, + '-a32', + '-be', + '-mregnames', + '-mpower7', + '-maltivec', + '-mvsx', + '-mvmx128', + '-R', + '-o%s' % (make_unix_path(obj_file)), + make_unix_path(src_file), + ]) + dis_file = os.path.join(test_bin, src_name) + '.dis' + shell_call([ + ppc_objdump, + '--adjust-vma=0x100000', + '-Mpower7', + '-Mvmx128', + '-D', + '-EB', + make_unix_path(obj_file), + ], stdout_path=dis_file) + # Eat the first 4 lines to kill the file path that'll differ across machines. + with open(dis_file) as f: + dis_file_lines = f.readlines() + with open(dis_file, 'w') as f: + f.writelines(dis_file_lines[4:]) + shell_call([ + ppc_ld, + '-A powerpc:common32', + '-melf32ppc', + '-EB', + '-nostdlib', + '--oformat=binary', + '-Ttext=0x80000000', + '-e0x80000000', + '-o%s' % (make_unix_path(os.path.join(test_bin, src_name) + '.bin')), + make_unix_path(obj_file), + ]) + shell_call([ + ppc_nm, + '--numeric-sort', + make_unix_path(obj_file), + ], stdout_path=os.path.join(test_bin, src_name) + '.map') + def execute(self, args, pass_args, cwd): print('Generating test binaries...') print('') @@ -1232,61 +1290,12 @@ class GenTestsCommand(Command): if (name.startswith('instr_') or name.startswith('seq_')) and name.endswith(('.s'))] - def make_unix_path(p): - """Forces a unix path separator style, as required by binutils. - """ - return p.replace(os.sep, '/') - any_errors = False - for src_file in src_files: - print('- %s' % src_file) - src_name = os.path.splitext(os.path.basename(src_file))[0] - obj_file = os.path.join(test_bin, src_name) + '.o' - shell_call([ - ppc_as, - '-a32', - '-be', - '-mregnames', - '-mpower7', - '-maltivec', - '-mvsx', - '-mvmx128', - '-R', - '-o%s' % (make_unix_path(obj_file)), - make_unix_path(src_file), - ]) - dis_file = os.path.join(test_bin, src_name) + '.dis' - shell_call([ - ppc_objdump, - '--adjust-vma=0x100000', - '-Mpower7', - '-Mvmx128', - '-D', - '-EB', - make_unix_path(obj_file), - ], stdout_path=dis_file) - # Eat the first 4 lines to kill the file path that'll differ across machines. - with open(dis_file) as f: - dis_file_lines = f.readlines() - with open(dis_file, 'w') as f: - f.writelines(dis_file_lines[4:]) - shell_call([ - ppc_ld, - '-A powerpc:common32', - '-melf32ppc', - '-EB', - '-nostdlib', - '--oformat=binary', - '-Ttext=0x80000000', - '-e0x80000000', - '-o%s' % (make_unix_path(os.path.join(test_bin, src_name) + '.bin')), - make_unix_path(obj_file), - ]) - shell_call([ - ppc_nm, - '--numeric-sort', - make_unix_path(obj_file), - ], stdout_path=os.path.join(test_bin, src_name) + '.map') + + pool_func = partial(GenTestsCommand.process_src_file, test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm) + with Pool() as pool: + pool.map(pool_func, src_files) + if any_errors: print('ERROR: failed to build one or more tests.')