Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental

This commit is contained in:
Gliniak 2023-04-09 17:28:04 +02:00
commit 5e0c67438c
5 changed files with 99 additions and 71 deletions

View File

@ -2394,6 +2394,10 @@ void PipelineCache::CreateDxbcGeometryShader(
// to again.
// Also, FXC generates only movs (from statically or dynamically indexed
// v[#][#], from r#, or from a literal) to o# for some reason.
// emit_then_cut_stream must not be used - it crashes the shader compiler of
// AMD Software: Adrenalin Edition 23.3.2 on RDNA 3 if it's conditional (after
// a `retc` or inside an `if`), and it doesn't seem to be generated by FXC or
// DXC at all.
// Discard the whole primitive if any vertex has a NaN position (may also be
// set to NaN for emulation of vertex killing with the OR operator).
@ -2539,11 +2543,9 @@ void PipelineCache::CreateDxbcGeometryShader(
dxbc::Src::V2D(
0, input_register_clip_and_cull_distances + (j >> 2)));
}
if (i < 3) {
a.OpEmitStream(stream);
}
a.OpEmitStream(stream);
}
a.OpEmitThenCutStream(stream);
a.OpCutStream(stream);
} break;
case PipelineGeometryShader::kRectangleList: {
@ -2689,7 +2691,8 @@ void PipelineCache::CreateDxbcGeometryShader(
clip_distance_mask),
dxbc::Src::R(1));
}
a.OpEmitThenCutStream(stream);
a.OpEmitStream(stream);
a.OpCutStream(stream);
} break;
case PipelineGeometryShader::kQuadList: {
@ -2720,11 +2723,9 @@ void PipelineCache::CreateDxbcGeometryShader(
input_vertex_index,
input_register_clip_and_cull_distances + (j >> 2)));
}
if (i < 3) {
a.OpEmitStream(stream);
}
a.OpEmitStream(stream);
}
a.OpEmitThenCutStream(stream);
a.OpCutStream(stream);
} break;
default:

View File

@ -2197,6 +2197,8 @@ class Assembler {
++stat_.instruction_count;
++stat_.cut_instruction_count;
}
// Don't use emit_then_cut_stream - crashes AMD Software: Adrenalin Edition
// 23.3.2 shader compiler on RDNA 3 if used conditionally.
void OpEmitThenCutStream(const Dest& stream) {
uint32_t operands_length = stream.GetLength();
code_.reserve(code_.size() + 1 + operands_length);

View File

@ -3174,7 +3174,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
source_stencil[0] != spv::NoResult) {
// For the depth -> depth case, write the stencil directly to the output.
assert_true(mode.output == TransferOutput::kDepth);
builder.createStore(source_stencil[0], output_fragment_stencil_ref);
builder.createStore(
builder.createUnaryOp(spv::OpBitcast, type_int, source_stencil[0]),
output_fragment_stencil_ref);
}
if (dest_is_64bpp) {
@ -3518,13 +3520,15 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
if (output_fragment_stencil_ref != spv::NoResult) {
builder.createStore(
builder.createUnaryOp(
spv::OpConvertFToU, type_uint,
builder.createBinOp(
spv::OpFAdd, type_float,
builder.createBinOp(spv::OpFMul, type_float,
source_color[0][0],
unorm_scale),
unorm_round_offset)),
spv::OpBitcast, type_int,
builder.createUnaryOp(
spv::OpConvertFToU, type_uint,
builder.createBinOp(
spv::OpFAdd, type_float,
builder.createBinOp(spv::OpFMul, type_float,
source_color[0][0],
unorm_scale),
unorm_round_offset))),
output_fragment_stencil_ref);
}
}
@ -4331,6 +4335,17 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
builder.createOp(spv::OpPhi, type_float, id_vector_temp);
}
builder.createStore(fragment_depth32, output_fragment_depth);
// Unpack the stencil into the stencil reference output if needed and
// not already written.
if (!packed_only_depth &&
output_fragment_stencil_ref != spv::NoResult) {
builder.createStore(
builder.createUnaryOp(
spv::OpBitcast, type_int,
builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed,
builder.makeUintConstant(UINT8_MAX))),
output_fragment_stencil_ref);
}
}
} break;
case TransferOutput::kStencilBit: {

View File

@ -13,6 +13,7 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <vector>
#include "third_party/imgui/imgui.h"

View File

@ -8,6 +8,8 @@ Run with --help or no arguments for possible commands.
"""
from __future__ import print_function
from datetime import datetime
from multiprocessing import Pool
from functools import partial
import argparse
import json
import os
@ -1209,6 +1211,62 @@ class GenTestsCommand(Command):
''',
*args, **kwargs)
def process_src_file(test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm, src_file):
    """Builds all test artifacts for one assembly source file.

    Runs four tools in sequence on src_file: the assembler (producing a .o),
    objdump (producing a .dis, whose first four lines are then dropped), the
    linker (producing a raw .bin) and nm (producing a .map). Every output is
    written into test_bin and named after the source file's base name.

    Args:
      test_bin: Directory that receives the generated files.
      ppc_as: Assembler executable to invoke.
      ppc_objdump: objdump executable to invoke.
      ppc_ld: Linker executable to invoke.
      ppc_nm: nm executable to invoke.
      src_file: Path of the assembly source to process.
    """
    print('- %s' % src_file)

    def to_unix(path):
        """Returns path with the OS separator swapped for '/' (for binutils)."""
        return path.replace(os.sep, '/')

    # Common prefix of every output: <test_bin>/<source name without extension>.
    stem = os.path.splitext(os.path.basename(src_file))[0]
    out_base = os.path.join(test_bin, stem)
    obj_path = out_base + '.o'
    dis_path = out_base + '.dis'
    unix_obj = to_unix(obj_path)

    # 1. Assemble the source into an object file.
    assemble_cmd = [
        ppc_as,
        '-a32',
        '-be',
        '-mregnames',
        '-mpower7',
        '-maltivec',
        '-mvsx',
        '-mvmx128',
        '-R',
        '-o%s' % (unix_obj),
        to_unix(src_file),
    ]
    shell_call(assemble_cmd)

    # 2. Disassemble the object file into the .dis listing.
    disassemble_cmd = [
        ppc_objdump,
        '--adjust-vma=0x100000',
        '-Mpower7',
        '-Mvmx128',
        '-D',
        '-EB',
        unix_obj,
    ]
    shell_call(disassemble_cmd, stdout_path=dis_path)

    # Drop the first 4 lines of the listing: they contain the file path, which
    # differs from machine to machine.
    with open(dis_path) as listing:
        kept_lines = listing.readlines()[4:]
    with open(dis_path, 'w') as listing:
        listing.writelines(kept_lines)

    # 3. Link the object file into a flat binary image.
    link_cmd = [
        ppc_ld,
        '-A powerpc:common32',
        '-melf32ppc',
        '-EB',
        '-nostdlib',
        '--oformat=binary',
        '-Ttext=0x80000000',
        '-e0x80000000',
        '-o%s' % (to_unix(out_base + '.bin')),
        unix_obj,
    ]
    shell_call(link_cmd)

    # 4. Dump the symbol table, sorted by address, into the .map file.
    symbols_cmd = [
        ppc_nm,
        '--numeric-sort',
        unix_obj,
    ]
    shell_call(symbols_cmd, stdout_path=out_base + '.map')
def execute(self, args, pass_args, cwd):
print('Generating test binaries...')
print('')
@ -1232,61 +1290,12 @@ class GenTestsCommand(Command):
if (name.startswith('instr_') or name.startswith('seq_'))
and name.endswith(('.s'))]
def make_unix_path(p):
"""Forces a unix path separator style, as required by binutils.
"""
return p.replace(os.sep, '/')
any_errors = False
for src_file in src_files:
print('- %s' % src_file)
src_name = os.path.splitext(os.path.basename(src_file))[0]
obj_file = os.path.join(test_bin, src_name) + '.o'
shell_call([
ppc_as,
'-a32',
'-be',
'-mregnames',
'-mpower7',
'-maltivec',
'-mvsx',
'-mvmx128',
'-R',
'-o%s' % (make_unix_path(obj_file)),
make_unix_path(src_file),
])
dis_file = os.path.join(test_bin, src_name) + '.dis'
shell_call([
ppc_objdump,
'--adjust-vma=0x100000',
'-Mpower7',
'-Mvmx128',
'-D',
'-EB',
make_unix_path(obj_file),
], stdout_path=dis_file)
# Eat the first 4 lines to kill the file path that'll differ across machines.
with open(dis_file) as f:
dis_file_lines = f.readlines()
with open(dis_file, 'w') as f:
f.writelines(dis_file_lines[4:])
shell_call([
ppc_ld,
'-A powerpc:common32',
'-melf32ppc',
'-EB',
'-nostdlib',
'--oformat=binary',
'-Ttext=0x80000000',
'-e0x80000000',
'-o%s' % (make_unix_path(os.path.join(test_bin, src_name) + '.bin')),
make_unix_path(obj_file),
])
shell_call([
ppc_nm,
'--numeric-sort',
make_unix_path(obj_file),
], stdout_path=os.path.join(test_bin, src_name) + '.map')
pool_func = partial(GenTestsCommand.process_src_file, test_bin, ppc_as, ppc_objdump, ppc_ld, ppc_nm)
with Pool() as pool:
pool.map(pool_func, src_files)
if any_errors:
print('ERROR: failed to build one or more tests.')