[Code] Remove game names from code comments (most of them, at least)
This commit is contained in:
parent
6986d6c7e8
commit
e720e0a540
|
@ -811,12 +811,13 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
|
||||||
// Loop through every sample, convert and drop it into the output array.
|
// Loop through every sample, convert and drop it into the output array.
|
||||||
// If more than one channel, we need to interleave the samples from each
|
// If more than one channel, we need to interleave the samples from each
|
||||||
// channel next to each other. Always saturate because FFmpeg output is
|
// channel next to each other. Always saturate because FFmpeg output is
|
||||||
// not limited to [-1, 1] (for example 1.095 as seen in RDR)
|
// not limited to [-1, 1] (for example 1.095 as seen in 5454082B).
|
||||||
constexpr float scale = (1 << 15) - 1;
|
constexpr float scale = (1 << 15) - 1;
|
||||||
auto out = reinterpret_cast<int16_t*>(output_buffer);
|
auto out = reinterpret_cast<int16_t*>(output_buffer);
|
||||||
|
|
||||||
// For testing of vectorized versions, stereo audio is common in Halo 3, since
|
// For testing of vectorized versions, stereo audio is common in 4D5307E6,
|
||||||
// the first menu frame; the intro cutscene also has more than 2 channels.
|
// since the first menu frame; the intro cutscene also has more than 2
|
||||||
|
// channels.
|
||||||
#if XE_ARCH_AMD64
|
#if XE_ARCH_AMD64
|
||||||
static_assert(kSamplesPerFrame % 8 == 0);
|
static_assert(kSamplesPerFrame % 8 == 0);
|
||||||
const auto in_channel_0 = reinterpret_cast<const float*>(samples[0]);
|
const auto in_channel_0 = reinterpret_cast<const float*>(samples[0]);
|
||||||
|
|
|
@ -1862,8 +1862,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||||
src = i.src1;
|
src = i.src1;
|
||||||
}
|
}
|
||||||
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
|
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
|
||||||
// are valid - max before min to pack NaN as zero (Red Dead Redemption is
|
// are valid - max before min to pack NaN as zero (5454082B is heavily
|
||||||
// heavily affected by the order - packs 0xFFFFFFFF in matrix code to get 0
|
// affected by the order - packs 0xFFFFFFFF in matrix code to get a 0
|
||||||
// constant).
|
// constant).
|
||||||
e.vmaxps(i.dest, src, e.GetXmmConstPtr(XMM3333));
|
e.vmaxps(i.dest, src, e.GetXmmConstPtr(XMM3333));
|
||||||
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
|
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
|
||||||
|
|
|
@ -2069,7 +2069,8 @@ int InstrEmit_vpkd3d128(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
v = f.Pack(v, PACK_TYPE_FLOAT16_4);
|
v = f.Pack(v, PACK_TYPE_FLOAT16_4);
|
||||||
break;
|
break;
|
||||||
case 6: // VPACK_NORMPACKED64 4_20_20_20 w_z_y_x
|
case 6: // VPACK_NORMPACKED64 4_20_20_20 w_z_y_x
|
||||||
// Used in 2K games like NBA 2K9, pretty rarely in general.
|
// Used in 54540829 and other installments in the series, pretty rarely in
|
||||||
|
// general.
|
||||||
v = f.Pack(v, PACK_TYPE_ULONG_4202020);
|
v = f.Pack(v, PACK_TYPE_ULONG_4202020);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -738,7 +738,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4_WAIT_FOR_IDLE: {
|
case PM4_WAIT_FOR_IDLE: {
|
||||||
// This opcode is used by "Duke Nukem Forever" while going/being ingame
|
// This opcode is used by 5454084E when entering and while in-game.
|
||||||
assert_true(count == 1);
|
assert_true(count == 1);
|
||||||
uint32_t value = reader->ReadAndSwap<uint32_t>();
|
uint32_t value = reader->ReadAndSwap<uint32_t>();
|
||||||
XELOGGPU("GPU wait for idle = {:08X}", value);
|
XELOGGPU("GPU wait for idle = {:08X}", value);
|
||||||
|
@ -1168,7 +1168,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
|
||||||
// and used to detect a finished query.
|
// and used to detect a finished query.
|
||||||
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
|
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
|
||||||
pSampleCounts->ZPass_B == kQueryFinished;
|
pSampleCounts->ZPass_B == kQueryFinished;
|
||||||
// Older versions of D3D also checks for ZFail (First Gears of War)
|
// Older versions of D3D also check for ZFail (4D5307D5).
|
||||||
bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished &&
|
bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished &&
|
||||||
pSampleCounts->ZFail_B == kQueryFinished;
|
pSampleCounts->ZFail_B == kQueryFinished;
|
||||||
std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts));
|
std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts));
|
||||||
|
|
|
@ -1662,7 +1662,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
||||||
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
|
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
|
||||||
for (uint32_t i = 0; i < 256; ++i) {
|
for (uint32_t i = 0; i < 256; ++i) {
|
||||||
uint32_t value = gamma_ramp_.normal[i].value;
|
uint32_t value = gamma_ramp_.normal[i].value;
|
||||||
// Swap red and blue (Project Sylpheed has settings allowing separate
|
// Swap red and blue (535107D4 has settings allowing separate
|
||||||
// configuration).
|
// configuration).
|
||||||
mapping[i] = ((value & 1023) << 20) | (value & (1023 << 10)) |
|
mapping[i] = ((value & 1023) << 20) | (value & (1023 << 10)) |
|
||||||
((value >> 20) & 1023);
|
((value >> 20) & 1023);
|
||||||
|
@ -2076,7 +2076,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
||||||
memexport_stream.index_count * memexport_format_size;
|
memexport_stream.index_count * memexport_format_size;
|
||||||
// Try to reduce the number of shared memory operations when writing
|
// Try to reduce the number of shared memory operations when writing
|
||||||
// different elements into the same buffer through different exports
|
// different elements into the same buffer through different exports
|
||||||
// (happens in Halo 3).
|
// (happens in 4D5307E6).
|
||||||
bool memexport_range_reused = false;
|
bool memexport_range_reused = false;
|
||||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
MemExportRange& memexport_range = memexport_ranges[i];
|
MemExportRange& memexport_range = memexport_ranges[i];
|
||||||
|
@ -2878,8 +2878,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
// Get the color info register values for each render target. Also, for ROV,
|
// Get the color info register values for each render target. Also, for ROV,
|
||||||
// exclude components that don't exist in the format from the write mask.
|
// exclude components that don't exist in the format from the write mask.
|
||||||
// Don't exclude fully overlapping render targets, however - two render
|
// Don't exclude fully overlapping render targets, however - two render
|
||||||
// targets with the same base address are used in the lighting pass of Halo 3,
|
// targets with the same base address are used in the lighting pass of
|
||||||
// for example, with the needed one picked with dynamic control flow.
|
// 4D5307E6, for example, with the needed one picked with dynamic control
|
||||||
|
// flow.
|
||||||
reg::RB_COLOR_INFO color_infos[4];
|
reg::RB_COLOR_INFO color_infos[4];
|
||||||
float rt_clamp[4][4];
|
float rt_clamp[4][4];
|
||||||
uint32_t rt_keep_masks[4][2];
|
uint32_t rt_keep_masks[4][2];
|
||||||
|
@ -2898,8 +2899,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable depth and stencil if it aliases a color render target (for
|
// Disable depth and stencil if it aliases a color render target (for
|
||||||
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
|
// instance, during the XBLA logo in 58410954, though depth writing is already
|
||||||
// already disabled there).
|
// disabled there).
|
||||||
bool depth_stencil_enabled =
|
bool depth_stencil_enabled =
|
||||||
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
||||||
if (edram_rov_used && depth_stencil_enabled) {
|
if (edram_rov_used && depth_stencil_enabled) {
|
||||||
|
|
|
@ -83,9 +83,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
// Gets the current color write mask, taking the pixel shader's write mask
|
// Gets the current color write mask, taking the pixel shader's write mask
|
||||||
// into account. If a shader doesn't write to a render target, it shouldn't be
|
// into account. If a shader doesn't write to a render target, it shouldn't be
|
||||||
// written to and it shouldn't be even bound - otherwise, in Halo 3, one
|
// written to and it shouldn't even be bound - otherwise, in 4D5307E6, one
|
||||||
// render target is being destroyed by a shader not writing anything, and in
|
// render target is being destroyed by a shader not writing anything, and in
|
||||||
// Banjo-Tooie, the result of clearing the top tile is being ignored because
|
// 58410955, the result of clearing the top tile is being ignored because
|
||||||
// there are 4 render targets bound with the same EDRAM base (clearly not
|
// there are 4 render targets bound with the same EDRAM base (clearly not
|
||||||
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
||||||
// conflict with each other.
|
// conflict with each other.
|
||||||
|
|
|
@ -3619,7 +3619,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
||||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||||
// Round to the nearest even integer. This seems to be correct, as
|
// Round to the nearest even integer. This seems to be correct, as
|
||||||
// adding +0.5 and rounding towards zero results in red instead of
|
// adding +0.5 and rounding towards zero results in red instead of
|
||||||
// black in GTA IV and Halo 3 clear shaders.
|
// black in the 4D5307E6 clear shader.
|
||||||
a.OpMul(dxbc::Dest::R(i, 0b1000), dxbc::Src::R(i, dxbc::Src::kWWWW),
|
a.OpMul(dxbc::Dest::R(i, 0b1000), dxbc::Src::R(i, dxbc::Src::kWWWW),
|
||||||
dxbc::Src::LF(float(0xFFFFFF)));
|
dxbc::Src::LF(float(0xFFFFFF)));
|
||||||
a.OpRoundNE(dxbc::Dest::R(i, 0b1000),
|
a.OpRoundNE(dxbc::Dest::R(i, 0b1000),
|
||||||
|
@ -3804,7 +3804,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
||||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||||
// Round to the nearest even integer. This seems to be correct, as
|
// Round to the nearest even integer. This seems to be correct, as
|
||||||
// adding +0.5 and rounding towards zero results in red instead of
|
// adding +0.5 and rounding towards zero results in red instead of
|
||||||
// black in GTA IV and Halo 3 clear shaders.
|
// black in the 4D5307E6 clear shader.
|
||||||
a.OpMul(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW),
|
a.OpMul(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||||
dxbc::Src::LF(float(0xFFFFFF)));
|
dxbc::Src::LF(float(0xFFFFFF)));
|
||||||
a.OpRoundNE(dxbc::Dest::R(1, 0b1000),
|
a.OpRoundNE(dxbc::Dest::R(1, 0b1000),
|
||||||
|
@ -4181,7 +4181,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
||||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||||
// Round to the nearest even integer. This seems to be correct,
|
// Round to the nearest even integer. This seems to be correct,
|
||||||
// as adding +0.5 and rounding towards zero results in red
|
// as adding +0.5 and rounding towards zero results in red
|
||||||
// instead of black in GTA IV and Halo 3 clear shaders.
|
// instead of black in the 4D5307E6 clear shader.
|
||||||
a.OpMul(dxbc::Dest::R(0, 0b0010),
|
a.OpMul(dxbc::Dest::R(0, 0b0010),
|
||||||
dxbc::Src::R(0, dxbc::Src::kXXXX),
|
dxbc::Src::R(0, dxbc::Src::kXXXX),
|
||||||
dxbc::Src::LF(float(0xFFFFFF)));
|
dxbc::Src::LF(float(0xFFFFFF)));
|
||||||
|
@ -6228,7 +6228,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
||||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||||
// Round to the nearest even integer. This seems to be correct, as
|
// Round to the nearest even integer. This seems to be correct, as
|
||||||
// adding +0.5 and rounding towards zero results in red instead of
|
// adding +0.5 and rounding towards zero results in red instead of
|
||||||
// black in GTA IV and Halo 3 clear shaders.
|
// black in the 4D5307E6 clear shader.
|
||||||
a.OpMul(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
a.OpMul(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||||
dxbc::Src::LF(float(0xFFFFFF)));
|
dxbc::Src::LF(float(0xFFFFFF)));
|
||||||
a.OpRoundNE(dxbc::Dest::R(1, 0b0001),
|
a.OpRoundNE(dxbc::Dest::R(1, 0b0001),
|
||||||
|
|
|
@ -1567,7 +1567,8 @@ bool PipelineCache::GetCurrentStateDescription(
|
||||||
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
|
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
|
||||||
};
|
};
|
||||||
// Like kBlendFactorMap, but with color modes changed to alpha. Some
|
// Like kBlendFactorMap, but with color modes changed to alpha. Some
|
||||||
// pipelines aren't created in Prey because a color mode is used for alpha.
|
// pipelines aren't created in 545407E0 because a color mode is used for
|
||||||
|
// alpha.
|
||||||
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
|
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
|
||||||
/* 0 */ PipelineBlendFactor::kZero,
|
/* 0 */ PipelineBlendFactor::kZero,
|
||||||
/* 1 */ PipelineBlendFactor::kOne,
|
/* 1 */ PipelineBlendFactor::kOne,
|
||||||
|
@ -1599,7 +1600,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
||||||
// have their sample count matching the one set in the pipeline - however if
|
// have their sample count matching the one set in the pipeline - however if
|
||||||
// we set NumRenderTargets to 0 and also disable depth / stencil, the sample
|
// we set NumRenderTargets to 0 and also disable depth / stencil, the sample
|
||||||
// count must be set to 1 - while the command list may still have
|
// count must be set to 1 - while the command list may still have
|
||||||
// multisampled render targets bound (happens in Halo 3 main menu).
|
// multisampled render targets bound (happens in 4D5307E6 main menu).
|
||||||
// TODO(Triang3l): Investigate interaction of OMSetRenderTargets with
|
// TODO(Triang3l): Investigate interaction of OMSetRenderTargets with
|
||||||
// non-null depth and DSVFormat DXGI_FORMAT_UNKNOWN in the same case.
|
// non-null depth and DSVFormat DXGI_FORMAT_UNKNOWN in the same case.
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
@ -2005,7 +2006,7 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
||||||
state_desc.BlendState.RenderTarget[i];
|
state_desc.BlendState.RenderTarget[i];
|
||||||
// Treat 1 * src + 0 * dest as disabled blending (there are opaque
|
// Treat 1 * src + 0 * dest as disabled blending (there are opaque
|
||||||
// surfaces drawn with blending enabled, but it's 1 * src + 0 * dest, in
|
// surfaces drawn with blending enabled, but it's 1 * src + 0 * dest, in
|
||||||
// Call of Duty 4 - GPU performance is better when not blending.
|
// 415607E6 - GPU performance is better when not blending.
|
||||||
if (rt.src_blend != PipelineBlendFactor::kOne ||
|
if (rt.src_blend != PipelineBlendFactor::kOne ||
|
||||||
rt.dest_blend != PipelineBlendFactor::kZero ||
|
rt.dest_blend != PipelineBlendFactor::kZero ||
|
||||||
rt.blend_op != xenos::BlendOp::kAdd ||
|
rt.blend_op != xenos::BlendOp::kAdd ||
|
||||||
|
|
|
@ -121,8 +121,8 @@ namespace shaders {
|
||||||
// components of operands in shaders.
|
// components of operands in shaders.
|
||||||
// For DXT3A and DXT5A, RRRR swizzle is specified in:
|
// For DXT3A and DXT5A, RRRR swizzle is specified in:
|
||||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||||
// Halo 3 also expects replicated components in k_8 sprites.
|
// 4D5307E6 also expects replicated components in k_8 sprites.
|
||||||
// DXN is read as RG in Halo 3, but as RA in Call of Duty.
|
// DXN is read as RG in 4D5307E6, but as RA in 415607E6.
|
||||||
// TODO(Triang3l): Find out the correct contents of unused texture components.
|
// TODO(Triang3l): Find out the correct contents of unused texture components.
|
||||||
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||||
// k_1_REVERSE
|
// k_1_REVERSE
|
||||||
|
@ -250,9 +250,9 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||||
LoadMode::kUnknown,
|
LoadMode::kUnknown,
|
||||||
{2, 1, 0, 3}},
|
{2, 1, 0, 3}},
|
||||||
// k_Y1_Cr_Y0_Cb_REP
|
// k_Y1_Cr_Y0_Cb_REP
|
||||||
// Used for videos in NBA 2K9. Red and blue must be swapped.
|
// Used for videos in 54540829. Red and blue must be swapped.
|
||||||
// TODO(Triang3l): D3DFMT_G8R8_G8B8 is DXGI_FORMAT_R8G8_B8G8_UNORM * 255.0f,
|
// TODO(Triang3l): D3DFMT_G8R8_G8B8 is DXGI_FORMAT_R8G8_B8G8_UNORM * 255.0f,
|
||||||
// watch out for num_format int, division in shaders, etc., in NBA 2K9 it
|
// watch out for num_format int, division in shaders, etc., in 54540829 it
|
||||||
// works as is. Also need to decompress if the size is uneven, but should be
|
// works as is. Also need to decompress if the size is uneven, but should be
|
||||||
// a very rare case.
|
// a very rare case.
|
||||||
{DXGI_FORMAT_R8G8_B8G8_UNORM,
|
{DXGI_FORMAT_R8G8_B8G8_UNORM,
|
||||||
|
@ -1309,7 +1309,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
||||||
// Clear the bindings not only for this draw call, but entirely, because
|
// Clear the bindings not only for this draw call, but entirely, because
|
||||||
// loading may be needed in some draw call later, which may have the same
|
// loading may be needed in some draw call later, which may have the same
|
||||||
// key for some binding as before the invalidation, but texture_invalidated_
|
// key for some binding as before the invalidation, but texture_invalidated_
|
||||||
// being false (menu background in Halo 3).
|
// being false (menu background in 4D5307E6).
|
||||||
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
|
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
|
||||||
texture_bindings_[i].Clear();
|
texture_bindings_[i].Clear();
|
||||||
}
|
}
|
||||||
|
|
|
@ -418,7 +418,7 @@ class TextureCache {
|
||||||
// Uncompression info for when the regular host format for this texture is
|
// Uncompression info for when the regular host format for this texture is
|
||||||
// block-compressed, but the size is not block-aligned, and thus such
|
// block-compressed, but the size is not block-aligned, and thus such
|
||||||
// texture cannot be created in Direct3D on PC and needs decompression,
|
// texture cannot be created in Direct3D on PC and needs decompression,
|
||||||
// however, such textures are common, for instance, in Halo 3. This only
|
// however, such textures are common, for instance, in 4D5307E6. This only
|
||||||
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
|
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
|
||||||
// used for DXN and DXT5A.
|
// used for DXN and DXT5A.
|
||||||
DXGI_FORMAT dxgi_format_uncompressed;
|
DXGI_FORMAT dxgi_format_uncompressed;
|
||||||
|
|
|
@ -24,12 +24,13 @@
|
||||||
#include "xenia/gpu/texture_util.h"
|
#include "xenia/gpu/texture_util.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
|
||||||
|
// The gap caused by the half-pixel offset, which the option below works
// around, is very prominent in 545407F2.
|
||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
resolve_resolution_scale_duplicate_second_pixel, true,
|
resolve_resolution_scale_duplicate_second_pixel, true,
|
||||||
"When using resolution scale, apply the hack that duplicates the "
|
"When using resolution scale, apply the hack that duplicates the "
|
||||||
"right/lower host pixel in the left and top sides of render target resolve "
|
"right/lower host pixel in the left and top sides of render target resolve "
|
||||||
"areas to eliminate the gap caused by half-pixel offset (this is necessary "
|
"areas to eliminate the gap caused by half-pixel offset (this is necessary "
|
||||||
"for certain games like GTA IV to work).",
|
"for certain games to display the scene graphics).",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
|
||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
|
@ -952,11 +953,11 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
||||||
dest_dimension = xenos::DataDimension::k2DOrStacked;
|
dest_dimension = xenos::DataDimension::k2DOrStacked;
|
||||||
// RB_COPY_DEST_PITCH::copy_dest_height is the real texture height used
|
// RB_COPY_DEST_PITCH::copy_dest_height is the real texture height used
|
||||||
// for 3D texture pitch, it's not relative to 0,0 of the coordinate space
|
// for 3D texture pitch, it's not relative to 0,0 of the coordinate space
|
||||||
// (in Halo 3, the sniper rifle scope has copy_dest_height of 192, but the
|
// (in 4D5307E6, the sniper rifle scope has copy_dest_height of 192, but
|
||||||
// rectangle's Y is 64...256) - provide the real height of the rectangle
|
// the rectangle's Y is 64...256) - provide the real height of the
|
||||||
// since 32x32 tiles are stored linearly anyway. In addition, the height
|
// rectangle since 32x32 tiles are stored linearly anyway. In addition,
|
||||||
// in RB_COPY_DEST_PITCH may be larger than needed - in Red Dead
|
// the height in RB_COPY_DEST_PITCH may be larger than needed - in
|
||||||
// Redemption, a UI texture for the letterbox bars alpha is located within
|
// 5454082B, a UI texture for the letterbox bars alpha is located within
|
||||||
// the range of a 1280x720 resolve target, so with resolution scaling it's
|
// the range of a 1280x720 resolve target, so with resolution scaling it's
|
||||||
// also wrongly detected as scaled, while only 1280x208 is being resolved.
|
// also wrongly detected as scaled, while only 1280x208 is being resolved.
|
||||||
dest_height = uint32_t(y1 - y0);
|
dest_height = uint32_t(y1 - y0);
|
||||||
|
|
|
@ -67,7 +67,7 @@ constexpr bool IsPrimitivePolygonal(bool vgt_output_path_is_tessellation_enable,
|
||||||
// TODO(Triang3l): Investigate how kRectangleList should be treated - possibly
|
// TODO(Triang3l): Investigate how kRectangleList should be treated - possibly
|
||||||
// actually drawn as two polygons on the console, however, the current
|
// actually drawn as two polygons on the console, however, the current
|
||||||
// geometry shader doesn't care about the winding order - allowing backface
|
// geometry shader doesn't care about the winding order - allowing backface
|
||||||
// culling for rectangles currently breaks Gears of War 2.
|
// culling for rectangles currently breaks 4D53082D.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,10 +112,10 @@ constexpr float GetD3D10PolygonOffsetFactor(
|
||||||
return float(1 << 24);
|
return float(1 << 24);
|
||||||
}
|
}
|
||||||
// 20 explicit + 1 implicit (1.) mantissa bits.
|
// 20 explicit + 1 implicit (1.) mantissa bits.
|
||||||
// 2^20 is not enough for Call of Duty 4 retail version's first mission F.N.G.
|
// 2^20 is not enough for 415607E6 retail version's training mission shooting
|
||||||
// shooting range floor (with the number 1) on Direct3D 12. Tested on Nvidia
|
// range floor (with the number 1) on Direct3D 12. Tested on Nvidia GeForce
|
||||||
// GeForce GTX 1070, the exact formula (taking into account the 0...1 to
|
// GTX 1070, the exact formula (taking into account the 0...1 to 0...0.5
|
||||||
// 0...0.5 remapping described below) used for testing is
|
// remapping described below) used for testing is
|
||||||
// `int(ceil(offset * 2^20 * 0.5)) * sign(offset)`. With 2^20 * 0.5, there
|
// `int(ceil(offset * 2^20 * 0.5)) * sign(offset)`. With 2^20 * 0.5, there
|
||||||
// are various kinds of stripes depending on the view angle in that
|
// are various kinds of stripes depending on the view angle in that
|
||||||
// location. With 2^21 * 0.5, the issue is not present.
|
// location. With 2^21 * 0.5, the issue is not present.
|
||||||
|
@ -141,7 +141,7 @@ inline bool DoesCoverageDependOnAlpha(reg::RB_COLORCONTROL rb_colorcontrol) {
|
||||||
// pre-passes and shadowmaps. The shader must have its ucode analyzed. If
|
// pre-passes and shadowmaps. The shader must have its ucode analyzed. If
|
||||||
// IsRasterizationPotentiallyDone, this shouldn't be called, and assumed false
|
// IsRasterizationPotentiallyDone, this shouldn't be called, and assumed false
|
||||||
// instead. Helps reject the pixel shader in some cases - memexport draws in
|
// instead. Helps reject the pixel shader in some cases - memexport draws in
|
||||||
// Halo 3, and also most of some 1-point draws not covering anything done for
|
// 4D5307E6, and also most of some 1-point draws not covering anything done for
|
||||||
// some reason in different games with a leftover pixel shader from the previous
|
// some reason in different games with a leftover pixel shader from the previous
|
||||||
// draw, but with SQ_PROGRAM_CNTL destroyed, reducing the number of
|
// draw, but with SQ_PROGRAM_CNTL destroyed, reducing the number of
|
||||||
// unpredictable unneeded translations of random shaders with different host
|
// unpredictable unneeded translations of random shaders with different host
|
||||||
|
|
|
@ -23,11 +23,12 @@
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/ui/graphics_provider.h"
|
#include "xenia/ui/graphics_provider.h"
|
||||||
|
|
||||||
|
// The test case for AMD is 4D5307E6 (checked in 2018).
|
||||||
DEFINE_bool(dxbc_switch, true,
|
DEFINE_bool(dxbc_switch, true,
|
||||||
"Use switch rather than if for flow control. Turning this off or "
|
"Use switch rather than if for flow control. Turning this off or "
|
||||||
"on may improve stability, though this heavily depends on the "
|
"on may improve stability, though this heavily depends on the "
|
||||||
"driver - on AMD, it's recommended to have this set to true, as "
|
"driver - on AMD, it's recommended to have this set to true, as "
|
||||||
"Halo 3 appears to crash when if is used for flow control "
|
"some titles appear to crash when if is used for flow control "
|
||||||
"(possibly the shader compiler tries to flatten them). On Intel "
|
"(possibly the shader compiler tries to flatten them). On Intel "
|
||||||
"HD Graphics, this is ignored because of a crash with the switch "
|
"HD Graphics, this is ignored because of a crash with the switch "
|
||||||
"instruction.",
|
"instruction.",
|
||||||
|
@ -398,7 +399,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
assert_true(register_count() >= 2);
|
assert_true(register_count() >= 2);
|
||||||
if (register_count() >= 1) {
|
if (register_count() >= 1) {
|
||||||
// Copy the domain location to r0.xyz.
|
// Copy the domain location to r0.xyz.
|
||||||
// ZYX swizzle according to Call of Duty 3 and Viva Pinata.
|
// ZYX swizzle according to 415607E1 and 4D5307F2.
|
||||||
in_domain_location_used_ |= 0b0111;
|
in_domain_location_used_ |= 0b0111;
|
||||||
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
|
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
|
||||||
: dxbc::Dest::R(0, 0b0111),
|
: dxbc::Dest::R(0, 0b0111),
|
||||||
|
@ -425,7 +426,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
if (register_count() >= 1) {
|
if (register_count() >= 1) {
|
||||||
// Copy the domain location to r0.xyz.
|
// Copy the domain location to r0.xyz.
|
||||||
// ZYX swizzle with r1.y == 0, according to the water shader in
|
// ZYX swizzle with r1.y == 0, according to the water shader in
|
||||||
// Banjo-Kazooie: Nuts & Bolts.
|
// 4D5307ED.
|
||||||
in_domain_location_used_ |= 0b0111;
|
in_domain_location_used_ |= 0b0111;
|
||||||
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
|
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
|
||||||
: dxbc::Dest::R(0, 0b0111),
|
: dxbc::Dest::R(0, 0b0111),
|
||||||
|
@ -447,10 +448,10 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
// appears that the tessellator offloads the reordering of coordinates
|
// appears that the tessellator offloads the reordering of coordinates
|
||||||
// for edges to game shaders.
|
// for edges to game shaders.
|
||||||
//
|
//
|
||||||
// In Banjo-Kazooie: Nuts & Bolts, the water shader multiplies the
|
// In 4D5307ED, the water shader multiplies the first control point's
|
||||||
// first control point's position by r0.z, the second CP's by r0.y,
|
// position by r0.z, the second CP's by r0.y, and the third CP's by
|
||||||
// and the third CP's by r0.x. But before doing that it swizzles
|
// r0.x. But before doing that it swizzles r0.xyz the following way
|
||||||
// r0.xyz the following way depending on the value in r1.y:
|
// depending on the value in r1.y:
|
||||||
// - ZXY for 1.0.
|
// - ZXY for 1.0.
|
||||||
// - YZX for 2.0.
|
// - YZX for 2.0.
|
||||||
// - XZY for 4.0.
|
// - XZY for 4.0.
|
||||||
|
@ -478,9 +479,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0011)
|
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0011)
|
||||||
: dxbc::Dest::R(0, 0b0011),
|
: dxbc::Dest::R(0, 0b0011),
|
||||||
dxbc::Src::VDomain());
|
dxbc::Src::VDomain());
|
||||||
// Control point indices according to the shader from the main menu of
|
// Control point indices according to the main menu of 58410823, with
|
||||||
// Defender, which starts from `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz`,
|
// `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz` in the prologue of the
|
||||||
// where c255.x is 0, and c255.y is 1.
|
// shader, where c255.x is 0, and c255.y is 1.
|
||||||
// r0.z for (1 - r0.x) * (1 - r0.y)
|
// r0.z for (1 - r0.x) * (1 - r0.y)
|
||||||
// r1.x for r0.x * (1 - r0.y)
|
// r1.x for r0.x * (1 - r0.y)
|
||||||
// r1.y for r0.x * r0.y
|
// r1.y for r0.x * r0.y
|
||||||
|
@ -509,7 +510,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
assert_true(register_count() >= 2);
|
assert_true(register_count() >= 2);
|
||||||
if (register_count() >= 1) {
|
if (register_count() >= 1) {
|
||||||
// Copy the domain location to r0.yz.
|
// Copy the domain location to r0.yz.
|
||||||
// XY swizzle according to the ground shader in Viva Pinata.
|
// XY swizzle according to the ground shader in 4D5307F2.
|
||||||
in_domain_location_used_ |= 0b0011;
|
in_domain_location_used_ |= 0b0011;
|
||||||
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0110)
|
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0110)
|
||||||
: dxbc::Dest::R(0, 0b0110),
|
: dxbc::Dest::R(0, 0b0110),
|
||||||
|
@ -530,9 +531,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||||
// the tessellator offloads the reordering of coordinates for edges to
|
// the tessellator offloads the reordering of coordinates for edges to
|
||||||
// game shaders.
|
// game shaders.
|
||||||
//
|
//
|
||||||
// In Viva Pinata, if we assume that r0.y is U and r0.z is V, the
|
// In 4D5307F2, if we assume that r0.y is U and r0.z is V, the factors
|
||||||
// factors each control point value is multiplied by are the
|
// each control point value is multiplied by are the following:
|
||||||
// following:
|
|
||||||
// - (1-u)*(1-v), u*(1-v), (1-u)*v, u*v for 0.0 (identity swizzle).
|
// - (1-u)*(1-v), u*(1-v), (1-u)*v, u*v for 0.0 (identity swizzle).
|
||||||
// - u*(1-v), (1-u)*(1-v), u*v, (1-u)*v for 1.0 (YXWZ).
|
// - u*(1-v), (1-u)*(1-v), u*v, (1-u)*v for 1.0 (YXWZ).
|
||||||
// - u*v, (1-u)*v, u*(1-v), (1-u)*(1-v) for 2.0 (WZYX).
|
// - u*v, (1-u)*v, u*(1-v), (1-u)*(1-v) for 2.0 (WZYX).
|
||||||
|
@ -1452,7 +1452,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
dest = dxbc::Dest::R(system_temp_point_size_edge_flag_kill_vertex_);
|
dest = dxbc::Dest::R(system_temp_point_size_edge_flag_kill_vertex_);
|
||||||
break;
|
break;
|
||||||
case InstructionStorageTarget::kExportAddress:
|
case InstructionStorageTarget::kExportAddress:
|
||||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
// Validate memexport writes (4D5307E6 has some completely invalid ones).
|
||||||
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
||||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||||
|
@ -1463,7 +1463,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1]);
|
system_temps_memexport_address_[memexport_alloc_current_count_ - 1]);
|
||||||
break;
|
break;
|
||||||
case InstructionStorageTarget::kExportData: {
|
case InstructionStorageTarget::kExportData: {
|
||||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
// Validate memexport writes (4D5307E6 has some completely invalid ones).
|
||||||
if (memexport_alloc_current_count_ == 0 ||
|
if (memexport_alloc_current_count_ == 0 ||
|
||||||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
|
||||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||||
|
|
|
@ -705,10 +705,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
// Add a small epsilon to the offset (1.5/4 the fixed-point texture
|
// Add a small epsilon to the offset (1.5/4 the fixed-point texture
|
||||||
// coordinate ULP - shouldn't significantly affect the fixed-point
|
// coordinate ULP - shouldn't significantly affect the fixed-point
|
||||||
// conversion; 1/4 is also not enough with 3x resolution scaling very
|
// conversion; 1/4 is also not enough with 3x resolution scaling very
|
||||||
// noticeably on the weapon in Halo 3) to resolve ambiguity when fetching
|
// noticeably on the weapon in 4D5307E6) to resolve ambiguity when fetching
|
||||||
// point-sampled textures between texels. This applies to both normalized
|
// point-sampled textures between texels. This applies to both normalized
|
||||||
// (Banjo-Kazooie Xbox Live Arcade logo, coordinates interpolated between
|
// (58410954 Xbox Live Arcade logo, coordinates interpolated between
|
||||||
// vertices with half-pixel offset) and unnormalized (Halo 3 lighting
|
// vertices with half-pixel offset) and unnormalized (4D5307E6 lighting
|
||||||
// G-buffer reading, ps_param_gen pixels) coordinates. On Nvidia Pascal,
|
// G-buffer reading, ps_param_gen pixels) coordinates. On Nvidia Pascal,
|
||||||
// without this adjustment, blockiness is visible in both cases. Possibly
|
// without this adjustment, blockiness is visible in both cases. Possibly
|
||||||
// there is a better way, however, an attempt was made to error-correct
|
// there is a better way, however, an attempt was made to error-correct
|
||||||
|
@ -1595,13 +1595,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
|
|
||||||
// - Data.
|
// - Data.
|
||||||
|
|
||||||
// Viva Pinata uses vertex displacement map textures for tessellated
|
// 4D5307F2 uses vertex displacement map textures for tessellated models
|
||||||
// models like the beehive tree with explicit LOD with point sampling
|
// like the beehive tree with explicit LOD with point sampling (they store
|
||||||
// (they store values packed in two components), however, the fetch
|
// values packed in two components), however, the fetch constant has
|
||||||
// constant has anisotropic filtering enabled. However, Direct3D 12
|
// anisotropic filtering enabled. However, Direct3D 12 doesn't allow
|
||||||
// doesn't allow mixing anisotropic and point filtering. Possibly
|
// mixing anisotropic and point filtering. Possibly anisotropic filtering
|
||||||
// anisotropic filtering should be disabled when explicit LOD is used - do
|
// should be disabled when explicit LOD is used - do this here.
|
||||||
// this here.
|
|
||||||
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
|
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
|
||||||
tfetch_index, instr.attributes.mag_filter,
|
tfetch_index, instr.attributes.mag_filter,
|
||||||
instr.attributes.min_filter, instr.attributes.mip_filter,
|
instr.attributes.min_filter, instr.attributes.mip_filter,
|
||||||
|
|
|
@ -287,8 +287,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
||||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
|
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
|
||||||
// Choose in which 40-sample half of the tile the pixel is, for swapping
|
// Choose in which 40-sample half of the tile the pixel is, for swapping
|
||||||
// 40-sample columns when accessing the depth buffer - games expect this
|
// 40-sample columns when accessing the depth buffer - games expect this
|
||||||
// behavior when writing depth back to the EDRAM via color writing (GTA IV,
|
// behavior when writing depth back to the EDRAM via color writing (4D5307E6).
|
||||||
// Halo 3).
|
|
||||||
// system_temp_rov_params_.x = tile-local sample 0 X >= 40
|
// system_temp_rov_params_.x = tile-local sample 0 X >= 40
|
||||||
// system_temp_rov_params_.y = row offset
|
// system_temp_rov_params_.y = row offset
|
||||||
// system_temp_rov_params_.z = X sample 0 position within the tile
|
// system_temp_rov_params_.z = X sample 0 position within the tile
|
||||||
|
@ -3282,7 +3281,7 @@ void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp,
|
||||||
dxbc::Src::LF(float(0xFFFFFF)));
|
dxbc::Src::LF(float(0xFFFFFF)));
|
||||||
// Round to the nearest even integer. This seems to be the correct way:
|
// Round to the nearest even integer. This seems to be the correct way:
|
||||||
// rounding towards zero gives 0xFF instead of 0x100 in clear shaders in,
|
// rounding towards zero gives 0xFF instead of 0x100 in clear shaders in,
|
||||||
// for instance, Halo 3, but other clear shaders in it are also broken if
|
// for instance, 4D5307E6, but other clear shaders in it are also broken if
|
||||||
// 0.5 is added before ftou instead of round_ne.
|
// 0.5 is added before ftou instead of round_ne.
|
||||||
a_.OpRoundNE(d24_dest, d24_src);
|
a_.OpRoundNE(d24_dest, d24_src);
|
||||||
// Convert to fixed-point.
|
// Convert to fixed-point.
|
||||||
|
|
|
@ -28,16 +28,18 @@ DEFINE_bool(
|
||||||
"the real reason why they're invalid is found.",
|
"the real reason why they're invalid is found.",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
|
||||||
|
// Extremely bright screen borders in 4D5307E6.
|
||||||
|
// Reading between texels with half-pixel offset in 58410954.
|
||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
half_pixel_offset, true,
|
half_pixel_offset, true,
|
||||||
"Enable support of vertex half-pixel offset (D3D9 PA_SU_VTX_CNTL "
|
"Enable support of vertex half-pixel offset (D3D9 PA_SU_VTX_CNTL "
|
||||||
"PIX_CENTER). Generally games are aware of the half-pixel offset, and "
|
"PIX_CENTER). Generally games are aware of the half-pixel offset, and "
|
||||||
"having this enabled is the correct behavior (disabling this may "
|
"having this enabled is the correct behavior (disabling this may "
|
||||||
"significantly break post-processing in some games, like Halo 3), but in "
|
"significantly break post-processing in some games), but in certain games "
|
||||||
"some games it might have been ignored, resulting in slight blurriness of "
|
"it might have been ignored, resulting in slight blurriness of UI "
|
||||||
"UI textures, for instance, when they are read between texels rather than "
|
"textures, for instance, when they are read between texels rather than "
|
||||||
"at texel centers (Banjo-Kazooie), or the leftmost/topmost pixels may not "
|
"at texel centers, or the leftmost/topmost pixels may not be fully covered "
|
||||||
"be fully covered when MSAA is used with fullscreen passes.",
|
"when MSAA is used with fullscreen passes.",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
|
||||||
DEFINE_int32(query_occlusion_fake_sample_count, 1000,
|
DEFINE_int32(query_occlusion_fake_sample_count, 1000,
|
||||||
|
|
|
@ -57,7 +57,7 @@ DEFINE_bool(
|
||||||
// TODO(Triang3l): More investigation of the cache threshold as cache lookups
|
// TODO(Triang3l): More investigation of the cache threshold as cache lookups
|
||||||
// and insertions require global critical region locking, and insertions also
|
// and insertions require global critical region locking, and insertions also
|
||||||
// require protecting pages. At 1024, the cache only made the performance worse
|
// require protecting pages. At 1024, the cache only made the performance worse
|
||||||
// (Tony Hawk's American Wasteland, 16-bit primitive reset index replacement).
|
// (415607D4, 16-bit primitive reset index replacement).
|
||||||
DEFINE_int32(
|
DEFINE_int32(
|
||||||
primitive_processor_cache_min_indices, 4096,
|
primitive_processor_cache_min_indices, 4096,
|
||||||
"Smallest number of guest indices to store in the cache to try reusing "
|
"Smallest number of guest indices to store in the cache to try reusing "
|
||||||
|
@ -247,14 +247,14 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
||||||
// games using tessellated strips / fans so far.
|
// games using tessellated strips / fans so far.
|
||||||
switch (tessellation_mode) {
|
switch (tessellation_mode) {
|
||||||
case xenos::TessellationMode::kDiscrete:
|
case xenos::TessellationMode::kDiscrete:
|
||||||
// - Call of Duty 3 - nets above barrels in the beginning of the
|
// - 415607E1 - nets above barrels in the beginning of the first
|
||||||
// first mission (turn right after the end of the intro) -
|
// mission (turn right after the end of the intro) -
|
||||||
// kTriangleList.
|
// kTriangleList.
|
||||||
host_vertex_shader_type =
|
host_vertex_shader_type =
|
||||||
Shader::HostVertexShaderType::kTriangleDomainCPIndexed;
|
Shader::HostVertexShaderType::kTriangleDomainCPIndexed;
|
||||||
break;
|
break;
|
||||||
case xenos::TessellationMode::kContinuous:
|
case xenos::TessellationMode::kContinuous:
|
||||||
// - Viva Pinata - tree building with a beehive in the beginning
|
// - 4D5307F2 - tree building with a beehive in the beginning
|
||||||
// (visible on the start screen behind the logo), waterfall in the
|
// (visible on the start screen behind the logo), waterfall in the
|
||||||
// beginning - kTriangleList.
|
// beginning - kTriangleList.
|
||||||
host_vertex_shader_type =
|
host_vertex_shader_type =
|
||||||
|
@ -276,7 +276,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
||||||
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
|
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
|
||||||
break;
|
break;
|
||||||
case xenos::TessellationMode::kContinuous:
|
case xenos::TessellationMode::kContinuous:
|
||||||
// - Defender - retro screen and beams in the main menu - kQuadList.
|
// - 58410823 - retro screen and beams in the main menu - kQuadList.
|
||||||
host_vertex_shader_type =
|
host_vertex_shader_type =
|
||||||
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
|
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
|
||||||
break;
|
break;
|
||||||
|
@ -285,14 +285,14 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case xenos::PrimitiveType::kTrianglePatch:
|
case xenos::PrimitiveType::kTrianglePatch:
|
||||||
// - Banjo-Kazooie: Nuts & Bolts - water - adaptive.
|
// - 4D5307E6 - water - adaptive.
|
||||||
// - Halo 3 - water - adaptive.
|
// - 4D5307ED - water - adaptive.
|
||||||
host_vertex_shader_type =
|
host_vertex_shader_type =
|
||||||
Shader::HostVertexShaderType::kTriangleDomainPatchIndexed;
|
Shader::HostVertexShaderType::kTriangleDomainPatchIndexed;
|
||||||
break;
|
break;
|
||||||
case xenos::PrimitiveType::kQuadPatch:
|
case xenos::PrimitiveType::kQuadPatch:
|
||||||
// - Fable II - continuous.
|
// - 4D5307F1 - continuous.
|
||||||
// - Viva Pinata - garden ground - adaptive.
|
// - 4D5307F2 - garden ground - adaptive.
|
||||||
host_vertex_shader_type =
|
host_vertex_shader_type =
|
||||||
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
|
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -335,10 +335,10 @@ union alignas(uint32_t) PA_SU_SC_MODE_CNTL {
|
||||||
uint32_t cull_back : 1; // +1
|
uint32_t cull_back : 1; // +1
|
||||||
// 0 - front is CCW, 1 - front is CW.
|
// 0 - front is CCW, 1 - front is CW.
|
||||||
uint32_t face : 1; // +2
|
uint32_t face : 1; // +2
|
||||||
// The game Fuse uses poly_mode 2 for triangles, which is "reserved" on R6xx
|
// 4541096E uses poly_mode 2 for triangles, which is "reserved" on R6xx and
|
||||||
// and not defined on Adreno 2xx, but polymode_front/back_ptype are 0
|
// not defined on Adreno 2xx, but polymode_front/back_ptype are 0 (points)
|
||||||
// (points) in this case in Fuse, which should not be respected for
|
// in this case in 4541096E, which should not be respected for non-kDualMode
|
||||||
// non-kDualMode as the game wants to draw filled triangles.
|
// as the title wants to draw filled triangles.
|
||||||
xenos::PolygonModeEnable poly_mode : 2; // +3
|
xenos::PolygonModeEnable poly_mode : 2; // +3
|
||||||
xenos::PolygonType polymode_front_ptype : 3; // +5
|
xenos::PolygonType polymode_front_ptype : 3; // +5
|
||||||
xenos::PolygonType polymode_back_ptype : 3; // +8
|
xenos::PolygonType polymode_back_ptype : 3; // +8
|
||||||
|
@ -559,16 +559,16 @@ union alignas(uint32_t) RB_COLORCONTROL {
|
||||||
// (gl_FragCoord.y near 0 in the top, near 1 in the bottom here - D3D-like.)
|
// (gl_FragCoord.y near 0 in the top, near 1 in the bottom here - D3D-like.)
|
||||||
// For 2 samples, the top sample (closer to gl_FragCoord.y 0) is covered
|
// For 2 samples, the top sample (closer to gl_FragCoord.y 0) is covered
|
||||||
// when alpha is in [0.5, 1), the bottom sample is covered when the alpha is
|
// when alpha is in [0.5, 1), the bottom sample is covered when the alpha is
|
||||||
// [1. With these thresholds, however, in Red Dead Redemption, almost all
|
// [1. With these thresholds, however, in 5454082B, almost all distant trees
|
||||||
// distant trees are transparent, this is asymmetric - fully transparent for
|
// are transparent, this is asymmetric - fully transparent for a quarter of
|
||||||
// a quarter of the range (or even half of the range for 2x and almost the
|
// the range (or even half of the range for 2x and almost the entire range
|
||||||
// entire range for 1x), but fully opaque only in one value.
|
// for 1x), but fully opaque only in one value.
|
||||||
// Though, 2, 2, 2, 2 offset values are commonly used for undithered alpha
|
// Though, 2, 2, 2, 2 offset values are commonly used for undithered alpha
|
||||||
// to coverage (in games such as Red Dead Redemption, and overall in AMD
|
// to coverage (in games such as 5454082B, and overall in AMD driver
|
||||||
// driver implementations) - it appears that 2, 2, 2, 2 offsets are supposed
|
// implementations) - it appears that 2, 2, 2, 2 offsets are supposed to
|
||||||
// to make this symmetric.
|
// make this symmetric.
|
||||||
// Both Red Dead Redemption and RADV (which used AMDVLK as a reference) use
|
// Both 5454082B and RADV (which used AMDVLK as a reference) use 3, 1, 0, 2
|
||||||
// 3, 1, 0, 2 offsets for dithered alpha to mask.
|
// offsets for dithered alpha to mask.
|
||||||
// https://gitlab.freedesktop.org/nchery/mesa/commit/8a52e4cc4fad4f1c75acc0badd624778f9dfe202
|
// https://gitlab.freedesktop.org/nchery/mesa/commit/8a52e4cc4fad4f1c75acc0badd624778f9dfe202
|
||||||
// It appears that the offsets lower the thresholds by (offset / 4 /
|
// It appears that the offsets lower the thresholds by (offset / 4 /
|
||||||
// sample count). That's consistent with both 2, 2, 2, 2 making the test
|
// sample count). That's consistent with both 2, 2, 2, 2 making the test
|
||||||
|
|
|
@ -40,6 +40,7 @@ DEFINE_bool(
|
||||||
"reduce bandwidth usage during transfers as the previous depth won't need "
|
"reduce bandwidth usage during transfers as the previous depth won't need "
|
||||||
"to be read.",
|
"to be read.",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
// The round trip is done, in particular, in 545407F2.
|
||||||
DEFINE_string(
|
DEFINE_string(
|
||||||
depth_float24_conversion, "",
|
depth_float24_conversion, "",
|
||||||
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
||||||
|
@ -56,8 +57,8 @@ DEFINE_string(
|
||||||
" + Highest performance, allows early depth test and writing.\n"
|
" + Highest performance, allows early depth test and writing.\n"
|
||||||
" + Host MSAA is possible with pixel-rate shading where supported.\n"
|
" + Host MSAA is possible with pixel-rate shading where supported.\n"
|
||||||
" - EDRAM > RAM > EDRAM depth buffer round trip done in certain games "
|
" - EDRAM > RAM > EDRAM depth buffer round trip done in certain games "
|
||||||
"(such as GTA IV) destroys precision irreparably, causing artifacts if "
|
"destroys precision irreparably, causing artifacts if another rendering "
|
||||||
"another rendering pass is done after the EDRAM reupload.\n"
|
"pass is done after the EDRAM reupload.\n"
|
||||||
" truncate:\n"
|
" truncate:\n"
|
||||||
" Convert to 20e4 directly in pixel shaders, always rounding down.\n"
|
" Convert to 20e4 directly in pixel shaders, always rounding down.\n"
|
||||||
" + Average performance, conservative early depth test is possible.\n"
|
" + Average performance, conservative early depth test is possible.\n"
|
||||||
|
@ -96,18 +97,15 @@ DEFINE_bool(
|
||||||
"bloom, etc., in some cases.",
|
"bloom, etc., in some cases.",
|
||||||
"GPU");
|
"GPU");
|
||||||
// Disabled by default because of full-screen effects that occur when game
|
// Disabled by default because of full-screen effects that occur when game
|
||||||
// shaders assume piecewise linear, much more severe than blending-related
|
// shaders assume piecewise linear (4541080F), much more severe than
|
||||||
// issues.
|
// blending-related issues.
|
||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
gamma_render_target_as_srgb, false,
|
gamma_render_target_as_srgb, false,
|
||||||
"When the host can't write piecewise linear gamma directly with correct "
|
"When the host can't write piecewise linear gamma directly with correct "
|
||||||
"blending, use sRGB output on the host for conceptually correct blending "
|
"blending, use sRGB output on the host for conceptually correct blending "
|
||||||
"in linear color space (to prevent issues such as bright squares around "
|
"in linear color space while having slightly different precision "
|
||||||
"bullet holes and overly dark lighting in Halo 3) while having slightly "
|
"distribution in the render target and severely incorrect values if the "
|
||||||
"different precision distribution in the render target and severely "
|
"game accesses the resulting colors directly as raw data.",
|
||||||
"incorrect values if the game accesses the resulting colors directly as "
|
|
||||||
"raw data (the whole screen in The Orange Box, for instance, since when "
|
|
||||||
"the first loading bar appears).",
|
|
||||||
"GPU");
|
"GPU");
|
||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
mrt_edram_used_range_clamp_to_min, true,
|
mrt_edram_used_range_clamp_to_min, true,
|
||||||
|
@ -493,9 +491,9 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
||||||
// (issues caused by color and depth render target collisions haven't been
|
// (issues caused by color and depth render target collisions haven't been
|
||||||
// found yet), but render targets with smaller index are considered more
|
// found yet), but render targets with smaller index are considered more
|
||||||
// important - specifically, because of the usage in the lighting pass of
|
// important - specifically, because of the usage in the lighting pass of
|
||||||
// Halo 3, which can be checked in the vertical look calibration sequence in
|
// 4D5307E6, which can be checked in the vertical look calibration sequence in
|
||||||
// the beginning of the game: if render target 0 is removed in favor of 1, the
|
// the beginning of the game: if render target 0 is removed in favor of 1, the
|
||||||
// UNSC servicemen and the world will be too dark, like fully in shadow -
|
// characters and the world will be too dark, like fully in shadow -
|
||||||
// especially prominent on the helmet. This happens because the shader picks
|
// especially prominent on the helmet. This happens because the shader picks
|
||||||
// between two render targets to write dynamically (though with a static, bool
|
// between two render targets to write dynamically (though with a static, bool
|
||||||
// constant condition), but all other state is set up in a way that implies
|
// constant condition), but all other state is set up in a way that implies
|
||||||
|
@ -624,7 +622,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
||||||
// "As if it was 64bpp" (contribution of 32bpp render targets multiplied by 2,
|
// "As if it was 64bpp" (contribution of 32bpp render targets multiplied by 2,
|
||||||
// and clamping for 32bpp render targets divides this by 2) because 32bpp
|
// and clamping for 32bpp render targets divides this by 2) because 32bpp
|
||||||
// render targets can be combined with twice as long 64bpp render targets. An
|
// render targets can be combined with twice as long 64bpp render targets. An
|
||||||
// example is the Dead Space 3 menu background (1-sample 1152x720, or 1200x720
|
// example is the 4541099D menu background (1-sample 1152x720, or 1200x720
|
||||||
// after rounding to tiles, with a 32bpp depth buffer at 0 requiring 675
|
// after rounding to tiles, with a 32bpp depth buffer at 0 requiring 675
|
||||||
// tiles, and a 64bpp color buffer at 675 requiring 1350 tiles, but the
|
// tiles, and a 64bpp color buffer at 675 requiring 1350 tiles, but the
|
||||||
// smallest distance between two render target bases is 675 tiles).
|
// smallest distance between two render target bases is 675 tiles).
|
||||||
|
|
|
@ -70,10 +70,10 @@ class RenderTargetCache {
|
||||||
// Significant differences:
|
// Significant differences:
|
||||||
// - 8_8_8_8_GAMMA - the piecewise linear gamma curve is very different than
|
// - 8_8_8_8_GAMMA - the piecewise linear gamma curve is very different than
|
||||||
// sRGB, one possible path is conversion in shaders (resulting in
|
// sRGB, one possible path is conversion in shaders (resulting in
|
||||||
// incorrect blending, especially visible on decals in Halo 3), another is
|
// incorrect blending, especially visible on decals in 4D5307E6), another
|
||||||
// using sRGB render targets and either conversion on resolve or reading
|
// is using sRGB render targets and either conversion on resolve or
|
||||||
// the resolved data as a true sRGB texture (incorrect when the game
|
// reading the resolved data as a true sRGB texture (incorrect when the
|
||||||
// accesses the data directly, like The Orange Box).
|
// game accesses the data directly, like 4541080F).
|
||||||
// - 2_10_10_10_FLOAT - ranges significantly different than in float16, much
|
// - 2_10_10_10_FLOAT - ranges significantly different than in float16, much
|
||||||
// smaller RGB range, and alpha is fixed-point and has only 2 bits.
|
// smaller RGB range, and alpha is fixed-point and has only 2 bits.
|
||||||
// - 16_16, 16_16_16_16 - has -32 to 32 range, not -1 to 1 - need either to
|
// - 16_16, 16_16_16_16 - has -32 to 32 range, not -1 to 1 - need either to
|
||||||
|
@ -445,9 +445,9 @@ class RenderTargetCache {
|
||||||
// aliasing naively, precision may be lost - host depth must only be
|
// aliasing naively, precision may be lost - host depth must only be
|
||||||
// overwritten if the new guest value is different than the current host depth
|
// overwritten if the new guest value is different than the current host depth
|
||||||
// when converted to the guest format (this catches the usual case of
|
// when converted to the guest format (this catches the usual case of
|
||||||
// overwriting the depth buffer for clearing it mostly). Sonic the Hedgehog's
|
// overwriting the depth buffer for clearing it mostly). 534507D6 intro
|
||||||
// intro cutscene, for example, has a good example of corruption that happens
|
// cutscene, for example, has a good example of corruption that happens if
|
||||||
// if this is not handled - the upper 1280x384 pixels are rendered in a very
|
// this is not handled - the upper 1280x384 pixels are rendered in a very
|
||||||
// "striped" way if the depth precision is lost (if this is made always return
|
// "striped" way if the depth precision is lost (if this is made always return
|
||||||
// false).
|
// false).
|
||||||
virtual bool IsHostDepthEncodingDifferent(
|
virtual bool IsHostDepthEncodingDifferent(
|
||||||
|
@ -627,7 +627,7 @@ class RenderTargetCache {
|
||||||
// surface info was changed), to avoid unneeded render target switching (which
|
// surface info was changed), to avoid unneeded render target switching (which
|
||||||
// is especially undesirable on tile-based GPUs) in the implementation if
|
// is especially undesirable on tile-based GPUs) in the implementation if
|
||||||
// simply disabling depth / stencil test or color writes and then re-enabling
|
// simply disabling depth / stencil test or color writes and then re-enabling
|
||||||
// (Banjo-Kazooie does this often with color). Must also be used to determine
|
// (58410954 does this often with color). Must also be used to determine
|
||||||
// whether it's safe to enable depth / stencil or writing to a specific color
|
// whether it's safe to enable depth / stencil or writing to a specific color
|
||||||
// render target in the pipeline for this draw call.
|
// render target in the pipeline for this draw call.
|
||||||
// Only valid for non-pixel-shader-interlock paths.
|
// Only valid for non-pixel-shader-interlock paths.
|
||||||
|
|
|
@ -551,7 +551,7 @@ struct ParsedAluInstruction {
|
||||||
InstructionResult scalar_result;
|
InstructionResult scalar_result;
|
||||||
// Both operations must be executed before any result is stored if vector and
|
// Both operations must be executed before any result is stored if vector and
|
||||||
// scalar operations are paired. There are cases of vector result being used
|
// scalar operations are paired. There are cases of vector result being used
|
||||||
// as scalar operand or vice versa (the halo on Avalanche in Halo 3, for
|
// as scalar operand or vice versa (the ring on Avalanche in 4D5307E6, for
|
||||||
// example), in this case there must be no dependency between the two
|
// example), in this case there must be no dependency between the two
|
||||||
// operations.
|
// operations.
|
||||||
|
|
||||||
|
@ -851,11 +851,11 @@ class Shader {
|
||||||
// highest static register address + 1, or 0 if no registers referenced this
|
// highest static register address + 1, or 0 if no registers referenced this
|
||||||
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
|
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
|
||||||
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
|
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
|
||||||
// can be reproduced by launching Arrival in Halo 3 from the campaign lobby)
|
// can be reproduced by launching the intro mission in 4D5307E6 from the
|
||||||
// that aren't supposed to cover any pixels use an invalid (zero)
|
// campaign lobby) that aren't supposed to cover any pixels use an invalid
|
||||||
// SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this case
|
// (zero) SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this
|
||||||
// SQ_PROGRAM_CNTL may contain a number smaller than actually needed by the
|
// case SQ_PROGRAM_CNTL may contain a number smaller than actually needed by
|
||||||
// pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
|
// the pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
|
||||||
// uses_register_dynamic_addressing is true.
|
// uses_register_dynamic_addressing is true.
|
||||||
uint32_t register_static_address_bound() const {
|
uint32_t register_static_address_bound() const {
|
||||||
return register_static_address_bound_;
|
return register_static_address_bound_;
|
||||||
|
|
|
@ -388,8 +388,8 @@ void Shader::GatherAluInstructionInformation(
|
||||||
// allocation in shader translator implementations.
|
// allocation in shader translator implementations.
|
||||||
// eA is (hopefully) always written to using:
|
// eA is (hopefully) always written to using:
|
||||||
// mad eA, r#, const0100, c#
|
// mad eA, r#, const0100, c#
|
||||||
// (though there are some exceptions, shaders in Halo 3 for some reason set eA
|
// (though there are some exceptions, shaders in 4D5307E6 for some reason set
|
||||||
// to zeros, but the swizzle of the constant is not .xyzw in this case, and
|
// eA to zeros, but the swizzle of the constant is not .xyzw in this case, and
|
||||||
// they don't write to eM#).
|
// they don't write to eM#).
|
||||||
// Export is done to vector_dest of the ucode instruction for both vector and
|
// Export is done to vector_dest of the ucode instruction for both vector and
|
||||||
// scalar operations - no need to check separately.
|
// scalar operations - no need to check separately.
|
||||||
|
|
|
@ -36,7 +36,7 @@ XeHSConstantDataOutput XePatchConstant(
|
||||||
// 2) r0.zyx -> r0.zyx by the guest (because r1.y is set to 0 by Xenia, which
|
// 2) r0.zyx -> r0.zyx by the guest (because r1.y is set to 0 by Xenia, which
|
||||||
// apparently means identity swizzle to games).
|
// apparently means identity swizzle to games).
|
||||||
// 3) r0.z * v0 + r0.y * v1 + r0.x * v2 by the guest.
|
// 3) r0.z * v0 + r0.y * v1 + r0.x * v2 by the guest.
|
||||||
// With this order, there are no cracks in Halo 3 water.
|
// With this order, there are no cracks in 4D5307E6 water.
|
||||||
[unroll] for (i = 0u; i < 3u; ++i) {
|
[unroll] for (i = 0u; i < 3u; ++i) {
|
||||||
output.edges[i] = xe_input_patch[(i + 1u) % 3u].edge_factor;
|
output.edges[i] = xe_input_patch[(i + 1u) % 3u].edge_factor;
|
||||||
}
|
}
|
||||||
|
|
|
@ -986,11 +986,11 @@ uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(uint2 halfblocks) {
|
||||||
// DXT1/DXT3/DXT5 color components and CTX1 X/Y are ordered in:
|
// DXT1/DXT3/DXT5 color components and CTX1 X/Y are ordered in:
|
||||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||||
// (LSB on the right, MSB on the left.)
|
// (LSB on the right, MSB on the left.)
|
||||||
// TODO(Triang3l): Investigate this better, Halo: Reach is the only known game
|
// TODO(Triang3l): Investigate this better, 4D53085B is the only known game
|
||||||
// that uses it (for lighting in certain places - one of the easy-to-notice usages
|
// that uses it (for lighting in certain places - one of the easy-to-notice usages
|
||||||
// is the T-shaped (or somewhat H-shaped) metal beams in the beginning of
|
// is the T-shaped (or somewhat H-shaped) metal beams in the beginning of the
|
||||||
// Winter Contingency), however the contents don't say anything about the
|
// first mission), however the contents don't say anything about the channel
|
||||||
// channel order.
|
// order.
|
||||||
uint4 row = (((halfblocks.xxyy >> uint2(3u, 11u).xyxy) & 1u) << 8u) |
|
uint4 row = (((halfblocks.xxyy >> uint2(3u, 11u).xyxy) & 1u) << 8u) |
|
||||||
(((halfblocks.xxyy >> uint2(7u, 15u).xyxy) & 1u) << 24u) |
|
(((halfblocks.xxyy >> uint2(7u, 15u).xyxy) & 1u) << 24u) |
|
||||||
(((halfblocks.xxyy >> uint2(2u, 10u).xyxy) & 1u) << 4u) |
|
(((halfblocks.xxyy >> uint2(2u, 10u).xyxy) & 1u) << 4u) |
|
||||||
|
|
|
@ -5,8 +5,8 @@ XeHSControlPointInputAdaptive main(uint xe_edge_factor : SV_VertexID) {
|
||||||
XeHSControlPointInputAdaptive output;
|
XeHSControlPointInputAdaptive output;
|
||||||
// The Xbox 360's GPU accepts the float32 tessellation factors for edges
|
// The Xbox 360's GPU accepts the float32 tessellation factors for edges
|
||||||
// through a special kind of an index buffer.
|
// through a special kind of an index buffer.
|
||||||
// While Viva Pinata sets the factors to 0 for frustum-culled (quad) patches,
|
// While 4D5307F2 sets the factors to 0 for frustum-culled (quad) patches, in
|
||||||
// in Halo 3 only allowing patches with factors above 0 makes distant
|
// 4D5307E6 only allowing patches with factors above 0 makes distant
|
||||||
// (triangle) patches disappear - it appears that there are no special values
|
// (triangle) patches disappear - it appears that there are no special values
|
||||||
// for culled patches on the Xbox 360 (unlike zero, negative and NaN on
|
// for culled patches on the Xbox 360 (unlike zero, negative and NaN on
|
||||||
// Direct3D 11).
|
// Direct3D 11).
|
||||||
|
|
|
@ -11,7 +11,7 @@ RWBuffer<uint4> xe_texture_load_dest : register(u0);
|
||||||
// Dword 1:
|
// Dword 1:
|
||||||
// rrrrrrrrgggggggg
|
// rrrrrrrrgggggggg
|
||||||
// RRRRRRRRGGGGGGGG
|
// RRRRRRRRGGGGGGGG
|
||||||
// (R is in the higher bits, according to how this format is used in Halo 3).
|
// (R is in the higher bits, according to how this format is used in 4D5307E6).
|
||||||
// Dword 2:
|
// Dword 2:
|
||||||
// AA BB CC DD
|
// AA BB CC DD
|
||||||
// EE FF GG HH
|
// EE FF GG HH
|
||||||
|
|
|
@ -465,9 +465,10 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryInvalidationCallback(
|
||||||
// invalidated - if no GPU-written data nearby that was not intended to be
|
// invalidated - if no GPU-written data nearby that was not intended to be
|
||||||
// invalidated since it's not in sync with CPU memory and can't be
|
// invalidated since it's not in sync with CPU memory and can't be
|
||||||
// reuploaded. It's a lot cheaper to upload some excess data than to catch
|
// reuploaded. It's a lot cheaper to upload some excess data than to catch
|
||||||
// access violations - with 4 KB callbacks, the original Doom runs at 4 FPS
|
// access violations - with 4 KB callbacks, 58410824 (being a
|
||||||
// on Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per
|
// software-rendered game) runs at 4 FPS on Intel Core i7-3770, with 64 KB,
|
||||||
// frame, but with 256 KB it's 0.7 ms.
|
// the CPU game code takes 3 ms to run per frame, but with 256 KB, it's
|
||||||
|
// 0.7 ms.
|
||||||
if (page_first & 63) {
|
if (page_first & 63) {
|
||||||
uint64_t gpu_written_start =
|
uint64_t gpu_written_start =
|
||||||
system_page_flags_[block_first].valid_and_gpu_written;
|
system_page_flags_[block_first].valid_and_gpu_written;
|
||||||
|
|
|
@ -49,7 +49,8 @@ void CopySwapBlock(xenos::Endian endian, void* output, const void* input,
|
||||||
void ConvertTexelCTX1ToR8G8(xenos::Endian endian, void* output,
|
void ConvertTexelCTX1ToR8G8(xenos::Endian endian, void* output,
|
||||||
const void* input, size_t length) {
|
const void* input, size_t length) {
|
||||||
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||||
// (R is in the higher bits, according to how this format is used in Halo 3).
|
// (R is in the higher bits, according to how this format is used in
|
||||||
|
// 4D5307E6).
|
||||||
union {
|
union {
|
||||||
uint8_t data[8];
|
uint8_t data[8];
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -352,11 +352,11 @@ TextureGuestLayout GetGuestTextureLayout(
|
||||||
xenos::kTextureSubresourceAlignmentBytes);
|
xenos::kTextureSubresourceAlignmentBytes);
|
||||||
|
|
||||||
// Estimate the memory amount actually referenced by the texture, which may
|
// Estimate the memory amount actually referenced by the texture, which may
|
||||||
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge
|
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in 4E4D083E,
|
||||||
// Racer Unbounded, for which memory exactly for 1280x720 is allocated, and
|
// for which memory exactly for 1280x720 is allocated, and aligning the
|
||||||
// aligning the height to 32 would cause access of an unallocated page) or
|
// height to 32 would cause access of an unallocated page) or bigger than
|
||||||
// bigger than the stride. For tiled textures, this is the dimensions
|
// the stride. For tiled textures, this is the dimensions aligned to 32x32x4
|
||||||
// aligned to 32x32x4 blocks (or x1 for the missing dimensions).
|
// blocks (or x1 for the missing dimensions).
|
||||||
uint32_t level_width_blocks =
|
uint32_t level_width_blocks =
|
||||||
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
||||||
format_info->block_width) /
|
format_info->block_width) /
|
||||||
|
|
|
@ -64,14 +64,14 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
// implies 32-block alignment for both uncompressed and compressed textures)
|
// implies 32-block alignment for both uncompressed and compressed textures)
|
||||||
// stored in the fetch constant, and height aligned to 32 blocks for Z slice
|
// stored in the fetch constant, and height aligned to 32 blocks for Z slice
|
||||||
// and array layer stride calculation purposes. The pitch can be different
|
// and array layer stride calculation purposes. The pitch can be different
|
||||||
// from the actual width - an example is Plants vs. Zombies, using 1408 pitch
|
// from the actual width - an example is 584109FF, using 1408 pitch for a
|
||||||
// for a 1280x menu background).
|
// 1280x menu background).
|
||||||
// - The mip levels use `max(next_pow2(width or height in texels) >> level, 1)`
|
// - The mip levels use `max(next_pow2(width or height in texels) >> level, 1)`
|
||||||
// aligned to 32 blocks for the same purpose, likely disregarding the pitch
|
// aligned to 32 blocks for the same purpose, likely disregarding the pitch
|
||||||
// from the fetch constant.
|
// from the fetch constant.
|
||||||
//
|
//
|
||||||
// There is also mip tail packing if the fetch constant specifies that packed
|
// There is also mip tail packing if the fetch constant specifies that packed
|
||||||
// mips are enabled, for both tiled and linear textures (Prey uses linear
|
// mips are enabled, for both tiled and linear textures (545407E0 uses linear
|
||||||
// DXT-compressed textures with packed mips very extensively for the game world
|
// DXT-compressed textures with packed mips very extensively for the game world
|
||||||
// materials). In this case, mips with width or height of 16 or smaller are
|
// materials). In this case, mips with width or height of 16 or smaller are
|
||||||
// stored not individually, but instead, in 32-texel (note: not 32-block - mip
|
// stored not individually, but instead, in 32-texel (note: not 32-block - mip
|
||||||
|
@ -99,7 +99,7 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
// tail, and the offset calculation function doesn't have level == 0 checks in
|
// tail, and the offset calculation function doesn't have level == 0 checks in
|
||||||
// it, only early-out if level < packed tail level (which can be 0). There are
|
// it, only early-out if level < packed tail level (which can be 0). There are
|
||||||
// examples of textures with packed base, for example, in the intro level of
|
// examples of textures with packed base, for example, in the intro level of
|
||||||
// Prey (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
|
// 545407E0 (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
|
||||||
// machines).
|
// machines).
|
||||||
//
|
//
|
||||||
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
|
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
|
||||||
|
@ -107,22 +107,21 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
// fetch constant).
|
// fetch constant).
|
||||||
//
|
//
|
||||||
// However, all the 32x32x4 padding, being just padding, is not necessarily
|
// However, all the 32x32x4 padding, being just padding, is not necessarily
|
||||||
// being actually accessed, especially for linear textures. Ridge Racer
|
// being actually accessed, especially for linear textures. 4E4D083E has a
|
||||||
// Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for
|
// 1280x720 k_8_8_8_8 linear texture, and allocates memory for exactly 1280x720,
|
||||||
// exactly 1280x720, so aligning the height to 32 to 1280x736 results in access
|
// so aligning the height to 32 to 1280x736 results in access violations. So,
|
||||||
// violations. So, while for stride calculations all the padding must be
|
// while for stride calculations all the padding must be respected, for actual
|
||||||
// respected, for actual memory loads it's better to avoid trying to access it
|
// memory loads it's better to avoid trying to access it when possible:
|
||||||
// when possible:
|
|
||||||
// - If the pitch is bigger than the width, it's better to calculate the last
|
// - If the pitch is bigger than the width, it's better to calculate the last
|
||||||
// row's length from the width rather than the pitch (this also possibly works
|
// row's length from the width rather than the pitch (this also possibly works
|
||||||
// in the other direction though - pitch < width is a weird situation, but
|
// in the other direction though - pitch < width is a weird situation, but
|
||||||
// probably legal, and may lead to reading data from beyond the calculated
|
// probably legal, and may lead to reading data from beyond the calculated
|
||||||
// subresource stride).
|
// subresource stride).
|
||||||
// - For linear textures (like that 1280x720 example from Ridge Racer
|
// - For linear textures (like that 1280x720 example from 4E4D083E), it's easy
|
||||||
// Unbounded), it's easy to calculate the exact memory extent that may be
|
// to calculate the exact memory extent that may be accessed knowing the
|
||||||
// accessed knowing the dimensions (unlike for tiled textures with complex
|
// dimensions (unlike for tiled textures with complex addressing within
|
||||||
// addressing within 32x32x4-block tiles), so there's no need to align them to
|
// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for
|
||||||
// 32x32x4 for memory extent calculation.
|
// memory extent calculation.
|
||||||
// - For the linear packed mip tail, the extent can be calculated as max of
|
// - For the linear packed mip tail, the extent can be calculated as max of
|
||||||
// (block offsets + block extents) of all levels stored in it.
|
// (block offsets + block extents) of all levels stored in it.
|
||||||
//
|
//
|
||||||
|
@ -152,16 +151,16 @@ struct TextureGuestLayout {
|
||||||
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
||||||
// depending on the dimension), but for the linear subresources, this may be
|
// depending on the dimension), but for the linear subresources, this may be
|
||||||
// significantly (including fewer 4 KB pages) smaller than the aligned size
|
// significantly (including fewer 4 KB pages) smaller than the aligned size
|
||||||
// (like for Ridge Racer Unbounded where aligning the height of a 1280x720
|
// (like for 4E4D083E where aligning the height of a 1280x720 linear texture
|
||||||
// linear texture results in access violations). For the linear mip tail,
|
// results in access violations). For the linear mip tail, this includes all
|
||||||
// this includes all the mip levels stored in it. If the width is bigger
|
// the mip levels stored in it. If the width is bigger than the pitch, this
|
||||||
// than the pitch, this will also be taken into account for the last row so
|
// will also be taken into account for the last row so all memory actually
|
||||||
// all memory actually used by the texture will be loaded, and may be bigger
|
// used by the texture will be loaded, and may be bigger than the distance
|
||||||
// than the distance between array slices or levels. The purpose of this
|
// between array slices or levels. The purpose of this parameter is to make
|
||||||
// parameter is to make the memory amount that needs to be resident as close
|
// the memory amount that needs to be resident as close to the real amount
|
||||||
// to the real amount as possible, to make sure all the needed data will be
|
// as possible, to make sure all the needed data will be read, but also, if
|
||||||
// read, but also, if possible, unneeded memory pages won't be accessed
|
// possible, unneeded memory pages won't be accessed (since that may trigger
|
||||||
// (since that may trigger an access violation on the CPU).
|
// an access violation on the CPU).
|
||||||
uint32_t x_extent_blocks;
|
uint32_t x_extent_blocks;
|
||||||
uint32_t y_extent_blocks;
|
uint32_t y_extent_blocks;
|
||||||
uint32_t z_extent;
|
uint32_t z_extent;
|
||||||
|
|
|
@ -483,7 +483,7 @@ enum class FetchOpcode : uint32_t {
|
||||||
// - 3D (used for both 3D and stacked 2D texture): U, V, W (normalized or
|
// - 3D (used for both 3D and stacked 2D texture): U, V, W (normalized or
|
||||||
// unnormalized - same for both 3D W and stack layer; also VolMagFilter /
|
// unnormalized - same for both 3D W and stack layer; also VolMagFilter /
|
||||||
// VolMinFilter between stack layers is supported, used for color correction
|
// VolMinFilter between stack layers is supported, used for color correction
|
||||||
// in Burnout Revenge).
|
// in 454107DC).
|
||||||
// - Cube: SC, TC (between 1 and 2 for normalized), face ID (0.0 to 5.0), the
|
// - Cube: SC, TC (between 1 and 2 for normalized), face ID (0.0 to 5.0), the
|
||||||
// cube vector ALU instruction is used to calculate them.
|
// cube vector ALU instruction is used to calculate them.
|
||||||
// https://gpuopen.com/learn/fetching-from-cubes-and-octahedrons/
|
// https://gpuopen.com/learn/fetching-from-cubes-and-octahedrons/
|
||||||
|
@ -495,9 +495,9 @@ enum class FetchOpcode : uint32_t {
|
||||||
// The total LOD for a sample is additive and is based on what is enabled.
|
// The total LOD for a sample is additive and is based on what is enabled.
|
||||||
//
|
//
|
||||||
// For cube maps, according to what texCUBEgrad compiles to in a modified
|
// For cube maps, according to what texCUBEgrad compiles to in a modified
|
||||||
// HLSL shader of Brave: A Warrior's Tale and to XNA assembler output for PC
|
// HLSL shader of 455607D1 and to XNA assembler output for PC SM3 texldd,
|
||||||
// SM3 texldd, register gradients are in cube space (not in SC/TC space,
|
// register gradients are in cube space (not in SC/TC space, unlike the
|
||||||
// unlike the coordinates themselves). This isn't true for the GCN, however.
|
// coordinates themselves). This isn't true for the GCN, however.
|
||||||
//
|
//
|
||||||
// TODO(Triang3l): Find if gradients are unnormalized for cube maps if
|
// TODO(Triang3l): Find if gradients are unnormalized for cube maps if
|
||||||
// coordinates are unnormalized. Since texldd doesn't perform any
|
// coordinates are unnormalized. Since texldd doesn't perform any
|
||||||
|
@ -814,8 +814,8 @@ static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
|
||||||
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
|
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
|
||||||
// this rule for multiplication, as games often rely on it in vector
|
// this rule for multiplication, as games often rely on it in vector
|
||||||
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
|
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
|
||||||
// things in games - causes white screen in Halo 3, white specular on
|
// things in games - causes white screen in 4D5307E6, white specular on
|
||||||
// characters in GTA IV. The result is always positive zero in this case, no
|
// characters in 545407F2. The result is always positive zero in this case, no
|
||||||
// matter what the signs of the other operands are, according to R5xx
|
// matter what the signs of the other operands are, according to R5xx
|
||||||
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
|
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
|
||||||
// Adreno 200. This means that the following need to be taken into account
|
// Adreno 200. This means that the following need to be taken into account
|
||||||
|
@ -1628,8 +1628,8 @@ enum class ExportRegister : uint32_t {
|
||||||
// X - PSIZE (gl_PointSize).
|
// X - PSIZE (gl_PointSize).
|
||||||
// Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
|
// Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
|
||||||
// drawing.
|
// drawing.
|
||||||
// Z - KILLVERTEX flag (used in Banjo-Kazooie: Nuts & Bolts for grass), set
|
// Z - KILLVERTEX flag (used in 4D5307ED for grass), set for killing
|
||||||
// for killing primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
|
// primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
|
||||||
kVSPointSizeEdgeFlagKillVertex = 63,
|
kVSPointSizeEdgeFlagKillVertex = 63,
|
||||||
|
|
||||||
kPSColor0 = 0,
|
kPSColor0 = 0,
|
||||||
|
|
|
@ -507,7 +507,7 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||||
uint32_t upload_size = source_length;
|
uint32_t upload_size = source_length;
|
||||||
|
|
||||||
// Ping the memory subsystem for allocation size.
|
// Ping the memory subsystem for allocation size.
|
||||||
// TODO(DrChat): Artifacting occurring in GripShift with this enabled.
|
// TODO(DrChat): Artifacting occurring in 5841089E with this enabled.
|
||||||
// physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
|
// physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
|
||||||
assert(upload_base <= source_addr);
|
assert(upload_base <= source_addr);
|
||||||
uint32_t source_offset = source_addr - upload_base;
|
uint32_t source_offset = source_addr - upload_base;
|
||||||
|
|
|
@ -758,7 +758,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
|
||||||
depth_bias_scale = depth_bias_scales[1];
|
depth_bias_scale = depth_bias_scales[1];
|
||||||
depth_bias_offset = depth_bias_offsets[1];
|
depth_bias_offset = depth_bias_offsets[1];
|
||||||
}
|
}
|
||||||
// Convert to Vulkan units based on the values in Call of Duty 4:
|
// Convert to Vulkan units based on the values in 415607E6:
|
||||||
// r_polygonOffsetScale is -1 there, but 32 in the register.
|
// r_polygonOffsetScale is -1 there, but 32 in the register.
|
||||||
// r_polygonOffsetBias is -1 also, but passing 2/65536.
|
// r_polygonOffsetBias is -1 also, but passing 2/65536.
|
||||||
// 1/65536 and 2 scales are applied separately, however, and for shadow maps
|
// 1/65536 and 2 scales are applied separately, however, and for shadow maps
|
||||||
|
|
|
@ -235,10 +235,10 @@ enum class SurfaceNumFormat : uint32_t {
|
||||||
//
|
//
|
||||||
// Depth surfaces are also stored as 32bpp tiles, however, as opposed to color
|
// Depth surfaces are also stored as 32bpp tiles, however, as opposed to color
|
||||||
// surfaces, 40x16-sample halves of each tile are swapped - game shaders (for
|
// surfaces, 40x16-sample halves of each tile are swapped - game shaders (for
|
||||||
// example, in GTA IV, Halo 3) perform this swapping when writing specific
|
// example, in 4D5307E6 main menu, 545407F2) perform this swapping when writing
|
||||||
// depth/stencil values by drawing to a depth buffer's memory through a color
|
// specific depth/stencil values by drawing to a depth buffer's memory through a
|
||||||
// render target (to reupload a depth/stencil surface previously evicted from
|
// color render target (to reupload a depth/stencil surface previously evicted
|
||||||
// the EDRAM to the main memory, for instance).
|
// from the EDRAM to the main memory, for instance).
|
||||||
|
|
||||||
enum class MsaaSamples : uint32_t {
|
enum class MsaaSamples : uint32_t {
|
||||||
k1X = 0,
|
k1X = 0,
|
||||||
|
@ -728,12 +728,12 @@ enum class SampleControl : uint32_t {
|
||||||
// - sample_control is SQ_CONTEXT_MISC::sc_sample_cntl.
|
// - sample_control is SQ_CONTEXT_MISC::sc_sample_cntl.
|
||||||
// - interpolator_control_sampling_pattern is
|
// - interpolator_control_sampling_pattern is
|
||||||
// SQ_INTERPOLATOR_CNTL::sampling_pattern.
|
// SQ_INTERPOLATOR_CNTL::sampling_pattern.
|
||||||
// Centroid interpolation can be tested in Red Dead Redemption. If the GPU host
|
// Centroid interpolation can be tested in 5454082B. If the GPU host backend
|
||||||
// backend implements guest MSAA properly, using host MSAA, with everything
|
// implements guest MSAA properly, using host MSAA, with everything interpolated
|
||||||
// interpolated at centers, the Diez Coronas start screen background may have
|
// at centers, the Monument Valley start screen background may have a few
|
||||||
// a few distinctly bright pixels on the mesas/buttes, where extrapolation
|
// distinctly bright pixels on the mesas/buttes, where extrapolation happens.
|
||||||
// happens. Interpolating certain values (ones that aren't used for gradient
|
// Interpolating certain values (ones that aren't used for gradient calculation,
|
||||||
// calculation, not texture coordinates) at centroids fixes this issue.
|
// not texture coordinates) at centroids fixes this issue.
|
||||||
inline uint32_t GetInterpolatorSamplingPattern(
|
inline uint32_t GetInterpolatorSamplingPattern(
|
||||||
MsaaSamples msaa_samples, SampleControl sample_control,
|
MsaaSamples msaa_samples, SampleControl sample_control,
|
||||||
uint32_t interpolator_control_sampling_pattern) {
|
uint32_t interpolator_control_sampling_pattern) {
|
||||||
|
@ -763,9 +763,9 @@ enum class TessellationMode : uint32_t {
|
||||||
enum class PolygonModeEnable : uint32_t {
|
enum class PolygonModeEnable : uint32_t {
|
||||||
kDisabled = 0, // Render triangles.
|
kDisabled = 0, // Render triangles.
|
||||||
kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type.
|
kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type.
|
||||||
// The game Fuse uses 2 for triangles, which is "reserved" on R6xx and not
|
// 4541096E uses 2 for triangles, which is "reserved" on R6xx and not defined
|
||||||
// defined on Adreno 2xx, but polymode_front/back_ptype are 0 (points) in this
|
// on Adreno 2xx, but polymode_front/back_ptype are 0 (points) in this case in
|
||||||
// case in Fuse, which should not be respected for non-kDualMode as the game
|
// 4541096E, which should not be respected for non-kDualMode as the title
|
||||||
// wants to draw filled triangles.
|
// wants to draw filled triangles.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -785,17 +785,15 @@ enum class ModeControl : uint32_t {
|
||||||
// for it especially since the Xbox 360 doesn't have early per-sample depth /
|
// for it especially since the Xbox 360 doesn't have early per-sample depth /
|
||||||
// stencil, only early hi-Z / hi-stencil, and other registers possibly
|
// stencil, only early hi-Z / hi-stencil, and other registers possibly
|
||||||
// toggling pixel shader execution are yet to be found):
|
// toggling pixel shader execution are yet to be found):
|
||||||
// - Most of depth pre-pass draws in Call of Duty 4 use the kDepth mode with
|
// - Most of depth pre-pass draws in 415607E6 use the kDepth mode with a
|
||||||
// a `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
|
// `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
|
||||||
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
|
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
|
||||||
// used with the same shader performing the texture fetch.
|
// used with the same shader performing the texture fetch.
|
||||||
// - Red Dead Redemption has some kDepth draws with alphatest enabled, but the
|
// - 5454082B has some kDepth draws with alphatest enabled, but the shader is
|
||||||
// shader is `oC0 = r0`, which makes no sense (alphatest based on an
|
// `oC0 = r0`, which makes no sense (alphatest based on an interpolant from
|
||||||
// interpolant from the vertex shader) as no texture alpha cutout is
|
// the vertex shader) as no texture alpha cutout is involved.
|
||||||
// involved.
|
// - 5454082B also has kDepth draws with pretty complex shaders clearly for
|
||||||
// - Red Dead Redemption also has kDepth draws with pretty complex shaders
|
// use only in the color pass - even fetching and filtering a shadowmap.
|
||||||
// clearly for use only in the color pass - even fetching and filtering a
|
|
||||||
// shadowmap.
|
|
||||||
// For now, based on these, let's assume the pixel shader is never used with
|
// For now, based on these, let's assume the pixel shader is never used with
|
||||||
// kDepth.
|
// kDepth.
|
||||||
kDepth = 5,
|
kDepth = 5,
|
||||||
|
@ -833,10 +831,10 @@ enum class ModeControl : uint32_t {
|
||||||
// coordinates of the corners).
|
// coordinates of the corners).
|
||||||
//
|
//
|
||||||
// The rectangle is used for both the source render target and the destination
|
// The rectangle is used for both the source render target and the destination
|
||||||
// texture, according to how it's used in Tales of Vesperia.
|
// texture, according to how it's used in 4E4D07E9.
|
||||||
//
|
//
|
||||||
// Direct3D 9 gives the rectangle in source render target coordinates (for
|
// Direct3D 9 gives the rectangle in source render target coordinates (for
|
||||||
// example, in Halo 3, the sniper rifle scope has a (128,64)->(448,256)
|
// example, in 4D5307E6, the sniper rifle scope has a (128,64)->(448,256)
|
||||||
// rectangle). It doesn't adjust the EDRAM base pointer, otherwise (taking into
|
// rectangle). It doesn't adjust the EDRAM base pointer, otherwise (taking into
|
||||||
// account that 4x MSAA is used for the scope) it would have been
|
// account that 4x MSAA is used for the scope) it would have been
|
||||||
// (8,0)->(328,192), but it's not. However, it adjusts the destination texture
|
// (8,0)->(328,192), but it's not. However, it adjusts the destination texture
|
||||||
|
@ -851,7 +849,7 @@ enum class ModeControl : uint32_t {
|
||||||
// RB_COPY_DEST_PITCH's purpose appears to be not clamping or something like
|
// RB_COPY_DEST_PITCH's purpose appears to be not clamping or something like
|
||||||
// that, but just specifying pitch for going between rows, and height for going
|
// that, but just specifying pitch for going between rows, and height for going
|
||||||
// between 3D texture slices. copy_dest_pitch is rounded to 32 by Direct3D 9,
|
// between 3D texture slices. copy_dest_pitch is rounded to 32 by Direct3D 9,
|
||||||
// copy_dest_height is not. In the Halo 3 sniper rifle scope example,
|
// copy_dest_height is not. In the 4D5307E6 sniper rifle scope example,
|
||||||
// copy_dest_pitch is 320, and copy_dest_height is 192 - the same as the resolve
|
// copy_dest_pitch is 320, and copy_dest_height is 192 - the same as the resolve
|
||||||
// rectangle size (resolving from a 320x192 portion of the surface at 128,64 to
|
// rectangle size (resolving from a 320x192 portion of the surface at 128,64 to
|
||||||
// the whole texture, at 0,0). Relative to RB_COPY_DEST_BASE, the height should
|
// the whole texture, at 0,0). Relative to RB_COPY_DEST_BASE, the height should
|
||||||
|
@ -860,17 +858,17 @@ enum class ModeControl : uint32_t {
|
||||||
// of the register) that it exists purely to be able to go between 3D texture
|
// of the register) that it exists purely to be able to go between 3D texture
|
||||||
// slices.
|
// slices.
|
||||||
//
|
//
|
||||||
// Window scissor must also be applied - in the jigsaw puzzle in Banjo-Tooie,
|
// Window scissor must also be applied - in the jigsaw puzzle in 58410955, there
|
||||||
// there are 1280x720 resolve rectangles, but only the scissored 1280x256
|
// are 1280x720 resolve rectangles, but only the scissored 1280x256 needs to be
|
||||||
// needs to be copied, otherwise it overflows even beyond the EDRAM, and the
|
// copied, otherwise it overflows even beyond the EDRAM, and the depth buffer is
|
||||||
// depth buffer is visible on the screen. It also ensures the coordinates are
|
// visible on the screen. It also ensures the coordinates are not negative (in
|
||||||
// not negative (in F.E.A.R., for example, the right tile is resolved with
|
// 565507D9, for example, the right tile is resolved with vertices
|
||||||
// vertices (-640,0)->(640,720), however, the destination texture pointer is
|
// (-640,0)->(640,720), however, the destination texture pointer is adjusted
|
||||||
// adjusted properly to the right half of the texture, and the source render
|
// properly to the right half of the texture, and the source render target has a
|
||||||
// target has a pitch of 800).
|
// pitch of 800).
|
||||||
|
|
||||||
// Granularity of offset and size in resolve operations is 8x8 pixels
|
// Granularity of offset and size in resolve operations is 8x8 pixels
|
||||||
// (GPU_RESOLVE_ALIGNMENT - for example, Halo 3 resolves a 24x16 region for a
|
// (GPU_RESOLVE_ALIGNMENT - for example, 4D5307E6 resolves a 24x16 region for a
|
||||||
// 18x10 texture, 8x8 region for a 1x1 texture).
|
// 18x10 texture, 8x8 region for a 1x1 texture).
|
||||||
// https://github.com/jmfauvel/CSGO-SDK/blob/master/game/client/view.cpp#L944
|
// https://github.com/jmfauvel/CSGO-SDK/blob/master/game/client/view.cpp#L944
|
||||||
// https://github.com/stanriders/hl2-asw-port/blob/master/src/game/client/vgui_int.cpp#L901
|
// https://github.com/stanriders/hl2-asw-port/blob/master/src/game/client/vgui_int.cpp#L901
|
||||||
|
@ -1072,9 +1070,9 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
|
||||||
// pitch is irrelevant to them (but the 256-byte alignment requirement still
|
// pitch is irrelevant to them (but the 256-byte alignment requirement still
|
||||||
// applies to linear textures).
|
// applies to linear textures).
|
||||||
// Examples of pitch > aligned width:
|
// Examples of pitch > aligned width:
|
||||||
// - Plants vs. Zombies (loading screen and menu backgrounds, 1408 for a
|
// - 584109FF (loading screen and menu backgrounds, 1408 for a 1280x linear
|
||||||
// 1280x linear k_DXT4_5 texture, which corresponds to 22 * 256 bytes
|
// k_DXT4_5 texture, which corresponds to 22 * 256 bytes rather than
|
||||||
// rather than 20 * 256 for just 1280x).
|
// 20 * 256 for just 1280x).
|
||||||
uint32_t pitch : 9; // +22
|
uint32_t pitch : 9; // +22
|
||||||
uint32_t tiled : 1; // +31
|
uint32_t tiled : 1; // +31
|
||||||
|
|
||||||
|
|
|
@ -98,7 +98,7 @@ X_HRESULT XgiApp::DispatchMessageSync(uint32_t message, uint32_t buffer_ptr,
|
||||||
return X_E_SUCCESS;
|
return X_E_SUCCESS;
|
||||||
}
|
}
|
||||||
case 0x000B0014: {
|
case 0x000B0014: {
|
||||||
// Gets Jetpac XBLA in game
|
// Gets 584107FB in game.
|
||||||
// get high score table?
|
// get high score table?
|
||||||
XELOGD("XGI_unknown");
|
XELOGD("XGI_unknown");
|
||||||
return X_STATUS_SUCCESS;
|
return X_STATUS_SUCCESS;
|
||||||
|
|
|
@ -66,7 +66,7 @@ X_HRESULT XLiveBaseApp::DispatchMessageSync(uint32_t message,
|
||||||
return X_E_FAIL;
|
return X_E_FAIL;
|
||||||
}
|
}
|
||||||
case 0x00058046: {
|
case 0x00058046: {
|
||||||
// Required to be successful for Forza 4 to detect signed-in profile
|
// Required to be successful for 4D530910 to detect signed-in profile
|
||||||
// Doesn't seem to set anything in the given buffer, probably only takes
|
// Doesn't seem to set anything in the given buffer, probably only takes
|
||||||
// input
|
// input
|
||||||
XELOGD("XLiveBaseUnk58046({:08X}, {:08X}) unimplemented", buffer_ptr,
|
XELOGD("XLiveBaseUnk58046({:08X}, {:08X}) unimplemented", buffer_ptr,
|
||||||
|
|
|
@ -73,8 +73,8 @@ struct XCONTENT_DATA {
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_display_name(const std::u16string_view value) {
|
void set_display_name(const std::u16string_view value) {
|
||||||
// Some games (eg Goldeneye XBLA) require multiple null-terminators for it
|
// Some games (e.g. 584108A9) require multiple null-terminators for it to
|
||||||
// to read the string properly, blanking the array should take care of that
|
// read the string properly, blanking the array should take care of that
|
||||||
|
|
||||||
std::fill_n(display_name_raw.chars, countof(display_name_raw.chars), 0);
|
std::fill_n(display_name_raw.chars, countof(display_name_raw.chars), 0);
|
||||||
string_util::copy_and_swap_truncating(display_name_raw.chars, value,
|
string_util::copy_and_swap_truncating(display_name_raw.chars, value,
|
||||||
|
|
|
@ -20,9 +20,8 @@ namespace kernel {
|
||||||
namespace xam {
|
namespace xam {
|
||||||
|
|
||||||
UserProfile::UserProfile() {
|
UserProfile::UserProfile() {
|
||||||
// NeoGeo Battle Coliseum checks the user XUID against a mask of
|
// 58410A1F checks the user XUID against a mask of 0x00C0000000000000 (3<<54),
|
||||||
// 0x00C0000000000000 (3<<54), if non-zero, it prevents the user from playing
|
// if non-zero, it prevents the user from playing the game.
|
||||||
// the game.
|
|
||||||
// "You do not have permissions to perform this operation."
|
// "You do not have permissions to perform this operation."
|
||||||
xuid_ = 0xB13EBABEBABEBABE;
|
xuid_ = 0xB13EBABEBABEBABE;
|
||||||
name_ = "User";
|
name_ = "User";
|
||||||
|
|
|
@ -249,8 +249,10 @@ dword_result_t NetDll_WSAStartup(dword_t caller, word_t version,
|
||||||
data_ptr->max_sockets = wsaData.iMaxSockets;
|
data_ptr->max_sockets = wsaData.iMaxSockets;
|
||||||
data_ptr->max_udpdg = wsaData.iMaxUdpDg;
|
data_ptr->max_udpdg = wsaData.iMaxUdpDg;
|
||||||
|
|
||||||
// Some games (PoG) want this value round-tripped - they'll compare if it
|
// Some games (5841099F) want this value round-tripped - they'll compare if
|
||||||
// changes and bugcheck if it does.
|
// it changes and bugcheck if it does.
|
||||||
|
// TODO(Triang3l): Verify if the title ID in the comment is correct - added
|
||||||
|
// by benvanik as an acronym initially.
|
||||||
uint32_t vendor_ptr = xe::load_and_swap<uint32_t>(data_out + 0x190);
|
uint32_t vendor_ptr = xe::load_and_swap<uint32_t>(data_out + 0x190);
|
||||||
xe::store_and_swap<uint32_t>(data_out + 0x190, vendor_ptr);
|
xe::store_and_swap<uint32_t>(data_out + 0x190, vendor_ptr);
|
||||||
}
|
}
|
||||||
|
@ -459,7 +461,7 @@ dword_result_t NetDll_XNetGetTitleXnAddr(dword_t caller,
|
||||||
// TODO(gibbed): A proper mac address.
|
// TODO(gibbed): A proper mac address.
|
||||||
// RakNet's 360 version appears to depend on abEnet to create "random" 64-bit
|
// RakNet's 360 version appears to depend on abEnet to create "random" 64-bit
|
||||||
// numbers. A zero value will cause RakPeer::Startup to fail. This causes
|
// numbers. A zero value will cause RakPeer::Startup to fail. This causes
|
||||||
// Peggle 2 to crash on startup.
|
// 58411436 to crash on startup.
|
||||||
// The 360-specific code is scrubbed from the RakNet repo, but there's still
|
// The 360-specific code is scrubbed from the RakNet repo, but there's still
|
||||||
// traces of what it's doing which match the game code.
|
// traces of what it's doing which match the game code.
|
||||||
// https://github.com/facebookarchive/RakNet/blob/master/Source/RakPeer.cpp#L382
|
// https://github.com/facebookarchive/RakNet/blob/master/Source/RakPeer.cpp#L382
|
||||||
|
|
|
@ -79,8 +79,8 @@ dword_result_t XNotifyGetNext(dword_t handle, dword_t match_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
*id_ptr = dequeued ? id : 0;
|
*id_ptr = dequeued ? id : 0;
|
||||||
// param_ptr may be null - Ghost Recon Advanced Warfighter 2 Demo explicitly
|
// param_ptr may be null - 555307F0 Demo explicitly passes nullptr in the
|
||||||
// passes nullptr in the code.
|
// code.
|
||||||
// https://github.com/xenia-project/xenia/pull/1577
|
// https://github.com/xenia-project/xenia/pull/1577
|
||||||
if (param_ptr) {
|
if (param_ptr) {
|
||||||
*param_ptr = dequeued ? param : 0;
|
*param_ptr = dequeued ? param : 0;
|
||||||
|
|
|
@ -17,8 +17,7 @@ namespace kernel {
|
||||||
namespace xam {
|
namespace xam {
|
||||||
|
|
||||||
dword_result_t XamPartyGetUserList(dword_t player_count, lpdword_t party_list) {
|
dword_result_t XamPartyGetUserList(dword_t player_count, lpdword_t party_list) {
|
||||||
// Sonic & All-Stars Racing Transformed want specificly this code
|
// 5345085D wants specifically this code to skip loading party data.
|
||||||
// to skip loading party data.
|
|
||||||
// This code is not documented in NT_STATUS code list
|
// This code is not documented in NT_STATUS code list
|
||||||
return 0x807D0003;
|
return 0x807D0003;
|
||||||
}
|
}
|
||||||
|
|
|
@ -716,7 +716,7 @@ dword_result_t XamUserCreateAchievementEnumerator(dword_t title_id,
|
||||||
i, // dummy image id
|
i, // dummy image id
|
||||||
0,
|
0,
|
||||||
{0, 0},
|
{0, 0},
|
||||||
8}; // flags=8 makes dummy achievements show up in Crackdown's
|
8}; // flags=8 makes dummy achievements show up in 4D5307DC
|
||||||
// achievements list.
|
// achievements list.
|
||||||
e->AppendItem(item);
|
e->AppendItem(item);
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ DECLARE_XBDM_EXPORT1(DmCloseLoadedModules, kDebug, kStub);
|
||||||
MAKE_DUMMY_STUB_STATUS(DmFreePool);
|
MAKE_DUMMY_STUB_STATUS(DmFreePool);
|
||||||
|
|
||||||
dword_result_t DmGetXbeInfo() {
|
dword_result_t DmGetXbeInfo() {
|
||||||
// TODO(gibbed): Crackdown appears to expect this as success?
|
// TODO(gibbed): 4D5307DC appears to expect this as success?
|
||||||
// Unknown arguments -- let's hope things don't explode.
|
// Unknown arguments -- let's hope things don't explode.
|
||||||
return 0x02DA0000;
|
return 0x02DA0000;
|
||||||
}
|
}
|
||||||
|
|
|
@ -119,7 +119,7 @@ static_assert_size(XMA_CONTEXT_INIT, 56);
|
||||||
|
|
||||||
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
|
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
|
||||||
pointer_t<XMA_CONTEXT_INIT> context_init) {
|
pointer_t<XMA_CONTEXT_INIT> context_init) {
|
||||||
// Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland).
|
// Input buffers may be null (buffer 1 in 415607D4).
|
||||||
// Convert to host endianness.
|
// Convert to host endianness.
|
||||||
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
|
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
|
||||||
uint32_t input_buffer_0_physical_address = 0;
|
uint32_t input_buffer_0_physical_address = 0;
|
||||||
|
|
|
@ -47,8 +47,8 @@ void HandleSetThreadName(pointer_t<X_EXCEPTION_RECORD> record) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shadowrun (and its demo) has a bug where it ends up passing freed memory
|
// 4D5307D6 (and its demo) has a bug where it ends up passing freed memory for
|
||||||
// for the name, so at the point of SetThreadName it's filled with junk.
|
// the name, so at the point of SetThreadName it's filled with junk.
|
||||||
|
|
||||||
// TODO(gibbed): cvar for thread name encoding for conversion, some games use
|
// TODO(gibbed): cvar for thread name encoding for conversion, some games use
|
||||||
// SJIS and there's no way to automatically know this.
|
// SJIS and there's no way to automatically know this.
|
||||||
|
|
|
@ -49,10 +49,10 @@ static bool IsValidPath(const std::string_view s, bool is_pattern) {
|
||||||
if (got_asterisk) {
|
if (got_asterisk) {
|
||||||
// * must be followed by a . (*.)
|
// * must be followed by a . (*.)
|
||||||
//
|
//
|
||||||
// Viva Piñata: Party Animals (4D530819) has a bug in its game code where
|
// 4D530819 has a bug in its game code where it attempts to
|
||||||
// it attempts to FindFirstFile() with filters of "Game:\\*_X3.rkv",
|
// FindFirstFile() with filters of "Game:\\*_X3.rkv", "Game:\\m*_X3.rkv",
|
||||||
// "Game:\\m*_X3.rkv", and "Game:\\w*_X3.rkv" and will infinite loop if
|
// and "Game:\\w*_X3.rkv" and will infinite loop if the path filter is
|
||||||
// the path filter is allowed.
|
// allowed.
|
||||||
if (c != '.') {
|
if (c != '.') {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,7 +111,7 @@ struct X_OBJECT_TYPE {
|
||||||
|
|
||||||
class XObject {
|
class XObject {
|
||||||
public:
|
public:
|
||||||
// Burnout Paradise needs proper handle value for certain calculations
|
// 45410806 needs proper handle value for certain calculations
|
||||||
// It gets handle value from TLS (without base handle value is 0x88)
|
// It gets handle value from TLS (without base handle value is 0x88)
|
||||||
// and subtract 0xF8000088. Without base we're receiving wrong address
|
// and subtract 0xF8000088. Without base we're receiving wrong address
|
||||||
// Instead of receiving address that starts with 0x82... we're receiving
|
// Instead of receiving address that starts with 0x82... we're receiving
|
||||||
|
|
|
@ -1531,12 +1531,11 @@ bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) {
|
||||||
// Must invalidate here because the range being released may be reused in
|
// Must invalidate here because the range being released may be reused in
|
||||||
// another mapping of physical memory - but callback flags are set in each
|
// another mapping of physical memory - but callback flags are set in each
|
||||||
// heap separately (https://github.com/xenia-project/xenia/issues/1559 -
|
// heap separately (https://github.com/xenia-project/xenia/issues/1559 -
|
||||||
// dynamic vertices in Viva Pinata start screen and menu allocated in
|
// dynamic vertices in 4D5307F2 start screen and menu allocated in 0xA0000000
|
||||||
// 0xA0000000 at addresses that overlap intro video textures in 0xE0000000,
|
// at addresses that overlap intro video textures in 0xE0000000, with the
|
||||||
// with the state of the allocator as of February 24th, 2020). If memory is
|
// state of the allocator as of February 24th, 2020). If memory is invalidated
|
||||||
// invalidated in Alloc instead, Alloc won't be aware of callbacks enabled in
|
// in Alloc instead, Alloc won't be aware of callbacks enabled in other heaps,
|
||||||
// other heaps, thus callback handlers will keep considering this range valid
|
// thus callback handlers will keep considering this range valid forever.
|
||||||
// forever.
|
|
||||||
uint32_t region_size;
|
uint32_t region_size;
|
||||||
if (QuerySize(base_address, &region_size)) {
|
if (QuerySize(base_address, &region_size)) {
|
||||||
TriggerCallbacks(std::move(global_lock), base_address, region_size, true,
|
TriggerCallbacks(std::move(global_lock), base_address, region_size, true,
|
||||||
|
|
Loading…
Reference in New Issue