[GPU] xenos.h: max texture size, interpolators

This commit is contained in:
Triang3l 2020-07-11 18:56:56 +03:00
parent 79413345af
commit b84239d507
8 changed files with 60 additions and 40 deletions

View File

@ -1123,12 +1123,12 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader,
// drawcall.
// https://www.google.com/patents/US20060055701
uint16_t extents[] = {
0 >> 3, // min x
8192 >> 3, // max x
0 >> 3, // min y
8192 >> 3, // max y
0, // min z
1, // max z
0 >> 3, // min x
xenos::kTexture2DCubeMaxWidthHeight >> 3, // max x
0 >> 3, // min y
xenos::kTexture2DCubeMaxWidthHeight >> 3, // max y
0, // min z
1, // max z
};
assert_true(endianness == xenos::Endian::k8in16);
xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), extents,

View File

@ -3065,11 +3065,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Conversion to Direct3D 12 normalized device coordinates.
// See viewport configuration in UpdateFixedFunctionState for explanations.
// X and Y scale/offset is to convert unnormalized coordinates generated by
// shaders (for rectangle list drawing, for instance) to the 8192x8192
// viewport (the maximum render target size) that is used to emulate
// unnormalized coordinates. Z scale/offset is to convert from OpenGL NDC to
// Direct3D NDC if needed. Also apply half-pixel offset to reproduce Direct3D
// 9 rasterization rules - must be done before clipping, not through the
// shaders (for rectangle list drawing, for instance) to the viewport of the
// largest possible render target size that is used to emulate unnormalized
// coordinates. Z scale/offset is to convert from OpenGL NDC to Direct3D NDC
// if needed. Also apply half-pixel offset to reproduce Direct3D 9
// rasterization rules - must be done before clipping, not through the
// viewport, for SSAA and resolution scale to work correctly.
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
@ -3116,14 +3116,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
ndc_offset_x += 0.5f / viewport_scale_x;
}
} else {
ndc_offset_x += 1.0f / 8192.0f;
ndc_offset_x += 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
}
if (pa_cl_vte_cntl.vport_y_scale_ena) {
if (viewport_scale_y != 0.0f) {
ndc_offset_y += 0.5f / viewport_scale_y;
}
} else {
ndc_offset_y -= 1.0f / 8192.0f;
ndc_offset_y -= 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
}
}
dirty |= system_constants_.ndc_scale[0] != ndc_scale_x;
@ -3158,13 +3158,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
point_screen_to_ndc_x =
(viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f;
} else {
point_screen_to_ndc_x = 1.0f / 8192.0f;
point_screen_to_ndc_x = 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
}
if (pa_cl_vte_cntl.vport_y_scale_ena) {
point_screen_to_ndc_y =
(viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f;
} else {
point_screen_to_ndc_y = -1.0f / 8192.0f;
point_screen_to_ndc_y = -1.0f / xenos::kTexture2DCubeMaxWidthHeight;
}
dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x;
dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y;

View File

@ -1487,7 +1487,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// of the buffer.
root_constants.tile_sample_dest_base -= dest_address & ~0xFFFu;
}
assert_true(dest_pitch <= 8192);
assert_true(dest_pitch <= xenos::kTexture2DCubeMaxWidthHeight);
root_constants.tile_sample_dest_info =
((dest_pitch + 31) >> 5) |
(rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9)

View File

@ -219,10 +219,10 @@ class D3D12CommandProcessor;
// other, and because the height is unknown (and the viewport and scissor are
// not always present - D3DPT_RECTLIST is used very commonly, especially for
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
// copying, and it's usually drawn without a viewport and with 8192x8192
// scissor), there may be cases of simultaneously bound render targets
// overlapping each other in the EDRAM in a way that is difficult to resolve,
// and stores/loads may destroy data.
// copying, and it's usually drawn without a viewport and with the scissor of
// the maximum possible size), there may be cases of simultaneously bound
// render targets overlapping each other in the EDRAM in a way that is
// difficult to resolve, and stores/loads may destroy data.
//
// =============================================================================
// 2x width and height scaling implementation:

View File

@ -2189,7 +2189,9 @@ void TextureCache::BindingInfoFromFetchConstant(
// No texture data at all.
return;
}
if (fetch.dimension == xenos::DataDimension::k1D && width > 8192) {
// TODO(Triang3l): Support long 1D textures.
if (fetch.dimension == xenos::DataDimension::k1D &&
width > xenos::kTexture2DCubeMaxWidthHeight) {
XELOGE(
"1D texture is too wide ({}) - ignoring! "
"Report the game to Xenia developers",

View File

@ -450,7 +450,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
void DxbcShaderTranslator::StartVertexOrDomainShader() {
// Zero the interpolators.
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i),
DxbcSrc::LF(0.0f));
}
@ -647,7 +647,8 @@ void DxbcShaderTranslator::StartPixelShader() {
DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f));
}
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
uint32_t interpolator_count =
std::min(xenos::kMaxInterpolators, register_count());
if (interpolator_count != 0) {
// Copy interpolants to GPRs.
if (edram_rov_used_) {
@ -960,7 +961,7 @@ void DxbcShaderTranslator::StartTranslation() {
// Zero general-purpose registers to prevent crashes when the game
// references them after only initializing them conditionally.
for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0;
for (uint32_t i = IsDxbcPixelShader() ? xenos::kMaxInterpolators : 0;
i < register_count(); ++i) {
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
: DxbcDest::R(i),
@ -2813,13 +2814,13 @@ void DxbcShaderTranslator::WriteInputSignature() {
// Interpolators (TEXCOORD#).
size_t interpolator_position = shader_object_.size();
shader_object_.resize(shader_object_.size() +
kInterpolatorCount * kParameterDwords);
parameter_count += kInterpolatorCount;
xenos::kMaxInterpolators * kParameterDwords);
parameter_count += xenos::kMaxInterpolators;
{
DxbcSignatureParameter* interpolators =
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
interpolator_position);
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
DxbcSignatureParameter& interpolator = interpolators[i];
interpolator.semantic_index = i;
interpolator.component_type =
@ -2909,7 +2910,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
DxbcSignatureParameter* interpolators =
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
interpolator_position);
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
interpolators[i].semantic_name = semantic_offset;
}
DxbcSignatureParameter& point_parameters =
@ -3077,13 +3078,13 @@ void DxbcShaderTranslator::WriteOutputSignature() {
// Interpolators (TEXCOORD#).
size_t interpolator_position = shader_object_.size();
shader_object_.resize(shader_object_.size() +
kInterpolatorCount * kParameterDwords);
parameter_count += kInterpolatorCount;
xenos::kMaxInterpolators * kParameterDwords);
parameter_count += xenos::kMaxInterpolators;
{
DxbcSignatureParameter* interpolators =
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
interpolator_position);
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
DxbcSignatureParameter& interpolator = interpolators[i];
interpolator.semantic_index = i;
interpolator.component_type =
@ -3197,7 +3198,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
DxbcSignatureParameter* interpolators =
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
interpolator_position);
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
interpolators[i].semantic_name = semantic_offset;
}
DxbcSignatureParameter& point_parameters =
@ -3665,7 +3666,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
}
// Interpolator output.
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
@ -3727,7 +3728,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Interpolator input.
if (!is_depth_only_pixel_shader_) {
uint32_t interpolator_count =
std::min(kInterpolatorCount, register_count());
std::min(xenos::kMaxInterpolators, register_count());
for (uint32_t i = 0; i < interpolator_count; ++i) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) |

View File

@ -2049,8 +2049,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
static_assert(kSysConst_Count <= 64,
"Too many system constants, can't use uint64_t for usage bits");
static constexpr uint32_t kInterpolatorCount = 16;
static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount;
static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators;
static constexpr uint32_t kClipSpaceZWTexCoord = kPointParametersTexCoord + 1;
enum class InOutRegister : uint32_t {
@ -2061,7 +2060,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kDSInControlPointIndex = 0,
kVSDSOutInterpolators = 0,
kVSDSOutPointParameters = kVSDSOutInterpolators + kInterpolatorCount,
kVSDSOutPointParameters = kVSDSOutInterpolators + xenos::kMaxInterpolators,
kVSDSOutClipSpaceZW,
kVSDSOutPosition,
// Clip and cull distances must be tightly packed in Direct3D!
@ -2073,7 +2072,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// kill.
kPSInInterpolators = 0,
kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,
kPSInPointParameters = kPSInInterpolators + xenos::kMaxInterpolators,
kPSInClipSpaceZW,
kPSInPosition,
kPSInFrontFace,

View File

@ -549,6 +549,8 @@ enum class VertexShaderExportMode : uint32_t {
kMultipass = 7,
};
// Number of interpolators passed between shader stages. Also used as the
// number of bits in the mask built by GetInterpolatorSamplingPattern, which
// has one bit per interpolator.
constexpr uint32_t kMaxInterpolators = 16;
enum class SampleControl : uint32_t {
kCentroidsOnly = 0,
kCentersOnly = 1,
@ -570,10 +572,10 @@ inline uint32_t GetInterpolatorSamplingPattern(
uint32_t interpolator_control_sampling_pattern) {
if (msaa_samples == MsaaSamples::k1X ||
sample_control == SampleControl::kCentersOnly) {
return ((1 << 16) - 1) * uint32_t(SampleLocation::kCenter);
return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCenter);
}
if (sample_control == SampleControl::kCentroidsOnly) {
return ((1 << 16) - 1) * uint32_t(SampleLocation::kCentroid);
return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCentroid);
}
assert_true(sample_control == SampleControl::kCentroidsAndCenters);
return interpolator_control_sampling_pattern;
@ -722,6 +724,21 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
});
});
// Texture fetch constant size field widths.
// Each k...Log2 constant is the width, in bits, of a size field, and the
// matching k...Max... constant is 1 << that width. The fetch constant stores
// each size component with 1 subtracted, so an N-bit field can describe sizes
// from 1 up to 1 << N (the width field of 1D textures, for instance, is 24
// bits wide).
// 1D textures: width.
constexpr uint32_t kTexture1DMaxWidthLog2 = 24;
constexpr uint32_t kTexture1DMaxWidth = uint32_t(1) << kTexture1DMaxWidthLog2;
// 2D and cube textures: width and height.
constexpr uint32_t kTexture2DCubeMaxWidthHeightLog2 = 13;
constexpr uint32_t kTexture2DCubeMaxWidthHeight =
uint32_t(1) << kTexture2DCubeMaxWidthHeightLog2;
// 2D textures: stack depth (presumably the number of stacked slices - verify
// against the fetch constant layout).
constexpr uint32_t kTexture2DMaxStackDepthLog2 = 6;
constexpr uint32_t kTexture2DMaxStackDepth = uint32_t(1)
<< kTexture2DMaxStackDepthLog2;
// 3D textures: width and height.
constexpr uint32_t kTexture3DMaxWidthHeightLog2 = 11;
constexpr uint32_t kTexture3DMaxWidthHeight = uint32_t(1)
<< kTexture3DMaxWidthHeightLog2;
// 3D textures: depth.
constexpr uint32_t kTexture3DMaxDepthLog2 = 10;
constexpr uint32_t kTexture3DMaxDepth = uint32_t(1) << kTexture3DMaxDepthLog2;
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_texture_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({
@ -752,6 +769,7 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl
uint32_t base_address : 20; // +12 base address >> 12
// Size is stored with 1 subtracted from each component.
union { // dword_2
struct {
uint32_t width : 24;