[GPU] xenos.h: max texture size, interpolators
This commit is contained in:
parent
79413345af
commit
b84239d507
|
@ -1123,12 +1123,12 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader,
|
|||
// drawcall.
|
||||
// https://www.google.com/patents/US20060055701
|
||||
uint16_t extents[] = {
|
||||
0 >> 3, // min x
|
||||
8192 >> 3, // max x
|
||||
0 >> 3, // min y
|
||||
8192 >> 3, // max y
|
||||
0, // min z
|
||||
1, // max z
|
||||
0 >> 3, // min x
|
||||
xenos::kTexture2DCubeMaxWidthHeight >> 3, // max x
|
||||
0 >> 3, // min y
|
||||
xenos::kTexture2DCubeMaxWidthHeight >> 3, // max y
|
||||
0, // min z
|
||||
1, // max z
|
||||
};
|
||||
assert_true(endianness == xenos::Endian::k8in16);
|
||||
xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), extents,
|
||||
|
|
|
@ -3065,11 +3065,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// Conversion to Direct3D 12 normalized device coordinates.
|
||||
// See viewport configuration in UpdateFixedFunctionState for explanations.
|
||||
// X and Y scale/offset is to convert unnormalized coordinates generated by
|
||||
// shaders (for rectangle list drawing, for instance) to the 8192x8192
|
||||
// viewport (the maximum render target size) that is used to emulate
|
||||
// unnormalized coordinates. Z scale/offset is to convert from OpenGL NDC to
|
||||
// Direct3D NDC if needed. Also apply half-pixel offset to reproduce Direct3D
|
||||
// 9 rasterization rules - must be done before clipping, not through the
|
||||
// shaders (for rectangle list drawing, for instance) to the viewport of the
|
||||
// largest possible render target size that is used to emulate unnormalized
|
||||
// coordinates. Z scale/offset is to convert from OpenGL NDC to Direct3D NDC
|
||||
// if needed. Also apply half-pixel offset to reproduce Direct3D 9
|
||||
// rasterization rules - must be done before clipping, not through the
|
||||
// viewport, for SSAA and resolution scale to work correctly.
|
||||
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
|
||||
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
|
||||
|
@ -3116,14 +3116,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
ndc_offset_x += 0.5f / viewport_scale_x;
|
||||
}
|
||||
} else {
|
||||
ndc_offset_x += 1.0f / 8192.0f;
|
||||
ndc_offset_x += 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
|
||||
}
|
||||
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
||||
if (viewport_scale_y != 0.0f) {
|
||||
ndc_offset_y += 0.5f / viewport_scale_y;
|
||||
}
|
||||
} else {
|
||||
ndc_offset_y -= 1.0f / 8192.0f;
|
||||
ndc_offset_y -= 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
|
||||
}
|
||||
}
|
||||
dirty |= system_constants_.ndc_scale[0] != ndc_scale_x;
|
||||
|
@ -3158,13 +3158,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
point_screen_to_ndc_x =
|
||||
(viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f;
|
||||
} else {
|
||||
point_screen_to_ndc_x = 1.0f / 8192.0f;
|
||||
point_screen_to_ndc_x = 1.0f / xenos::kTexture2DCubeMaxWidthHeight;
|
||||
}
|
||||
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
||||
point_screen_to_ndc_y =
|
||||
(viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f;
|
||||
} else {
|
||||
point_screen_to_ndc_y = -1.0f / 8192.0f;
|
||||
point_screen_to_ndc_y = -1.0f / xenos::kTexture2DCubeMaxWidthHeight;
|
||||
}
|
||||
dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x;
|
||||
dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y;
|
||||
|
|
|
@ -1487,7 +1487,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
// of the buffer.
|
||||
root_constants.tile_sample_dest_base -= dest_address & ~0xFFFu;
|
||||
}
|
||||
assert_true(dest_pitch <= 8192);
|
||||
assert_true(dest_pitch <= xenos::kTexture2DCubeMaxWidthHeight);
|
||||
root_constants.tile_sample_dest_info =
|
||||
((dest_pitch + 31) >> 5) |
|
||||
(rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9)
|
||||
|
|
|
@ -219,10 +219,10 @@ class D3D12CommandProcessor;
|
|||
// other, and because the height is unknown (and the viewport and scissor are
|
||||
// not always present - D3DPT_RECTLIST is used very commonly, especially for
|
||||
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
|
||||
// copying, and it's usually drawn without a viewport and with 8192x8192
|
||||
// scissor), there may be cases of simultaneously bound render targets
|
||||
// overlapping each other in the EDRAM in a way that is difficult to resolve,
|
||||
// and stores/loads may destroy data.
|
||||
// copying, and it's usually drawn without a viewport and with the scissor of
|
||||
// the maximum possible size), there may be cases of simultaneously bound
|
||||
// render targets overlapping each other in the EDRAM in a way that is
|
||||
// difficult to resolve, and stores/loads may destroy data.
|
||||
//
|
||||
// =============================================================================
|
||||
// 2x width and height scaling implementation:
|
||||
|
|
|
@ -2189,7 +2189,9 @@ void TextureCache::BindingInfoFromFetchConstant(
|
|||
// No texture data at all.
|
||||
return;
|
||||
}
|
||||
if (fetch.dimension == xenos::DataDimension::k1D && width > 8192) {
|
||||
// TODO(Triang3l): Support long 1D textures.
|
||||
if (fetch.dimension == xenos::DataDimension::k1D &&
|
||||
width > xenos::kTexture2DCubeMaxWidthHeight) {
|
||||
XELOGE(
|
||||
"1D texture is too wide ({}) - ignoring! "
|
||||
"Report the game to Xenia developers",
|
||||
|
|
|
@ -450,7 +450,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
|
||||
void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
||||
// Zero the interpolators.
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i),
|
||||
DxbcSrc::LF(0.0f));
|
||||
}
|
||||
|
@ -647,7 +647,8 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f));
|
||||
}
|
||||
|
||||
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
|
||||
uint32_t interpolator_count =
|
||||
std::min(xenos::kMaxInterpolators, register_count());
|
||||
if (interpolator_count != 0) {
|
||||
// Copy interpolants to GPRs.
|
||||
if (edram_rov_used_) {
|
||||
|
@ -960,7 +961,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
|
||||
// Zero general-purpose registers to prevent crashes when the game
|
||||
// references them after only initializing them conditionally.
|
||||
for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0;
|
||||
for (uint32_t i = IsDxbcPixelShader() ? xenos::kMaxInterpolators : 0;
|
||||
i < register_count(); ++i) {
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
|
@ -2813,13 +2814,13 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
// Interpolators (TEXCOORD#).
|
||||
size_t interpolator_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() +
|
||||
kInterpolatorCount * kParameterDwords);
|
||||
parameter_count += kInterpolatorCount;
|
||||
xenos::kMaxInterpolators * kParameterDwords);
|
||||
parameter_count += xenos::kMaxInterpolators;
|
||||
{
|
||||
DxbcSignatureParameter* interpolators =
|
||||
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
|
||||
interpolator_position);
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
DxbcSignatureParameter& interpolator = interpolators[i];
|
||||
interpolator.semantic_index = i;
|
||||
interpolator.component_type =
|
||||
|
@ -2909,7 +2910,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
DxbcSignatureParameter* interpolators =
|
||||
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
|
||||
interpolator_position);
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
interpolators[i].semantic_name = semantic_offset;
|
||||
}
|
||||
DxbcSignatureParameter& point_parameters =
|
||||
|
@ -3077,13 +3078,13 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
// Interpolators (TEXCOORD#).
|
||||
size_t interpolator_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() +
|
||||
kInterpolatorCount * kParameterDwords);
|
||||
parameter_count += kInterpolatorCount;
|
||||
xenos::kMaxInterpolators * kParameterDwords);
|
||||
parameter_count += xenos::kMaxInterpolators;
|
||||
{
|
||||
DxbcSignatureParameter* interpolators =
|
||||
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
|
||||
interpolator_position);
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
DxbcSignatureParameter& interpolator = interpolators[i];
|
||||
interpolator.semantic_index = i;
|
||||
interpolator.component_type =
|
||||
|
@ -3197,7 +3198,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
DxbcSignatureParameter* interpolators =
|
||||
reinterpret_cast<DxbcSignatureParameter*>(shader_object_.data() +
|
||||
interpolator_position);
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
interpolators[i].semantic_name = semantic_offset;
|
||||
}
|
||||
DxbcSignatureParameter& point_parameters =
|
||||
|
@ -3665,7 +3666,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
}
|
||||
// Interpolator output.
|
||||
for (uint32_t i = 0; i < kInterpolatorCount; ++i) {
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
|
@ -3727,7 +3728,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// Interpolator input.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
uint32_t interpolator_count =
|
||||
std::min(kInterpolatorCount, register_count());
|
||||
std::min(xenos::kMaxInterpolators, register_count());
|
||||
for (uint32_t i = 0; i < interpolator_count; ++i) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) |
|
||||
|
|
|
@ -2049,8 +2049,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
static_assert(kSysConst_Count <= 64,
|
||||
"Too many system constants, can't use uint64_t for usage bits");
|
||||
|
||||
static constexpr uint32_t kInterpolatorCount = 16;
|
||||
static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount;
|
||||
static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators;
|
||||
static constexpr uint32_t kClipSpaceZWTexCoord = kPointParametersTexCoord + 1;
|
||||
|
||||
enum class InOutRegister : uint32_t {
|
||||
|
@ -2061,7 +2060,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kDSInControlPointIndex = 0,
|
||||
|
||||
kVSDSOutInterpolators = 0,
|
||||
kVSDSOutPointParameters = kVSDSOutInterpolators + kInterpolatorCount,
|
||||
kVSDSOutPointParameters = kVSDSOutInterpolators + xenos::kMaxInterpolators,
|
||||
kVSDSOutClipSpaceZW,
|
||||
kVSDSOutPosition,
|
||||
// Clip and cull distances must be tightly packed in Direct3D!
|
||||
|
@ -2073,7 +2072,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// kill.
|
||||
|
||||
kPSInInterpolators = 0,
|
||||
kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,
|
||||
kPSInPointParameters = kPSInInterpolators + xenos::kMaxInterpolators,
|
||||
kPSInClipSpaceZW,
|
||||
kPSInPosition,
|
||||
kPSInFrontFace,
|
||||
|
|
|
@ -549,6 +549,8 @@ enum class VertexShaderExportMode : uint32_t {
|
|||
kMultipass = 7,
|
||||
};
|
||||
|
||||
// Number of interpolators. Used as the loop bound for interpolator
// inputs/outputs in the shader translator and as the bit width of the
// all-interpolators mask built in GetInterpolatorSamplingPattern.
constexpr uint32_t kMaxInterpolators = 16;
|
||||
|
||||
enum class SampleControl : uint32_t {
|
||||
kCentroidsOnly = 0,
|
||||
kCentersOnly = 1,
|
||||
|
@ -570,10 +572,10 @@ inline uint32_t GetInterpolatorSamplingPattern(
|
|||
uint32_t interpolator_control_sampling_pattern) {
|
||||
if (msaa_samples == MsaaSamples::k1X ||
|
||||
sample_control == SampleControl::kCentersOnly) {
|
||||
return ((1 << 16) - 1) * uint32_t(SampleLocation::kCenter);
|
||||
return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCenter);
|
||||
}
|
||||
if (sample_control == SampleControl::kCentroidsOnly) {
|
||||
return ((1 << 16) - 1) * uint32_t(SampleLocation::kCentroid);
|
||||
return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCentroid);
|
||||
}
|
||||
assert_true(sample_control == SampleControl::kCentroidsAndCenters);
|
||||
return interpolator_control_sampling_pattern;
|
||||
|
@ -722,6 +724,21 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
|
|||
});
|
||||
});
|
||||
|
||||
// Texture fetch constant size field widths.
|
||||
constexpr uint32_t kTexture1DMaxWidthLog2 = 24;
|
||||
constexpr uint32_t kTexture1DMaxWidth = uint32_t(1) << kTexture1DMaxWidthLog2;
|
||||
constexpr uint32_t kTexture2DCubeMaxWidthHeightLog2 = 13;
|
||||
constexpr uint32_t kTexture2DCubeMaxWidthHeight =
|
||||
uint32_t(1) << kTexture2DCubeMaxWidthHeightLog2;
|
||||
constexpr uint32_t kTexture2DMaxStackDepthLog2 = 6;
|
||||
constexpr uint32_t kTexture2DMaxStackDepth = uint32_t(1)
|
||||
<< kTexture2DMaxStackDepthLog2;
|
||||
constexpr uint32_t kTexture3DMaxWidthHeightLog2 = 11;
|
||||
constexpr uint32_t kTexture3DMaxWidthHeight = uint32_t(1)
|
||||
<< kTexture3DMaxWidthHeightLog2;
|
||||
constexpr uint32_t kTexture3DMaxDepthLog2 = 10;
|
||||
constexpr uint32_t kTexture3DMaxDepth = uint32_t(1) << kTexture3DMaxDepthLog2;
|
||||
|
||||
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
|
||||
XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
|
@ -752,6 +769,7 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
|||
uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl
|
||||
uint32_t base_address : 20; // +12 base address >> 12
|
||||
|
||||
// Size is stored with 1 subtracted from each component.
|
||||
union { // dword_2
|
||||
struct {
|
||||
uint32_t width : 24;
|
||||
|
|
Loading…
Reference in New Issue