[Vulkan] Viewport from draw_util and vtx_fmt
This commit is contained in:
parent
c0550bafe9
commit
a94301d967
|
@ -168,10 +168,13 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
spv::Id type;
|
||||
};
|
||||
const SystemConstant system_constants[] = {
|
||||
{"flags", offsetof(SystemConstants, flags), type_uint_},
|
||||
{"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian),
|
||||
type_uint_},
|
||||
{"vertex_base_index", offsetof(SystemConstants, vertex_base_index),
|
||||
type_int_},
|
||||
{"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_},
|
||||
{"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_},
|
||||
};
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(xe::countof(system_constants));
|
||||
|
@ -997,6 +1000,133 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
|||
}
|
||||
|
||||
void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags));
|
||||
spv::Id system_constant_flags = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kOutputPerVertexMemberPosition));
|
||||
spv::Id position_ptr = builder_->createAccessChain(
|
||||
spv::StorageClassOutput, output_per_vertex_, id_vector_temp_);
|
||||
spv::Id guest_position = builder_->createLoad(position_ptr, spv::NoPrecision);
|
||||
|
||||
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
|
||||
// into W.
|
||||
spv::Id position_w =
|
||||
builder_->createCompositeExtract(guest_position, type_float_, 3);
|
||||
spv::Id is_w_not_reciprocal = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_WNotReciprocal))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_w_inv = builder_->createBinOp(
|
||||
spv::OpFDiv, type_float_, const_float_1_, position_w);
|
||||
builder_->addDecoration(guest_position_w_inv, spv::DecorationNoContraction);
|
||||
position_w =
|
||||
builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal,
|
||||
position_w, guest_position_w_inv);
|
||||
|
||||
// Check if the shader returns XY/W rather than XY, and if it does, revert
|
||||
// that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
uint_vector_temp_.clear();
|
||||
uint_vector_temp_.reserve(2);
|
||||
uint_vector_temp_.push_back(0);
|
||||
uint_vector_temp_.push_back(1);
|
||||
spv::Id position_xy = builder_->createRvalueSwizzle(
|
||||
spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_);
|
||||
spv::Id is_xy_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_XYDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_xy_mul_w = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, type_float2_, position_xy, position_w);
|
||||
builder_->addDecoration(guest_position_xy_mul_w,
|
||||
spv::DecorationNoContraction);
|
||||
position_xy =
|
||||
builder_->createTriOp(spv::OpSelect, type_float2_, is_xy_divided_by_w,
|
||||
guest_position_xy_mul_w, position_xy);
|
||||
|
||||
// Check if the shader returns Z/W rather than Z, and if it does, revert that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
spv::Id position_z =
|
||||
builder_->createCompositeExtract(guest_position, type_float_, 2);
|
||||
spv::Id is_z_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_ZDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_z_mul_w =
|
||||
builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w);
|
||||
builder_->addDecoration(guest_position_z_mul_w, spv::DecorationNoContraction);
|
||||
position_z =
|
||||
builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w,
|
||||
guest_position_z_mul_w, position_z);
|
||||
|
||||
// Build XYZ of the position with W format handled.
|
||||
spv::Id position_xyz;
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xy);
|
||||
composite_construct_op->addIdOperand(position_z);
|
||||
position_xyz = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
|
||||
// Apply the NDC scale and offset for guest to host viewport transformation.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantNdcScale));
|
||||
spv::Id ndc_scale = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
position_xyz =
|
||||
builder_->createBinOp(spv::OpFMul, type_float3_, position_xyz, ndc_scale);
|
||||
builder_->addDecoration(position_xyz, spv::DecorationNoContraction);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantNdcOffset));
|
||||
spv::Id ndc_offset = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
spv::Id ndc_offset_mul_w = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, type_float3_, ndc_offset, position_w);
|
||||
builder_->addDecoration(ndc_offset_mul_w, spv::DecorationNoContraction);
|
||||
position_xyz = builder_->createBinOp(spv::OpFAdd, type_float3_, position_xyz,
|
||||
ndc_offset_mul_w);
|
||||
builder_->addDecoration(position_xyz, spv::DecorationNoContraction);
|
||||
|
||||
// Store the position converted to the host.
|
||||
spv::Id position;
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xyz);
|
||||
composite_construct_op->addIdOperand(position_w);
|
||||
position = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
builder_->createStore(position, position_ptr);
|
||||
|
||||
// Write 1 to point size (using a geometry shader or another kind of fallback
|
||||
// to expand point sprites - point size support is not guaranteed, and the
|
||||
// size would also be limited, and can't be controlled independently along two
|
||||
|
|
|
@ -25,12 +25,33 @@ namespace gpu {
|
|||
|
||||
class SpirvShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
// Bits of the 32-bit `flags` system constant. The *_Shift enumerators are bit
// positions (implicitly numbered from 0), kSysFlag_Count is the number of
// defined bits, and the unsuffixed enumerators are the matching one-bit masks.
enum : uint32_t {
|
||||
// Set by the command processor when PA_CL_VTE_CNTL.vtx_xy_fmt is set; the
// translated vertex shader then multiplies the position XY by W.
kSysFlag_XYDividedByW_Shift,
|
||||
// Set when PA_CL_VTE_CNTL.vtx_z_fmt is set; the translated vertex shader then
// multiplies the position Z by W.
kSysFlag_ZDividedByW_Shift,
|
||||
// Set when PA_CL_VTE_CNTL.vtx_w0_fmt is set; when it is clear, the translated
// vertex shader replaces the exported W with 1 / W.
kSysFlag_WNotReciprocal_Shift,
|
||||
|
||||
// Number of flag bits - keep this after the last *_Shift enumerator.
kSysFlag_Count,
|
||||
|
||||
// One-bit masks built from the bit positions above.
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
|
||||
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
||||
};
|
||||
// The flags are stored in a single uint32_t, so at most 32 bits are available.
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
||||
|
||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||
// - SystemConstantIndex enum.
|
||||
// - Structure members in StartTranslation.
|
||||
// CPU-side layout of the system constants read by translated shaders.
// NOTE(review): the explicit padding fields presumably keep each 3-component
// vector on a 16-byte boundary for the uniform buffer layout - confirm against
// the member offsets/decorations emitted by the translator.
struct SystemConstants {
|
||||
// Combination of the kSysFlag_* masks.
uint32_t flags;
|
||||
// Endianness of the index buffer for the draw (xenos::Endian::kNone is passed
// for non-indexed draws).
xenos::Endian vertex_index_endian;
|
||||
// Signed base vertex index, taken from the VGT_INDX_OFFSET register.
int32_t vertex_base_index;
|
||||
uint32_t padding_vertex_base_index;
|
||||
|
||||
// Per-component scale applied to the guest position XYZ in the vertex shader.
float ndc_scale[3];
|
||||
uint32_t padding_ndc_scale;
|
||||
|
||||
// Per-component offset, multiplied by W and added to the scaled position XYZ.
float ndc_offset[3];
|
||||
uint32_t padding_ndc_offset;
|
||||
};
|
||||
|
||||
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
|
||||
|
@ -329,8 +350,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
spv::Id const_float2_0_1_;
|
||||
|
||||
// Member indices of the system constants uniform structure, used as the index
// operand of access chains into uniform_system_constants_. The order follows
// the system_constants[] member table built in StartTranslation and the
// non-padding members of SystemConstants.
// NOTE(review): the two vertex enumerators carry an extra "Index" infix that
// the other names lack - renaming would touch callers outside this view.
enum SystemConstantIndex : unsigned int {
|
||||
kSystemConstantFlags,
|
||||
kSystemConstantIndexVertexIndexEndian,
|
||||
kSystemConstantIndexVertexBaseIndex,
|
||||
kSystemConstantNdcScale,
|
||||
kSystemConstantNdcOffset,
|
||||
};
|
||||
spv::Id uniform_system_constants_;
|
||||
spv::Id uniform_float_constants_;
|
||||
|
|
|
@ -686,14 +686,45 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
current_graphics_pipeline_layout_ = pipeline_layout;
|
||||
}
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
GetVulkanContext().GetVulkanProvider();
|
||||
const VkPhysicalDeviceProperties& device_properties =
|
||||
provider.device_properties();
|
||||
|
||||
// Get dynamic rasterizer state.
|
||||
draw_util::ViewportInfo viewport_info;
|
||||
// Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must
|
||||
// be at least 2 * max(maxViewportDimensions[0...1]) - 1, and
|
||||
// maxViewportDimensions must be greater than or equal to the size of the
|
||||
// largest possible framebuffer attachment (if the viewport has positive
|
||||
// offset and is between maxViewportDimensions and viewportBoundsRange[1],
|
||||
// GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the
|
||||
// clamped range will be outside the largest possible framebuffer anyway).
|
||||
// TODO(Triang3l): Possibly handle maxViewportDimensions and
|
||||
// viewportBoundsRange separately because when using fragment shader
|
||||
// interlocks, framebuffers are not used, while the range may be wider than
|
||||
// dimensions? Though viewport bigger than 4096 - the smallest possible
|
||||
// maximum dimension (which is below the 8192 texture size limit on the Xbox
|
||||
// 360) - and with offset, is probably a situation that never happens in real
|
||||
// life. Or even disregard the viewport bounds range in the fragment shader
|
||||
// interlocks case completely - apply the viewport and the scissor offset
|
||||
// directly to pixel address and to things like ps_param_gen.
|
||||
draw_util::GetHostViewportInfo(
|
||||
regs, 1.0f, 1.0f, false,
|
||||
float(device_properties.limits.maxViewportDimensions[0]),
|
||||
float(device_properties.limits.maxViewportDimensions[1]), true,
|
||||
viewport_info);
|
||||
|
||||
// Update fixed-function dynamic state.
|
||||
UpdateFixedFunctionState();
|
||||
UpdateFixedFunctionState(viewport_info);
|
||||
|
||||
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
||||
|
||||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
||||
: xenos::Endian::kNone);
|
||||
UpdateSystemConstantValues(
|
||||
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
|
||||
viewport_info);
|
||||
|
||||
// Update uniform buffers and descriptor sets after binding the pipeline with
|
||||
// the new layout.
|
||||
|
@ -701,8 +732,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
return false;
|
||||
}
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
|
||||
// Ensure vertex buffers are resident.
|
||||
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
|
||||
// validity is tracked.
|
||||
|
@ -1229,7 +1258,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
|
|||
return stages;
|
||||
}
|
||||
|
||||
void VulkanCommandProcessor::UpdateFixedFunctionState() {
|
||||
void VulkanCommandProcessor::UpdateFixedFunctionState(
|
||||
const draw_util::ViewportInfo& viewport_info) {
|
||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
|
@ -1245,53 +1275,13 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() {
|
|||
uint32_t pixel_size_x = 1, pixel_size_y = 1;
|
||||
|
||||
// Viewport.
|
||||
// PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
|
||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||
// In games, either all are enabled (for regular drawing) or none are (for
|
||||
// rectangle lists usually).
|
||||
//
|
||||
// If scale/offset is enabled, the Xenos shader is writing (neglecting W
|
||||
// division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
|
||||
// box. If it's not, the position is in screen space. Since we can only use
|
||||
// the NDC in PC APIs, we use a viewport of the largest possible size, and
|
||||
// divide the position by it in translated shaders.
|
||||
//
|
||||
// TODO(Triang3l): Move all of this to draw_util.
|
||||
// TODO(Triang3l): Limit the viewport if exceeding the device limit; move to
|
||||
// NDC scale/offset constants.
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
float viewport_scale_x =
|
||||
pa_cl_vte_cntl.vport_x_scale_ena
|
||||
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
|
||||
: 4096.0f;
|
||||
float viewport_scale_y =
|
||||
pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
|
||||
: 4096.0f;
|
||||
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
||||
: 1.0f;
|
||||
float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||
: std::abs(viewport_scale_x);
|
||||
float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: std::abs(viewport_scale_y);
|
||||
float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
||||
: 0.0f;
|
||||
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
|
||||
viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
|
||||
viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
|
||||
}
|
||||
VkViewport viewport;
|
||||
viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x);
|
||||
viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y);
|
||||
viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x);
|
||||
viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y);
|
||||
viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f);
|
||||
viewport.maxDepth =
|
||||
std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f);
|
||||
viewport.x = viewport_info.left;
|
||||
viewport.y = viewport_info.top;
|
||||
viewport.width = viewport_info.width;
|
||||
viewport.height = viewport_info.height;
|
||||
viewport.minDepth = viewport_info.z_min;
|
||||
viewport.maxDepth = viewport_info.z_max;
|
||||
ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x;
|
||||
ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y;
|
||||
ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width;
|
||||
|
@ -1326,16 +1316,39 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() {
|
|||
}
|
||||
|
||||
void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||
xenos::Endian index_endian) {
|
||||
xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info) {
|
||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
||||
|
||||
bool dirty = false;
|
||||
|
||||
// Flags.
|
||||
uint32_t flags = 0;
|
||||
// W0 division control.
|
||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
||||
// = false: multiply the X, Y coordinates by 1/W0.
|
||||
// 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
|
||||
// = false: multiply the Z coordinate by 1/W0.
|
||||
// 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
|
||||
// to get 1/W0.
|
||||
if (pa_cl_vte_cntl.vtx_xy_fmt) {
|
||||
flags |= SpirvShaderTranslator::kSysFlag_XYDividedByW;
|
||||
}
|
||||
if (pa_cl_vte_cntl.vtx_z_fmt) {
|
||||
flags |= SpirvShaderTranslator::kSysFlag_ZDividedByW;
|
||||
}
|
||||
if (pa_cl_vte_cntl.vtx_w0_fmt) {
|
||||
flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal;
|
||||
}
|
||||
dirty |= system_constants_.flags != flags;
|
||||
system_constants_.flags = flags;
|
||||
|
||||
// Index or tessellation edge factor buffer endianness.
|
||||
dirty |= system_constants_.vertex_index_endian != index_endian;
|
||||
system_constants_.vertex_index_endian = index_endian;
|
||||
|
@ -1344,6 +1357,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
system_constants_.vertex_base_index = vgt_indx_offset;
|
||||
|
||||
// Conversion to host normalized device coordinates.
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
|
||||
dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
|
||||
system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
|
||||
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
current_graphics_descriptor_set_values_up_to_date_ &=
|
||||
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "xenia/gpu/command_processor.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
|
||||
|
@ -170,8 +171,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
|
||||
VkShaderStageFlags GetGuestVertexShaderStageFlags() const;
|
||||
|
||||
void UpdateFixedFunctionState();
|
||||
void UpdateSystemConstantValues(xenos::Endian index_endian);
|
||||
void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info);
|
||||
void UpdateSystemConstantValues(xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info);
|
||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||
const VulkanShader* pixel_shader);
|
||||
// Allocates a descriptor, space in the uniform buffer pool, and fills the
|
||||
|
|
Loading…
Reference in New Issue