[Vulkan] Viewport from draw_util and vtx_fmt

This commit is contained in:
Triang3l 2020-11-18 12:48:12 +03:00
parent c0550bafe9
commit a94301d967
4 changed files with 232 additions and 55 deletions

View File

@ -168,10 +168,13 @@ void SpirvShaderTranslator::StartTranslation() {
spv::Id type;
};
const SystemConstant system_constants[] = {
{"flags", offsetof(SystemConstants, flags), type_uint_},
{"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian),
type_uint_},
{"vertex_base_index", offsetof(SystemConstants, vertex_base_index),
type_int_},
{"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_},
{"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_},
};
id_vector_temp_.clear();
id_vector_temp_.reserve(xe::countof(system_constants));
@ -997,6 +1000,133 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
}
void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags));
spv::Id system_constant_flags = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision);
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kOutputPerVertexMemberPosition));
spv::Id position_ptr = builder_->createAccessChain(
spv::StorageClassOutput, output_per_vertex_, id_vector_temp_);
spv::Id guest_position = builder_->createLoad(position_ptr, spv::NoPrecision);
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
// into W.
spv::Id position_w =
builder_->createCompositeExtract(guest_position, type_float_, 3);
spv::Id is_w_not_reciprocal = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
builder_->makeUintConstant(
static_cast<unsigned int>(kSysFlag_WNotReciprocal))),
const_uint_0_);
spv::Id guest_position_w_inv = builder_->createBinOp(
spv::OpFDiv, type_float_, const_float_1_, position_w);
builder_->addDecoration(guest_position_w_inv, spv::DecorationNoContraction);
position_w =
builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal,
position_w, guest_position_w_inv);
// Check if the shader returns XY/W rather than XY, and if it does, revert
// that.
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
// affine interpolation.
uint_vector_temp_.clear();
uint_vector_temp_.reserve(2);
uint_vector_temp_.push_back(0);
uint_vector_temp_.push_back(1);
spv::Id position_xy = builder_->createRvalueSwizzle(
spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_);
spv::Id is_xy_divided_by_w = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
builder_->makeUintConstant(
static_cast<unsigned int>(kSysFlag_XYDividedByW))),
const_uint_0_);
spv::Id guest_position_xy_mul_w = builder_->createBinOp(
spv::OpVectorTimesScalar, type_float2_, position_xy, position_w);
builder_->addDecoration(guest_position_xy_mul_w,
spv::DecorationNoContraction);
position_xy =
builder_->createTriOp(spv::OpSelect, type_float2_, is_xy_divided_by_w,
guest_position_xy_mul_w, position_xy);
// Check if the shader returns Z/W rather than Z, and if it does, revert that.
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
// affine interpolation.
spv::Id position_z =
builder_->createCompositeExtract(guest_position, type_float_, 2);
spv::Id is_z_divided_by_w = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, system_constant_flags,
builder_->makeUintConstant(
static_cast<unsigned int>(kSysFlag_ZDividedByW))),
const_uint_0_);
spv::Id guest_position_z_mul_w =
builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w);
builder_->addDecoration(guest_position_z_mul_w, spv::DecorationNoContraction);
position_z =
builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w,
guest_position_z_mul_w, position_z);
// Build XYZ of the position with W format handled.
spv::Id position_xyz;
{
std::unique_ptr<spv::Instruction> composite_construct_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct);
composite_construct_op->addIdOperand(position_xy);
composite_construct_op->addIdOperand(position_z);
position_xyz = composite_construct_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(composite_construct_op));
}
// Apply the NDC scale and offset for guest to host viewport transformation.
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantNdcScale));
spv::Id ndc_scale = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision);
position_xyz =
builder_->createBinOp(spv::OpFMul, type_float3_, position_xyz, ndc_scale);
builder_->addDecoration(position_xyz, spv::DecorationNoContraction);
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantNdcOffset));
spv::Id ndc_offset = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision);
spv::Id ndc_offset_mul_w = builder_->createBinOp(
spv::OpVectorTimesScalar, type_float3_, ndc_offset, position_w);
builder_->addDecoration(ndc_offset_mul_w, spv::DecorationNoContraction);
position_xyz = builder_->createBinOp(spv::OpFAdd, type_float3_, position_xyz,
ndc_offset_mul_w);
builder_->addDecoration(position_xyz, spv::DecorationNoContraction);
// Store the position converted to the host.
spv::Id position;
{
std::unique_ptr<spv::Instruction> composite_construct_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
composite_construct_op->addIdOperand(position_xyz);
composite_construct_op->addIdOperand(position_w);
position = composite_construct_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(composite_construct_op));
}
builder_->createStore(position, position_ptr);
// Write 1 to point size (using a geometry shader or another kind of fallback
// to expand point sprites - point size support is not guaranteed, and the
// size would also be limited, and can't be controlled independently along two

View File

@ -25,12 +25,33 @@ namespace gpu {
class SpirvShaderTranslator : public ShaderTranslator {
public:
// Bits of SystemConstants::flags describing the format of the position written
// by the guest vertex shader. The host sets them from the vtx_xy_fmt,
// vtx_z_fmt and vtx_w0_fmt fields of PA_CL_VTE_CNTL, and the translated vertex
// shader tests them when converting the guest position to the host one.
enum : uint32_t {
// Bit positions - numbered implicitly from 0 in declaration order, so new
// flags must be inserted before kSysFlag_Count.
// Guest XY are already divided by W - the shader multiplies them by W back.
kSysFlag_XYDividedByW_Shift,
// Guest Z is already divided by W - the shader multiplies it by W back.
kSysFlag_ZDividedByW_Shift,
// Guest W is W itself rather than 1/W - when not set, the shader takes the
// reciprocal of the guest W.
kSysFlag_WNotReciprocal_Shift,
// Number of the flag bits declared above.
kSysFlag_Count,
// Bit masks built from the positions above.
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
};
// All the flags are stored in the single 32-bit SystemConstants::flags.
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - SystemConstantIndex enum.
// - Structure members in StartTranslation (the system_constants table).
//
// Host-side copy of the system constants read by translated shaders.
// NOTE(review): the explicit padding members appear to place each float[3] at
// a 16-byte-aligned offset (16 and 32, assuming xenos::Endian is 4 bytes wide),
// presumably to match the uniform buffer layout of three-component vectors -
// confirm before reordering or inserting members.
struct SystemConstants {
// Combination of kSysFlag_* bits.
uint32_t flags;
// Index or tessellation edge factor buffer endianness.
xenos::Endian vertex_index_endian;
// Signed base vertex index (the host writes VGT_INDX_OFFSET here).
int32_t vertex_base_index;
uint32_t padding_vertex_base_index;
// Scale applied to the XYZ of the guest position for the conversion to host
// normalized device coordinates.
float ndc_scale[3];
uint32_t padding_ndc_scale;
// Offset for the same conversion - the shader multiplies it by W before
// adding, since the position is not divided by W yet at that point.
float ndc_offset[3];
uint32_t padding_ndc_offset;
};
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
@ -329,8 +350,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id const_float2_0_1_;
// Member indices within the system constants uniform structure, passed as
// access chain indices when loading a system constant in translated code.
// The order must match the order of the members in the system_constants table
// in StartTranslation and in the SystemConstants structure (padding members
// are not counted - they have no entries in that table).
// NOTE(review): the enumerator prefixes are inconsistent (kSystemConstantIndex*
// for the two vertex index members, kSystemConstant* for the rest) - consider
// unifying them together with all the usages.
enum SystemConstantIndex : unsigned int {
kSystemConstantFlags,
kSystemConstantIndexVertexIndexEndian,
kSystemConstantIndexVertexBaseIndex,
kSystemConstantNdcScale,
kSystemConstantNdcOffset,
};
spv::Id uniform_system_constants_;
spv::Id uniform_float_constants_;

View File

@ -686,14 +686,45 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
current_graphics_pipeline_layout_ = pipeline_layout;
}
const RegisterFile& regs = *register_file_;
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const VkPhysicalDeviceProperties& device_properties =
provider.device_properties();
// Get dynamic rasterizer state.
draw_util::ViewportInfo viewport_info;
// Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must
// be at least 2 * max(maxViewportDimensions[0...1]) - 1, and
// maxViewportDimensions must be greater than or equal to the size of the
// largest possible framebuffer attachment (if the viewport has positive
// offset and is between maxViewportDimensions and viewportBoundsRange[1],
// GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the
// clamped range will be outside the largest possible framebuffer anyway).
// TODO(Triang3l): Possibly handle maxViewportDimensions and
// viewportBoundsRange separately because when using fragment shader
// interlocks, framebuffers are not used, while the range may be wider than
// dimensions? Though viewport bigger than 4096 - the smallest possible
// maximum dimension (which is below the 8192 texture size limit on the Xbox
// 360) - and with offset, is probably a situation that never happens in real
// life. Or even disregard the viewport bounds range in the fragment shader
// interlocks case completely - apply the viewport and the scissor offset
// directly to pixel address and to things like ps_param_gen.
draw_util::GetHostViewportInfo(
regs, 1.0f, 1.0f, false,
float(device_properties.limits.maxViewportDimensions[0]),
float(device_properties.limits.maxViewportDimensions[1]), true,
viewport_info);
// Update fixed-function dynamic state.
UpdateFixedFunctionState();
UpdateFixedFunctionState(viewport_info);
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
// Update system constants before uploading them.
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
: xenos::Endian::kNone);
UpdateSystemConstantValues(
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
viewport_info);
// Update uniform buffers and descriptor sets after binding the pipeline with
// the new layout.
@ -701,8 +732,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
return false;
}
const RegisterFile& regs = *register_file_;
// Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity is tracked.
@ -1229,7 +1258,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
return stages;
}
void VulkanCommandProcessor::UpdateFixedFunctionState() {
void VulkanCommandProcessor::UpdateFixedFunctionState(
const draw_util::ViewportInfo& viewport_info) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
@ -1245,53 +1275,13 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() {
uint32_t pixel_size_x = 1, pixel_size_y = 1;
// Viewport.
// PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// In games, either all are enabled (for regular drawing) or none are (for
// rectangle lists usually).
//
// If scale/offset is enabled, the Xenos shader is writing (neglecting W
// division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
// box. If it's not, the position is in screen space. Since we can only use
// the NDC in PC APIs, we use a viewport of the largest possible size, and
// divide the position by it in translated shaders.
//
// TODO(Triang3l): Move all of this to draw_util.
// TODO(Triang3l): Limit the viewport if exceeding the device limit; move to
// NDC scale/offset constants.
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_scale_x =
pa_cl_vte_cntl.vport_x_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
: 4096.0f;
float viewport_scale_y =
pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 4096.0f;
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
: 1.0f;
float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: std::abs(viewport_scale_x);
float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: std::abs(viewport_scale_y);
float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
: 0.0f;
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
}
VkViewport viewport;
viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x);
viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y);
viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x);
viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y);
viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f);
viewport.maxDepth =
std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f);
viewport.x = viewport_info.left;
viewport.y = viewport_info.top;
viewport.width = viewport_info.width;
viewport.height = viewport_info.height;
viewport.minDepth = viewport_info.z_min;
viewport.maxDepth = viewport_info.z_max;
ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x;
ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y;
ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width;
@ -1326,16 +1316,39 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() {
}
void VulkanCommandProcessor::UpdateSystemConstantValues(
xenos::Endian index_endian) {
xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
bool dirty = false;
// Flags.
uint32_t flags = 0;
// W0 division control.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.
// 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
// = false: multiply the Z coordinate by 1/W0.
// 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
// to get 1/W0.
if (pa_cl_vte_cntl.vtx_xy_fmt) {
flags |= SpirvShaderTranslator::kSysFlag_XYDividedByW;
}
if (pa_cl_vte_cntl.vtx_z_fmt) {
flags |= SpirvShaderTranslator::kSysFlag_ZDividedByW;
}
if (pa_cl_vte_cntl.vtx_w0_fmt) {
flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal;
}
dirty |= system_constants_.flags != flags;
system_constants_.flags = flags;
// Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian;
@ -1344,6 +1357,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// Conversion to host normalized device coordinates.
for (uint32_t i = 0; i < 3; ++i) {
dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
}
if (dirty) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);

View File

@ -19,6 +19,7 @@
#include <vector>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
@ -170,8 +171,9 @@ class VulkanCommandProcessor : public CommandProcessor {
VkShaderStageFlags GetGuestVertexShaderStageFlags() const;
void UpdateFixedFunctionState();
void UpdateSystemConstantValues(xenos::Endian index_endian);
void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info);
void UpdateSystemConstantValues(xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info);
bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader);
// Allocates a descriptor, space in the uniform buffer pool, and fills the