Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental
This commit is contained in:
commit
0e3403d6da
|
@ -2272,7 +2272,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
UpdateSystemConstantValues(
|
||||
memexport_used, primitive_polygonal,
|
||||
primitive_processing_result.line_loop_closing_index,
|
||||
primitive_processing_result.host_index_endian, viewport_info,
|
||||
primitive_processing_result.host_shader_index_endian, viewport_info,
|
||||
used_texture_mask, normalized_depth_control, normalized_color_mask);
|
||||
|
||||
// Update constant buffers, descriptors and root parameters.
|
||||
|
@ -2517,7 +2517,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
ID3D12Resource* scratch_index_buffer = nullptr;
|
||||
switch (primitive_processing_result.index_buffer_type) {
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kGuest: {
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA: {
|
||||
if (memexport_used) {
|
||||
// If the shared memory is a UAV, it can't be used as an index buffer
|
||||
// (UAV is a read/write state, index buffer is a read-only state).
|
||||
|
@ -2549,7 +2549,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
primitive_processor_->GetConvertedIndexBufferGpuAddress(
|
||||
primitive_processing_result.host_index_buffer_handle);
|
||||
break;
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin:
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForAuto:
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForDMA:
|
||||
index_buffer_view.BufferLocation =
|
||||
primitive_processor_->GetBuiltinIndexBufferGpuAddress(
|
||||
primitive_processing_result.host_index_buffer_handle);
|
||||
|
@ -3167,8 +3168,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
const RegisterFile& regs = *register_file_;
|
||||
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
|
@ -3372,43 +3371,47 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
}
|
||||
|
||||
// Point size.
|
||||
float point_vertex_diameter_min =
|
||||
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
|
||||
float point_vertex_diameter_max =
|
||||
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_x =
|
||||
float(pa_su_point_size.width) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_y =
|
||||
float(pa_su_point_size.height) * (2.0f / 16.0f);
|
||||
dirty |=
|
||||
system_constants_.point_vertex_diameter_min != point_vertex_diameter_min;
|
||||
dirty |=
|
||||
system_constants_.point_vertex_diameter_max != point_vertex_diameter_max;
|
||||
dirty |=
|
||||
system_constants_.point_constant_diameter[0] != point_constant_diameter_x;
|
||||
dirty |=
|
||||
system_constants_.point_constant_diameter[1] != point_constant_diameter_y;
|
||||
system_constants_.point_vertex_diameter_min = point_vertex_diameter_min;
|
||||
system_constants_.point_vertex_diameter_max = point_vertex_diameter_max;
|
||||
system_constants_.point_constant_diameter[0] = point_constant_diameter_x;
|
||||
system_constants_.point_constant_diameter[1] = point_constant_diameter_y;
|
||||
// 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter to
|
||||
// radius conversion to avoid multiplying the per-vertex diameter by an
|
||||
// additional constant in the shader.
|
||||
float point_screen_diameter_to_ndc_radius_x =
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
|
||||
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
||||
float point_screen_diameter_to_ndc_radius_y =
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
|
||||
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] !=
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[0] =
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[1] =
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
|
||||
float point_vertex_diameter_min =
|
||||
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
|
||||
float point_vertex_diameter_max =
|
||||
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_x =
|
||||
float(pa_su_point_size.width) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_y =
|
||||
float(pa_su_point_size.height) * (2.0f / 16.0f);
|
||||
dirty |= system_constants_.point_vertex_diameter_min !=
|
||||
point_vertex_diameter_min;
|
||||
dirty |= system_constants_.point_vertex_diameter_max !=
|
||||
point_vertex_diameter_max;
|
||||
dirty |= system_constants_.point_constant_diameter[0] !=
|
||||
point_constant_diameter_x;
|
||||
dirty |= system_constants_.point_constant_diameter[1] !=
|
||||
point_constant_diameter_y;
|
||||
system_constants_.point_vertex_diameter_min = point_vertex_diameter_min;
|
||||
system_constants_.point_vertex_diameter_max = point_vertex_diameter_max;
|
||||
system_constants_.point_constant_diameter[0] = point_constant_diameter_x;
|
||||
system_constants_.point_constant_diameter[1] = point_constant_diameter_y;
|
||||
// 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter
|
||||
// to radius conversion to avoid multiplying the per-vertex diameter by an
|
||||
// additional constant in the shader.
|
||||
float point_screen_diameter_to_ndc_radius_x =
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
|
||||
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
||||
float point_screen_diameter_to_ndc_radius_y =
|
||||
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
|
||||
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] !=
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[0] =
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[1] =
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
}
|
||||
|
||||
// Texture signedness / gamma.
|
||||
bool gamma_render_target_as_srgb =
|
||||
|
|
|
@ -28,7 +28,7 @@ namespace d3d12 {
|
|||
D3D12PrimitiveProcessor::~D3D12PrimitiveProcessor() { Shutdown(true); }
|
||||
|
||||
bool D3D12PrimitiveProcessor::Initialize() {
|
||||
if (!InitializeCommon(true, false, false, true)) {
|
||||
if (!InitializeCommon(true, false, false, true, true, true)) {
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
|
@ -83,9 +83,9 @@ void D3D12PrimitiveProcessor::EndFrame() {
|
|||
frame_index_buffers_.clear();
|
||||
}
|
||||
|
||||
bool D3D12PrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
|
||||
assert_not_zero(index_count);
|
||||
bool D3D12PrimitiveProcessor::InitializeBuiltinIndexBuffer(
|
||||
size_t size_bytes, std::function<void(void*)> fill_callback) {
|
||||
assert_not_zero(size_bytes);
|
||||
assert_null(builtin_index_buffer_);
|
||||
assert_null(builtin_index_buffer_upload_);
|
||||
|
||||
|
@ -94,9 +94,8 @@ bool D3D12PrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
ID3D12Device* device = provider.GetDevice();
|
||||
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
ui::d3d12::util::FillBufferResourceDesc(
|
||||
resource_desc, UINT64(sizeof(uint16_t) * index_count),
|
||||
D3D12_RESOURCE_FLAG_NONE);
|
||||
ui::d3d12::util::FillBufferResourceDesc(resource_desc, UINT64(size_bytes),
|
||||
D3D12_RESOURCE_FLAG_NONE);
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> draw_resource;
|
||||
if (FAILED(device->CreateCommittedResource(
|
||||
&ui::d3d12::util::kHeapPropertiesDefault,
|
||||
|
@ -105,8 +104,8 @@ bool D3D12PrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
IID_PPV_ARGS(&draw_resource)))) {
|
||||
XELOGE(
|
||||
"D3D12 primitive processor: Failed to create the built-in index "
|
||||
"buffer GPU resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"buffer GPU resource with {} bytes",
|
||||
size_bytes);
|
||||
return false;
|
||||
}
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> upload_resource;
|
||||
|
@ -117,8 +116,8 @@ bool D3D12PrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
IID_PPV_ARGS(&upload_resource)))) {
|
||||
XELOGE(
|
||||
"D3D12 primitive processor: Failed to create the built-in index "
|
||||
"buffer upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"buffer upload resource with {} bytes",
|
||||
size_bytes);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -127,8 +126,8 @@ bool D3D12PrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
if (FAILED(upload_resource->Map(0, &upload_read_range, &mapping))) {
|
||||
XELOGE(
|
||||
"D3D12 primitive processor: Failed to map the built-in index buffer "
|
||||
"upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"upload resource with {} bytes",
|
||||
size_bytes);
|
||||
return false;
|
||||
}
|
||||
fill_callback(reinterpret_cast<uint16_t*>(mapping));
|
||||
|
|
|
@ -56,9 +56,8 @@ class D3D12PrimitiveProcessor final : public PrimitiveProcessor {
|
|||
}
|
||||
|
||||
protected:
|
||||
bool InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count,
|
||||
std::function<void(uint16_t*)> fill_callback) override;
|
||||
bool InitializeBuiltinIndexBuffer(
|
||||
size_t size_bytes, std::function<void(void*)> fill_callback) override;
|
||||
|
||||
void* RequestHostConvertedIndexBufferForCurrentFrame(
|
||||
xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
|
||||
|
|
|
@ -964,8 +964,6 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
|
||||
// Check if the shader returns XY/W rather than XY, and if it does, revert
|
||||
// that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
a_.OpAnd(temp_x_dest, flags_src, dxbc::Src::LU(kSysFlag_XYDividedByW));
|
||||
a_.OpIf(true, temp_x_src);
|
||||
a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0011),
|
||||
|
@ -974,8 +972,6 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
a_.OpEndIf();
|
||||
|
||||
// Check if the shader returns Z/W rather than Z, and if it does, revert that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
a_.OpAnd(temp_x_dest, flags_src, dxbc::Src::LU(kSysFlag_ZDividedByW));
|
||||
a_.OpIf(true, temp_x_src);
|
||||
a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0100),
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "xenia/gpu/primitive_processor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
@ -106,7 +107,9 @@ PrimitiveProcessor::~PrimitiveProcessor() { ShutdownCommon(); }
|
|||
|
||||
bool PrimitiveProcessor::InitializeCommon(
|
||||
bool full_32bit_vertex_indices_supported, bool triangle_fans_supported,
|
||||
bool line_loops_supported, bool quad_lists_supported) {
|
||||
bool line_loops_supported, bool quad_lists_supported,
|
||||
bool point_sprites_supported_without_vs_expansion,
|
||||
bool rectangle_lists_supported_without_vs_expansion) {
|
||||
full_32bit_vertex_indices_used_ = full_32bit_vertex_indices_supported;
|
||||
convert_triangle_fans_to_lists_ =
|
||||
!triangle_fans_supported || cvars::force_convert_triangle_fans_to_lists;
|
||||
|
@ -115,33 +118,94 @@ bool PrimitiveProcessor::InitializeCommon(
|
|||
convert_quad_lists_to_triangle_lists_ =
|
||||
!quad_lists_supported ||
|
||||
cvars::force_convert_quad_lists_to_triangle_lists;
|
||||
// No override cvars as hosts are not required to support the fallback paths
|
||||
// since they require different vertex shader structure (for the fallback
|
||||
// HostVertexShaderTypes).
|
||||
expand_point_sprites_in_vs_ = !point_sprites_supported_without_vs_expansion;
|
||||
expand_rectangle_lists_in_vs_ =
|
||||
!rectangle_lists_supported_without_vs_expansion;
|
||||
|
||||
// Initialize the index buffer for conversion of auto-indexed primitive types.
|
||||
uint32_t builtin_index_count = 0;
|
||||
size_t builtin_index_buffer_size = 0;
|
||||
// 32-bit, before 16-bit due to alignment (for primitive expansion - when the
|
||||
// indices encode not only the guest vertex index, but also a part needed for
|
||||
// host expansion, thus may contain values above UINT16_MAX, such as up to
|
||||
// (UINT16_MAX - 1) * 4 + 3 for point sprites).
|
||||
// Using an index buffer for point sprite and rectangle list expansion instead
|
||||
// of instancing, because how instancing is implemented may vary wildly
|
||||
// between GPUs and may be slow (like no different instances in the same
|
||||
// wavefront) with small vertex counts per instance. Also using triangle
|
||||
// strips with primitive restart, not triangle lists, so the vertex shader may
|
||||
// be invoked once for the inner edge vertices, which is important for memory
|
||||
// export in guest shaders, not to write to the same location from two
|
||||
// invocations.
|
||||
uint32_t builtin_ib_two_triangle_strip_count = 0;
|
||||
if (expand_point_sprites_in_vs_) {
|
||||
builtin_ib_two_triangle_strip_count =
|
||||
std::max(uint32_t(UINT16_MAX), builtin_ib_two_triangle_strip_count);
|
||||
}
|
||||
if (expand_rectangle_lists_in_vs_) {
|
||||
builtin_ib_two_triangle_strip_count =
|
||||
std::max(uint32_t(UINT16_MAX / 3), builtin_ib_two_triangle_strip_count);
|
||||
}
|
||||
if (builtin_ib_two_triangle_strip_count) {
|
||||
builtin_ib_offset_two_triangle_strips_ = builtin_index_buffer_size;
|
||||
builtin_index_buffer_size +=
|
||||
sizeof(uint32_t) *
|
||||
GetTwoTriangleStripIndexCount(builtin_ib_two_triangle_strip_count);
|
||||
} else {
|
||||
builtin_ib_offset_two_triangle_strips_ = SIZE_MAX;
|
||||
}
|
||||
// 16-bit (for indirection on top of single auto-indexed vertices) - enough
|
||||
// even if the backend has primitive reset enabled all the time (Metal) as
|
||||
// auto-indexed draws are limited to UINT16_MAX vertices, not UINT16_MAX + 1.
|
||||
if (convert_triangle_fans_to_lists_) {
|
||||
builtin_ib_offset_triangle_fans_to_lists_ =
|
||||
sizeof(uint16_t) * builtin_index_count;
|
||||
builtin_index_count += GetTriangleFanListIndexCount(UINT16_MAX);
|
||||
builtin_ib_offset_triangle_fans_to_lists_ = builtin_index_buffer_size;
|
||||
builtin_index_buffer_size +=
|
||||
sizeof(uint16_t) * GetTriangleFanListIndexCount(UINT16_MAX);
|
||||
} else {
|
||||
builtin_ib_offset_triangle_fans_to_lists_ = SIZE_MAX;
|
||||
}
|
||||
if (convert_quad_lists_to_triangle_lists_) {
|
||||
builtin_ib_offset_quad_lists_to_triangle_lists_ =
|
||||
sizeof(uint16_t) * builtin_index_count;
|
||||
builtin_index_count += GetQuadListTriangleListIndexCount(UINT16_MAX);
|
||||
builtin_ib_offset_quad_lists_to_triangle_lists_ = builtin_index_buffer_size;
|
||||
builtin_index_buffer_size +=
|
||||
sizeof(uint16_t) * GetQuadListTriangleListIndexCount(UINT16_MAX);
|
||||
} else {
|
||||
builtin_ib_offset_quad_lists_to_triangle_lists_ = SIZE_MAX;
|
||||
}
|
||||
if (builtin_index_count) {
|
||||
if (!InitializeBuiltin16BitIndexBuffer(
|
||||
builtin_index_count, [this](uint16_t* mapping) {
|
||||
if (builtin_index_buffer_size) {
|
||||
if (!InitializeBuiltinIndexBuffer(
|
||||
builtin_index_buffer_size,
|
||||
[this, builtin_ib_two_triangle_strip_count](void* mapping) {
|
||||
uint32_t* mapping_32bit = reinterpret_cast<uint32_t*>(mapping);
|
||||
if (builtin_ib_offset_two_triangle_strips_ != SIZE_MAX) {
|
||||
// Two-triangle strips.
|
||||
uint32_t* two_triangle_strip_ptr =
|
||||
mapping_32bit +
|
||||
builtin_ib_offset_two_triangle_strips_ / sizeof(uint32_t);
|
||||
for (uint32_t i = 0; i < builtin_ib_two_triangle_strip_count;
|
||||
++i) {
|
||||
if (i) {
|
||||
// Primitive restart.
|
||||
*(two_triangle_strip_ptr++) = UINT32_MAX;
|
||||
}
|
||||
// Host vertex index within the pair in the lower 2 bits,
|
||||
// guest primitive index in the rest.
|
||||
uint32_t two_triangle_strip_first_index = i << 2;
|
||||
for (uint32_t j = 0; j < 4; ++j) {
|
||||
*(two_triangle_strip_ptr++) =
|
||||
two_triangle_strip_first_index + j;
|
||||
}
|
||||
}
|
||||
}
|
||||
uint16_t* mapping_16bit = reinterpret_cast<uint16_t*>(mapping);
|
||||
if (builtin_ib_offset_triangle_fans_to_lists_ != SIZE_MAX) {
|
||||
// Triangle fans as triangle lists.
|
||||
// Ordered as (v1, v2, v0), (v2, v3, v0) in Direct3D.
|
||||
// https://docs.microsoft.com/en-us/windows/desktop/direct3d9/triangle-fans
|
||||
uint16_t* triangle_list_ptr =
|
||||
mapping + builtin_ib_offset_triangle_fans_to_lists_ /
|
||||
sizeof(uint16_t);
|
||||
mapping_16bit + builtin_ib_offset_triangle_fans_to_lists_ /
|
||||
sizeof(uint16_t);
|
||||
for (uint32_t i = 2; i < UINT16_MAX; ++i) {
|
||||
*(triangle_list_ptr++) = uint16_t(i - 1);
|
||||
*(triangle_list_ptr++) = uint16_t(i);
|
||||
|
@ -150,8 +214,9 @@ bool PrimitiveProcessor::InitializeCommon(
|
|||
}
|
||||
if (builtin_ib_offset_quad_lists_to_triangle_lists_ != SIZE_MAX) {
|
||||
uint16_t* triangle_list_ptr =
|
||||
mapping + builtin_ib_offset_quad_lists_to_triangle_lists_ /
|
||||
sizeof(uint16_t);
|
||||
mapping_16bit +
|
||||
builtin_ib_offset_quad_lists_to_triangle_lists_ /
|
||||
sizeof(uint16_t);
|
||||
// TODO(Triang3l): SIMD for faster initialization?
|
||||
for (uint32_t i = 0; i < UINT16_MAX / 4; ++i) {
|
||||
uint16_t quad_first_index = uint16_t(i * 4);
|
||||
|
@ -309,15 +374,27 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
return false;
|
||||
}
|
||||
} else {
|
||||
host_vertex_shader_type = Shader::HostVertexShaderType::kVertex;
|
||||
switch (guest_primitive_type) {
|
||||
case xenos::PrimitiveType::kPointList:
|
||||
if (expand_point_sprites_in_vs_) {
|
||||
host_primitive_type = xenos::PrimitiveType::kTriangleStrip;
|
||||
host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip;
|
||||
}
|
||||
break;
|
||||
case xenos::PrimitiveType::kLineList:
|
||||
case xenos::PrimitiveType::kLineStrip:
|
||||
case xenos::PrimitiveType::kTriangleList:
|
||||
case xenos::PrimitiveType::kTriangleStrip:
|
||||
// Supported natively on all backends.
|
||||
break;
|
||||
case xenos::PrimitiveType::kRectangleList:
|
||||
// Supported natively or through geometry or compute shaders on all
|
||||
// backends.
|
||||
if (expand_rectangle_lists_in_vs_) {
|
||||
host_primitive_type = xenos::PrimitiveType::kTriangleStrip;
|
||||
host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kRectangleListAsTriangleStrip;
|
||||
}
|
||||
break;
|
||||
case xenos::PrimitiveType::kTriangleFan:
|
||||
if (convert_triangle_fans_to_lists_) {
|
||||
|
@ -342,7 +419,6 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
assert_always();
|
||||
return false;
|
||||
}
|
||||
host_vertex_shader_type = Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
|
||||
// Process the indices.
|
||||
|
@ -359,12 +435,86 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
guest_draw_vertex_count = vgt_dma_size.num_words;
|
||||
}
|
||||
uint32_t line_loop_closing_index = 0;
|
||||
uint32_t guest_index_base;
|
||||
uint32_t guest_index_base = 0, guest_index_buffer_needed_bytes = 0;
|
||||
CachedResult cacheable;
|
||||
cacheable.host_draw_vertex_count = guest_draw_vertex_count;
|
||||
cacheable.host_primitive_reset_enabled = false;
|
||||
cacheable.host_index_buffer_handle = SIZE_MAX;
|
||||
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kAutoIndex) {
|
||||
if (host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip ||
|
||||
host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kRectangleListAsTriangleStrip) {
|
||||
// As two-triangle strips, with guest indices being either autogenerated or
|
||||
// fetched via DMA.
|
||||
uint32_t primitive_count = guest_draw_vertex_count;
|
||||
if (host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kRectangleListAsTriangleStrip) {
|
||||
primitive_count /= 3;
|
||||
}
|
||||
cacheable.host_draw_vertex_count =
|
||||
GetTwoTriangleStripIndexCount(primitive_count);
|
||||
cacheable.host_index_format = xenos::IndexFormat::kInt32;
|
||||
cacheable.host_primitive_reset_enabled = true;
|
||||
assert_true(builtin_ib_offset_two_triangle_strips_ != SIZE_MAX);
|
||||
cacheable.host_index_buffer_handle = builtin_ib_offset_two_triangle_strips_;
|
||||
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kAutoIndex) {
|
||||
cacheable.index_buffer_type =
|
||||
ProcessedIndexBufferType::kHostBuiltinForAuto;
|
||||
cacheable.host_shader_index_endian = xenos::Endian::kNone;
|
||||
} else {
|
||||
// There is an index buffer.
|
||||
assert_true(vgt_draw_initiator.source_select ==
|
||||
xenos::SourceSelect::kDMA);
|
||||
if (vgt_draw_initiator.source_select != xenos::SourceSelect::kDMA) {
|
||||
// TODO(Triang3l): Support immediate-indexed vertices.
|
||||
XELOGE(
|
||||
"Primitive processor: Unsupported vertex index source {}. Report "
|
||||
"the game to Xenia developers!",
|
||||
uint32_t(vgt_draw_initiator.source_select));
|
||||
return false;
|
||||
}
|
||||
xenos::IndexFormat guest_index_format = vgt_draw_initiator.index_size;
|
||||
// Normalize the endian.
|
||||
cacheable.index_buffer_type =
|
||||
ProcessedIndexBufferType::kHostBuiltinForDMA;
|
||||
xenos::Endian guest_index_endian = vgt_dma_size.swap_mode;
|
||||
if (guest_index_format == xenos::IndexFormat::kInt16 &&
|
||||
(guest_index_endian != xenos::Endian::kNone &&
|
||||
guest_index_endian != xenos::Endian::k8in16)) {
|
||||
XELOGW(
|
||||
"Primitive processor: 32-bit endian swap mode {} is used for "
|
||||
"16-bit indices. This shouldn't normally be happening, but report "
|
||||
"the game to Xenia developers for investigation of the intended "
|
||||
"behavior (ignore or actually swap across adjacent indices)! "
|
||||
"Currently disabling the swap for 16-and-32 and replacing 8-in-32 "
|
||||
"with 8-in-16.",
|
||||
uint32_t(guest_index_endian));
|
||||
guest_index_endian = guest_index_endian == xenos::Endian::k8in32
|
||||
? xenos::Endian::k8in16
|
||||
: xenos::Endian::kNone;
|
||||
}
|
||||
cacheable.host_shader_index_endian = guest_index_endian;
|
||||
// Get the index buffer memory range.
|
||||
uint32_t index_size_log2 =
|
||||
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
|
||||
// The base should already be aligned, but aligning here too for safety.
|
||||
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
|
||||
~uint32_t((1 << index_size_log2) - 1);
|
||||
guest_index_buffer_needed_bytes = guest_draw_vertex_count
|
||||
<< index_size_log2;
|
||||
if (guest_index_base > SharedMemory::kBufferSize ||
|
||||
SharedMemory::kBufferSize - guest_index_base <
|
||||
guest_index_buffer_needed_bytes) {
|
||||
XELOGE(
|
||||
"Primitive processor: Index buffer at 0x{:08X}, 0x{:X} bytes "
|
||||
"required, is out of the physical memory bounds",
|
||||
guest_index_base, guest_index_buffer_needed_bytes);
|
||||
assert_always();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (vgt_draw_initiator.source_select ==
|
||||
xenos::SourceSelect::kAutoIndex) {
|
||||
// Auto-indexed - use a remapping index buffer if needed to change the
|
||||
// primitive type.
|
||||
if (tessellation_enabled &&
|
||||
|
@ -376,9 +526,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
assert_always();
|
||||
return false;
|
||||
}
|
||||
guest_index_base = 0;
|
||||
cacheable.host_index_format = xenos::IndexFormat::kInt16;
|
||||
cacheable.host_index_endian = xenos::Endian::kNone;
|
||||
cacheable.host_shader_index_endian = xenos::Endian::kNone;
|
||||
cacheable.host_primitive_reset_enabled = false;
|
||||
cacheable.index_buffer_type = ProcessedIndexBufferType::kNone;
|
||||
if (host_primitive_type != guest_primitive_type) {
|
||||
|
@ -388,7 +537,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
xenos::PrimitiveType::kTriangleList);
|
||||
cacheable.host_draw_vertex_count =
|
||||
GetTriangleFanListIndexCount(cacheable.host_draw_vertex_count);
|
||||
cacheable.index_buffer_type = ProcessedIndexBufferType::kHostBuiltin;
|
||||
cacheable.index_buffer_type =
|
||||
ProcessedIndexBufferType::kHostBuiltinForAuto;
|
||||
assert_true(builtin_ib_offset_triangle_fans_to_lists_ != SIZE_MAX);
|
||||
cacheable.host_index_buffer_handle =
|
||||
builtin_ib_offset_triangle_fans_to_lists_;
|
||||
|
@ -409,7 +559,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
xenos::PrimitiveType::kTriangleList);
|
||||
cacheable.host_draw_vertex_count = GetQuadListTriangleListIndexCount(
|
||||
cacheable.host_draw_vertex_count);
|
||||
cacheable.index_buffer_type = ProcessedIndexBufferType::kHostBuiltin;
|
||||
cacheable.index_buffer_type =
|
||||
ProcessedIndexBufferType::kHostBuiltinForAuto;
|
||||
assert_true(builtin_ib_offset_quad_lists_to_triangle_lists_ !=
|
||||
SIZE_MAX);
|
||||
cacheable.host_index_buffer_handle =
|
||||
|
@ -503,8 +654,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
// The base should already be aligned, but aligning here too for safety.
|
||||
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
|
||||
~uint32_t((1 << index_size_log2) - 1);
|
||||
uint32_t guest_index_buffer_needed_bytes = guest_draw_vertex_count
|
||||
<< index_size_log2;
|
||||
guest_index_buffer_needed_bytes = guest_draw_vertex_count
|
||||
<< index_size_log2;
|
||||
if (guest_index_base > SharedMemory::kBufferSize ||
|
||||
SharedMemory::kBufferSize - guest_index_base <
|
||||
guest_index_buffer_needed_bytes) {
|
||||
|
@ -517,7 +668,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
}
|
||||
|
||||
cacheable.host_index_format = guest_index_format;
|
||||
cacheable.host_index_endian = guest_index_endian;
|
||||
cacheable.host_shader_index_endian = guest_index_endian;
|
||||
uint32_t guest_index_mask_guest_endian =
|
||||
guest_index_format == xenos::IndexFormat::kInt16
|
||||
? UINT16_MAX
|
||||
|
@ -666,7 +817,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
assert_unhandled_case(guest_index_endian);
|
||||
return false;
|
||||
}
|
||||
cacheable.host_index_endian = xenos::Endian::kNone;
|
||||
cacheable.host_shader_index_endian = xenos::Endian::kNone;
|
||||
}
|
||||
}
|
||||
cache_transaction.SetNewResult(cacheable);
|
||||
|
@ -677,7 +828,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
// endian-swap, or even to safely drop the upper 8 bits if no swap is even
|
||||
// needed) indirectly.
|
||||
cacheable.host_draw_vertex_count = guest_draw_vertex_count;
|
||||
cacheable.index_buffer_type = ProcessedIndexBufferType::kGuest;
|
||||
cacheable.index_buffer_type = ProcessedIndexBufferType::kGuestDMA;
|
||||
cacheable.host_primitive_reset_enabled = guest_primitive_reset_enabled;
|
||||
if (guest_primitive_reset_enabled) {
|
||||
if (guest_index_format == xenos::IndexFormat::kInt16) {
|
||||
|
@ -742,8 +893,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
} else {
|
||||
// Low 24 bits of the guest index are compared to the primitive reset
|
||||
// index. If the backend doesn't support full 32-bit indices, for
|
||||
// ProcessedIndexBufferType::kGuest, the host needs to read the buffer
|
||||
// indirectly in the vertex shaders and swap, and for
|
||||
// ProcessedIndexBufferType::kGuestDMA, the host needs to read the
|
||||
// buffer indirectly in the vertex shaders and swap, and for
|
||||
// ProcessedIndexBufferType::kHostConverted (if primitive reset is
|
||||
// actually used, thus exactly 0xFFFFFFFF must be sent to the host for
|
||||
// it in a true index buffer), no indirection is done, but
|
||||
|
@ -800,26 +951,31 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
assert_unhandled_case(guest_index_endian);
|
||||
return false;
|
||||
}
|
||||
cacheable.host_index_endian = full_32bit_vertex_indices_used_
|
||||
? guest_index_endian
|
||||
: xenos::Endian::kNone;
|
||||
cacheable.host_shader_index_endian =
|
||||
full_32bit_vertex_indices_used_ ? guest_index_endian
|
||||
: xenos::Endian::kNone;
|
||||
}
|
||||
cache_transaction.SetNewResult(cacheable);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cacheable.index_buffer_type == ProcessedIndexBufferType::kGuest) {
|
||||
// Request the index buffer memory.
|
||||
// TODO(Triang3l): Shared memory request cache.
|
||||
if (!shared_memory_.RequestRange(guest_index_base,
|
||||
guest_index_buffer_needed_bytes)) {
|
||||
XELOGE(
|
||||
"PrimitiveProcessor: Failed to request index buffer 0x{:08X}, "
|
||||
"0x{:X} bytes needed, in the shared memory",
|
||||
guest_index_base, guest_index_buffer_needed_bytes);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Request the indices in the shared memory if they need to be accessed from
|
||||
// there on the GPU.
|
||||
if (cacheable.index_buffer_type == ProcessedIndexBufferType::kGuestDMA ||
|
||||
cacheable.index_buffer_type ==
|
||||
ProcessedIndexBufferType::kHostBuiltinForDMA) {
|
||||
// Request the index buffer memory.
|
||||
// TODO(Triang3l): Shared memory request cache.
|
||||
if (!shared_memory_.RequestRange(guest_index_base,
|
||||
guest_index_buffer_needed_bytes)) {
|
||||
XELOGE(
|
||||
"PrimitiveProcessor: Failed to request index buffer 0x{:08X}, 0x{:X} "
|
||||
"bytes needed, in the shared memory",
|
||||
guest_index_base, guest_index_buffer_needed_bytes);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -832,7 +988,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
result_out.index_buffer_type = cacheable.index_buffer_type;
|
||||
result_out.guest_index_base = guest_index_base;
|
||||
result_out.host_index_format = cacheable.host_index_format;
|
||||
result_out.host_index_endian = cacheable.host_index_endian;
|
||||
result_out.host_shader_index_endian = cacheable.host_shader_index_endian;
|
||||
result_out.host_primitive_reset_enabled =
|
||||
cacheable.host_primitive_reset_enabled;
|
||||
result_out.host_index_buffer_handle = cacheable.host_index_buffer_handle;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#ifndef XENIA_GPU_PRIMITIVE_PROCESSOR_H_
|
||||
#define XENIA_GPU_PRIMITIVE_PROCESSOR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
@ -110,13 +111,16 @@ class PrimitiveProcessor {
|
|||
// For 32-bit, indirection is needed if the host only supports 24-bit
|
||||
// indices (even for non-endian-swapped, as the GPU should be ignoring the
|
||||
// upper 8 bits completely, rather than exhibiting undefined behavior).
|
||||
kGuest,
|
||||
kGuestDMA,
|
||||
// Converted and stored in the primitive converter for the current draw
|
||||
// command. For 32-bit indices, if the host doesn't support all 32 bits,
|
||||
// this kind of an index buffer will always be pre-masked and pre-swapped.
|
||||
kHostConverted,
|
||||
// Auto-indexed on the guest, but with an adapter index buffer on the host.
|
||||
kHostBuiltin,
|
||||
kHostBuiltinForAuto,
|
||||
// Adapter index buffer on the host for indirect loading of indices via DMA
|
||||
// (from the shared memory).
|
||||
kHostBuiltinForDMA,
|
||||
};
|
||||
|
||||
struct ProcessingResult {
|
||||
|
@ -136,13 +140,14 @@ class PrimitiveProcessor {
|
|||
ProcessedIndexBufferType index_buffer_type;
|
||||
uint32_t guest_index_base;
|
||||
xenos::IndexFormat host_index_format;
|
||||
xenos::Endian host_index_endian;
|
||||
xenos::Endian host_shader_index_endian;
|
||||
// The reset index, if enabled, is always 0xFFFF for host_index_format
|
||||
// kInt16 and 0xFFFFFFFF for kInt32. Never enabled for "list" primitive
|
||||
// types, thus safe for direct usage on Vulkan.
|
||||
bool host_primitive_reset_enabled;
|
||||
// Backend-specific handle for the index buffer valid for the current draw,
|
||||
// only valid for index_buffer_type kHostConverted and kHostBuiltin.
|
||||
// only valid for index_buffer_type kHostConverted, kHostBuiltinForAuto and
|
||||
// kHostBuiltinForDMA.
|
||||
size_t host_index_buffer_handle;
|
||||
bool IsTessellated() const {
|
||||
return Shader::IsHostVertexShaderTypeDomain(host_vertex_shader_type);
|
||||
|
@ -165,6 +170,12 @@ class PrimitiveProcessor {
|
|||
// Returns the convert_quad_lists_to_triangle_lists_ flag of this primitive
// processor (the member defaults to false).
// NOTE(review): presumably decided in InitializeCommon from
// quad_lists_supported - confirm against the implementation.
bool IsConvertingQuadListsToTriangleLists() const {
|
||||
return convert_quad_lists_to_triangle_lists_;
|
||||
}
|
||||
// Returns the expand_point_sprites_in_vs_ flag of this primitive processor
// (the member defaults to false).
// NOTE(review): presumably derived in InitializeCommon from
// point_sprites_supported_without_vs_expansion - confirm against the
// implementation.
bool IsExpandingPointSpritesInVS() const {
|
||||
return expand_point_sprites_in_vs_;
|
||||
}
|
||||
// Returns the expand_rectangle_lists_in_vs_ flag of this primitive processor
// (the member defaults to false).
// NOTE(review): presumably derived in InitializeCommon from
// rectangle_lists_supported_without_vs_expansion - confirm against the
// implementation.
bool IsExpandingRectangleListsInVS() const {
|
||||
return expand_rectangle_lists_in_vs_;
|
||||
}
|
||||
|
||||
// Submission must be open to call (may request the index buffer in the shared
|
||||
// memory).
|
||||
|
@ -217,8 +228,8 @@ class PrimitiveProcessor {
|
|||
// if indirection may be needed.
|
||||
// - When full 32-bit indices are not supported, the host must be using
|
||||
// auto-indexed draws for 32-bit indices of ProcessedIndexBufferType
|
||||
// kGuest, while fetching the index data manually from the shared memory
|
||||
// buffer and endian-swapping it.
|
||||
// kGuestDMA, while fetching the index data manually from the shared
|
||||
// memory buffer and endian-swapping it.
|
||||
// - Indirection, however, precludes primitive reset usage - so if
|
||||
// primitive reset is needed, the primitive processor will pre-swap and
|
||||
// pre-mask the index buffer so there are only host-endian 0x00###### or
|
||||
|
@ -235,19 +246,26 @@ class PrimitiveProcessor {
|
|||
// those guest primitive types directly or through geometry shader
|
||||
// emulation. Debug overriding will be resolved in the common code if
|
||||
// needed.
|
||||
// - point_sprites_supported_without_vs_expansion,
|
||||
// rectangle_lists_supported_without_vs_expansion:
|
||||
// - Pass true or false depending on whether the host actually supports
|
||||
// those guest primitive types directly or through geometry shader
|
||||
// emulation. Overrides do not apply to these as hosts are not required to
|
||||
// support the fallback paths since they require different vertex shader
|
||||
// structure (for the fallback HostVertexShaderTypes).
|
||||
bool InitializeCommon(bool full_32bit_vertex_indices_supported,
|
||||
bool triangle_fans_supported, bool line_loops_supported,
|
||||
bool quad_lists_supported);
|
||||
bool quad_lists_supported,
|
||||
bool point_sprites_supported_without_vs_expansion,
|
||||
bool rectangle_lists_supported_without_vs_expansion);
|
||||
// If any primitive type conversion is needed for auto-indexed draws, called
|
||||
// from InitializeCommon (thus only once in the primitive processor's
|
||||
// lifetime) to set up the backend's index buffer containing indices for
|
||||
// primitive type remapping. The backend must allocate a `sizeof(uint16_t) *
|
||||
// index_count` buffer and call fill_callback for its mapping if creation is
|
||||
// successful. 16-bit indices are enough even if the backend has primitive
|
||||
// reset enabled all the time (Metal) as auto-indexed draws are limited to
|
||||
// UINT16_MAX vertices, not UINT16_MAX + 1.
|
||||
virtual bool InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count, std::function<void(uint16_t*)> fill_callback) = 0;
|
||||
// primitive type remapping. The backend must allocate a 4-byte-aligned buffer
|
||||
// with `size_bytes` and call fill_callback for its mapping if creation has
|
||||
// been successful.
|
||||
virtual bool InitializeBuiltinIndexBuffer(
|
||||
size_t size_bytes, std::function<void(void*)> fill_callback) = 0;
|
||||
// Call last in implementation-specific shutdown, also callable from the
|
||||
// destructor.
|
||||
void ShutdownCommon();
|
||||
|
@ -509,6 +527,12 @@ class PrimitiveProcessor {
|
|||
}
|
||||
};
|
||||
|
||||
// Computes the number of indices needed to draw `strip_count` two-triangle
// strips: 4 indices per strip plus one primitive restart index between each
// pair of adjacent strips. Returns 0 for 0 strips and 5 * strip_count - 1
// otherwise (in uint32_t arithmetic).
static constexpr uint32_t GetTwoTriangleStripIndexCount(
|
||||
uint32_t strip_count) {
|
||||
// 4 vertices per strip, and primitive restarts between strips.
|
||||
// std::max clamps strip_count to at least 1 so that the restart count
// (strip_count - 1) is 0 instead of wrapping around when strip_count is 0.
return 4 * strip_count + (std::max(strip_count, UINT32_C(1)) - 1);
|
||||
}
|
||||
|
||||
// Triangle fan test cases:
|
||||
// - 4D5307E6 - main menu - game logo, developer logo, backgrounds of the menu
|
||||
// item list (the whole menu and individual items) - no index buffer.
|
||||
|
@ -675,8 +699,11 @@ class PrimitiveProcessor {
|
|||
bool convert_triangle_fans_to_lists_ = false;
|
||||
bool convert_line_loops_to_strips_ = false;
|
||||
bool convert_quad_lists_to_triangle_lists_ = false;
|
||||
bool expand_point_sprites_in_vs_ = false;
|
||||
bool expand_rectangle_lists_in_vs_ = false;
|
||||
|
||||
// Byte offsets used, for simplicity, directly as handles.
|
||||
size_t builtin_ib_offset_two_triangle_strips_ = SIZE_MAX;
|
||||
size_t builtin_ib_offset_triangle_fans_to_lists_ = SIZE_MAX;
|
||||
size_t builtin_ib_offset_quad_lists_to_triangle_lists_ = SIZE_MAX;
|
||||
|
||||
|
@ -745,7 +772,7 @@ class PrimitiveProcessor {
|
|||
uint32_t host_draw_vertex_count;
|
||||
ProcessedIndexBufferType index_buffer_type;
|
||||
xenos::IndexFormat host_index_format;
|
||||
xenos::Endian host_index_endian;
|
||||
xenos::Endian host_shader_index_endian;
|
||||
bool host_primitive_reset_enabled;
|
||||
size_t host_index_buffer_handle;
|
||||
};
|
||||
|
|
|
@ -106,16 +106,20 @@ void SpirvShaderTranslator::Reset() {
|
|||
|
||||
uniform_float_constants_ = spv::NoResult;
|
||||
|
||||
input_fragment_coord_ = spv::NoResult;
|
||||
input_point_coordinates_ = spv::NoResult;
|
||||
input_fragment_coordinates_ = spv::NoResult;
|
||||
input_front_facing_ = spv::NoResult;
|
||||
std::fill(input_output_interpolators_.begin(),
|
||||
input_output_interpolators_.end(), spv::NoResult);
|
||||
output_point_coordinates_ = spv::NoResult;
|
||||
output_point_size_ = spv::NoResult;
|
||||
|
||||
sampler_bindings_.clear();
|
||||
texture_bindings_.clear();
|
||||
|
||||
main_interface_.clear();
|
||||
var_main_registers_ = spv::NoResult;
|
||||
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
|
||||
|
||||
main_switch_op_.reset();
|
||||
main_switch_next_pc_phi_operands_.clear();
|
||||
|
@ -230,7 +234,16 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
{"vertex_base_index", offsetof(SystemConstants, vertex_base_index),
|
||||
type_int_},
|
||||
{"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_},
|
||||
{"point_vertex_diameter_min",
|
||||
offsetof(SystemConstants, point_vertex_diameter_min), type_float_},
|
||||
{"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_},
|
||||
{"point_vertex_diameter_max",
|
||||
offsetof(SystemConstants, point_vertex_diameter_max), type_float_},
|
||||
{"point_constant_diameter",
|
||||
offsetof(SystemConstants, point_constant_diameter), type_float2_},
|
||||
{"point_screen_diameter_to_ndc_radius",
|
||||
offsetof(SystemConstants, point_screen_diameter_to_ndc_radius),
|
||||
type_float2_},
|
||||
{"texture_swizzled_signs",
|
||||
offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2},
|
||||
{"texture_swizzles", offsetof(SystemConstants, texture_swizzles),
|
||||
|
@ -1063,9 +1076,10 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
|
|||
main_interface_.push_back(input_vertex_index_);
|
||||
}
|
||||
|
||||
uint32_t output_location = 0;
|
||||
|
||||
// Create the interpolator outputs.
|
||||
{
|
||||
uint32_t interpolator_location = 0;
|
||||
uint32_t interpolators_remaining = GetModificationInterpolatorMask();
|
||||
uint32_t interpolator_index;
|
||||
while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) {
|
||||
|
@ -1075,10 +1089,41 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
|
|||
fmt::format("xe_out_interpolator_{}", interpolator_index).c_str());
|
||||
input_output_interpolators_[interpolator_index] = interpolator;
|
||||
builder_->addDecoration(interpolator, spv::DecorationLocation,
|
||||
int(interpolator_location));
|
||||
int(output_location));
|
||||
builder_->addDecoration(interpolator, spv::DecorationInvariant);
|
||||
main_interface_.push_back(interpolator);
|
||||
++interpolator_location;
|
||||
++output_location;
|
||||
}
|
||||
}
|
||||
|
||||
Modification shader_modification = GetSpirvShaderModification();
|
||||
|
||||
if (shader_modification.vertex.output_point_parameters) {
|
||||
if (shader_modification.vertex.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||
// Create the point coordinates output.
|
||||
output_point_coordinates_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput,
|
||||
type_float2_, "xe_out_point_coordinates");
|
||||
builder_->addDecoration(output_point_coordinates_,
|
||||
spv::DecorationLocation, int(output_location));
|
||||
builder_->addDecoration(output_point_coordinates_,
|
||||
spv::DecorationInvariant);
|
||||
main_interface_.push_back(output_point_coordinates_);
|
||||
++output_location;
|
||||
} else {
|
||||
// Create the point size output. Not using gl_PointSize from gl_PerVertex
|
||||
// not to rely on the shaderTessellationAndGeometryPointSize feature, and
|
||||
// also because the value written to gl_PointSize must be greater than
|
||||
// zero.
|
||||
output_point_size_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput,
|
||||
type_float_, "xe_out_point_size");
|
||||
builder_->addDecoration(output_point_size_, spv::DecorationLocation,
|
||||
int(output_location));
|
||||
builder_->addDecoration(output_point_size_, spv::DecorationInvariant);
|
||||
main_interface_.push_back(output_point_size_);
|
||||
++output_location;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1103,9 +1148,23 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
|
|||
}
|
||||
|
||||
void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
||||
var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassFunction, type_float3_,
|
||||
"xe_var_point_size_edge_flag_kill_vertex");
|
||||
// The edge flag isn't used for any purpose by the translator.
|
||||
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) {
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(3);
|
||||
// Set the point size to a negative value to tell the point sprite expansion
|
||||
// that it should use the default point size if the vertex shader does not
|
||||
// override it.
|
||||
id_vector_temp_.push_back(builder_->makeFloatConstant(-1.0f));
|
||||
// The edge flag is ignored.
|
||||
id_vector_temp_.push_back(const_float_0_);
|
||||
// Don't kill by default (zero bits 0:30).
|
||||
id_vector_temp_.push_back(const_float_0_);
|
||||
var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassFunction, type_float3_,
|
||||
"xe_var_point_size_edge_flag_kill_vertex",
|
||||
builder_->makeCompositeConstant(type_float3_, id_vector_temp_));
|
||||
}
|
||||
|
||||
// Zero general-purpose registers to prevent crashes when the game
|
||||
// references them after only initializing them conditionally.
|
||||
|
@ -1129,24 +1188,33 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
|||
}
|
||||
}
|
||||
|
||||
Modification shader_modification = GetSpirvShaderModification();
|
||||
|
||||
// TODO(Triang3l): For HostVertexShaderType::kRectangleListAsTriangleStrip,
|
||||
// start the vertex loop, and load the index there.
|
||||
|
||||
// Load the vertex index or the tessellation parameters.
|
||||
if (register_count()) {
|
||||
// TODO(Triang3l): Barycentric coordinates and patch index.
|
||||
if (IsSpirvVertexShader()) {
|
||||
// TODO(Triang3l): Close line loop primitive.
|
||||
// Load the unswapped index as uint for swapping, or for indirect loading
|
||||
// if needed.
|
||||
spv::Id vertex_index = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_uint_,
|
||||
builder_->createLoad(input_vertex_index_, spv::NoPrecision));
|
||||
if (!features_.full_draw_index_uint32) {
|
||||
// Check if the full 32-bit index needs to be loaded indirectly.
|
||||
if (shader_modification.vertex.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||
// Load the point index, autogenerated or indirectly from the index
|
||||
// buffer.
|
||||
// Extract the primitive index from the two-triangle strip vertex index.
|
||||
spv::Id const_uint_2 = builder_->makeUintConstant(2);
|
||||
vertex_index = builder_->createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint_, vertex_index, const_uint_2);
|
||||
// Check if the index needs to be loaded from the index buffer.
|
||||
spv::Id load_vertex_index = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_VertexIndexLoad))),
|
||||
builder_->makeUintConstant(static_cast<unsigned int>(
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad))),
|
||||
const_uint_0_);
|
||||
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint();
|
||||
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock();
|
||||
|
@ -1157,25 +1225,61 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
|||
&block_load_vertex_index_start,
|
||||
&block_load_vertex_index_merge);
|
||||
builder_->setBuildPoint(&block_load_vertex_index_start);
|
||||
// Load the 32-bit index.
|
||||
// TODO(Triang3l): Bounds checking.
|
||||
// Check if the index is 32-bit.
|
||||
spv::Id vertex_index_is_32bit = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(static_cast<unsigned int>(
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))),
|
||||
const_uint_0_);
|
||||
// Calculate the vertex index address in the shared memory.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
|
||||
spv::Id vertex_index_address = builder_->createBinOp(
|
||||
spv::OpIAdd, type_uint_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_,
|
||||
id_vector_temp_),
|
||||
spv::NoPrecision),
|
||||
builder_->createBinOp(
|
||||
spv::OpShiftLeftLogical, type_uint_, vertex_index,
|
||||
builder_->createTriOp(spv::OpSelect, type_uint_,
|
||||
vertex_index_is_32bit, const_uint_2,
|
||||
builder_->makeUintConstant(1))));
|
||||
// Load the 32 bits containing the whole vertex index or two 16-bit
|
||||
// vertex indices.
|
||||
// TODO(Triang3l): Bounds checking.
|
||||
spv::Id loaded_vertex_index =
|
||||
LoadUint32FromSharedMemory(builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
|
||||
vertex_index_address, const_uint_2)));
|
||||
// Extract the 16-bit index from the loaded 32 bits if needed.
|
||||
loaded_vertex_index = builder_->createTriOp(
|
||||
spv::OpSelect, type_uint_, vertex_index_is_32bit,
|
||||
loaded_vertex_index,
|
||||
builder_->createTriOp(
|
||||
spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index,
|
||||
builder_->createBinOp(
|
||||
spv::OpIAdd, type_uint_,
|
||||
builder_->createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision),
|
||||
builder_->makeUintConstant(2)),
|
||||
vertex_index)));
|
||||
spv::OpShiftLeftLogical, type_uint_,
|
||||
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
|
||||
vertex_index_address, const_uint_2),
|
||||
builder_->makeUintConstant(4 - 1)),
|
||||
builder_->makeUintConstant(16)));
|
||||
// Endian-swap the loaded index.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
|
||||
loaded_vertex_index = EndianSwap32Uint(
|
||||
loaded_vertex_index,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_,
|
||||
id_vector_temp_),
|
||||
spv::NoPrecision));
|
||||
// Get the actual build point for phi.
|
||||
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
|
||||
builder_->createBranch(&block_load_vertex_index_merge);
|
||||
|
@ -1195,19 +1299,81 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
|||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(loaded_vertex_index_phi_op));
|
||||
}
|
||||
} else {
|
||||
// TODO(Triang3l): Close line loop primitive.
|
||||
// Load the unswapped index as uint for swapping, or for indirect
|
||||
// loading if needed.
|
||||
if (!features_.full_draw_index_uint32) {
|
||||
// Check if the full 32-bit index needs to be loaded indirectly.
|
||||
spv::Id load_vertex_index = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_VertexIndexLoad))),
|
||||
const_uint_0_);
|
||||
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint();
|
||||
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock();
|
||||
spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock();
|
||||
SpirvCreateSelectionMerge(block_load_vertex_index_merge.getId(),
|
||||
spv::SelectionControlDontFlattenMask);
|
||||
builder_->createConditionalBranch(load_vertex_index,
|
||||
&block_load_vertex_index_start,
|
||||
&block_load_vertex_index_merge);
|
||||
builder_->setBuildPoint(&block_load_vertex_index_start);
|
||||
// Load the 32-bit index.
|
||||
// TODO(Triang3l): Bounds checking.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
|
||||
spv::Id loaded_vertex_index =
|
||||
LoadUint32FromSharedMemory(builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createBinOp(
|
||||
spv::OpIAdd, type_uint_,
|
||||
builder_->createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision),
|
||||
builder_->makeUintConstant(2)),
|
||||
vertex_index)));
|
||||
// Get the actual build point for phi.
|
||||
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
|
||||
builder_->createBranch(&block_load_vertex_index_merge);
|
||||
// Select between the loaded index and the original index from Vulkan.
|
||||
builder_->setBuildPoint(&block_load_vertex_index_merge);
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> loaded_vertex_index_phi_op =
|
||||
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
|
||||
type_uint_, spv::OpPhi);
|
||||
loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index);
|
||||
loaded_vertex_index_phi_op->addIdOperand(
|
||||
block_load_vertex_index_end.getId());
|
||||
loaded_vertex_index_phi_op->addIdOperand(vertex_index);
|
||||
loaded_vertex_index_phi_op->addIdOperand(
|
||||
block_load_vertex_index_pre.getId());
|
||||
vertex_index = loaded_vertex_index_phi_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(loaded_vertex_index_phi_op));
|
||||
}
|
||||
}
|
||||
// Endian-swap the index.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
|
||||
vertex_index = EndianSwap32Uint(
|
||||
vertex_index, builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision));
|
||||
}
|
||||
// Endian-swap the index and convert to int.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
|
||||
spv::Id vertex_index_endian =
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
vertex_index = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
EndianSwap32Uint(vertex_index, vertex_index_endian));
|
||||
// Convert the index to a signed integer.
|
||||
vertex_index =
|
||||
builder_->createUnaryOp(spv::OpBitcast, type_int_, vertex_index);
|
||||
// Add the base to the index.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
|
@ -1258,61 +1424,66 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
|
|||
builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal,
|
||||
position_w, guest_position_w_inv);
|
||||
|
||||
// Check if the shader returns XY/W rather than XY, and if it does, revert
|
||||
// that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
uint_vector_temp_.clear();
|
||||
uint_vector_temp_.reserve(2);
|
||||
uint_vector_temp_.push_back(0);
|
||||
uint_vector_temp_.push_back(1);
|
||||
spv::Id position_xy = builder_->createRvalueSwizzle(
|
||||
spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_);
|
||||
spv::Id is_xy_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_XYDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_xy_mul_w = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, type_float2_, position_xy, position_w);
|
||||
builder_->addDecoration(guest_position_xy_mul_w,
|
||||
spv::DecorationNoContraction);
|
||||
position_xy =
|
||||
builder_->createTriOp(spv::OpSelect, type_float2_, is_xy_divided_by_w,
|
||||
guest_position_xy_mul_w, position_xy);
|
||||
|
||||
// Check if the shader returns Z/W rather than Z, and if it does, revert that.
|
||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||
// affine interpolation.
|
||||
spv::Id position_z =
|
||||
builder_->createCompositeExtract(guest_position, type_float_, 2);
|
||||
spv::Id is_z_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_ZDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_z_mul_w =
|
||||
builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w);
|
||||
builder_->addDecoration(guest_position_z_mul_w, spv::DecorationNoContraction);
|
||||
position_z =
|
||||
builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w,
|
||||
guest_position_z_mul_w, position_z);
|
||||
|
||||
// Build XYZ of the position with W format handled.
|
||||
spv::Id position_xyz;
|
||||
|
||||
// Open a scope since position_xy and position_z won't be synchronized anymore
|
||||
// after position_xyz is built and modified later.
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xy);
|
||||
composite_construct_op->addIdOperand(position_z);
|
||||
position_xyz = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
// Check if the shader returns XY/W rather than XY, and if it does, revert
|
||||
// that.
|
||||
uint_vector_temp_.clear();
|
||||
uint_vector_temp_.reserve(2);
|
||||
uint_vector_temp_.push_back(0);
|
||||
uint_vector_temp_.push_back(1);
|
||||
spv::Id position_xy = builder_->createRvalueSwizzle(
|
||||
spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_);
|
||||
spv::Id is_xy_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_XYDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_xy_mul_w = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, type_float2_, position_xy, position_w);
|
||||
builder_->addDecoration(guest_position_xy_mul_w,
|
||||
spv::DecorationNoContraction);
|
||||
position_xy = builder_->createTriOp(
|
||||
spv::OpSelect, type_float2_,
|
||||
builder_->smearScalar(spv::NoPrecision, is_xy_divided_by_w,
|
||||
type_bool2_),
|
||||
guest_position_xy_mul_w, position_xy);
|
||||
|
||||
// Check if the shader returns Z/W rather than Z, and if it does, revert
|
||||
// that.
|
||||
spv::Id position_z =
|
||||
builder_->createCompositeExtract(guest_position, type_float_, 2);
|
||||
spv::Id is_z_divided_by_w = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_ZDividedByW))),
|
||||
const_uint_0_);
|
||||
spv::Id guest_position_z_mul_w =
|
||||
builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w);
|
||||
builder_->addDecoration(guest_position_z_mul_w,
|
||||
spv::DecorationNoContraction);
|
||||
position_z =
|
||||
builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w,
|
||||
guest_position_z_mul_w, position_z);
|
||||
|
||||
// Build XYZ of the position with W format handled.
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xy);
|
||||
composite_construct_op->addIdOperand(position_z);
|
||||
position_xyz = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the NDC scale and offset for guest to host viewport transformation.
|
||||
|
@ -1339,26 +1510,182 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
|
|||
ndc_offset_mul_w);
|
||||
builder_->addDecoration(position_xyz, spv::DecorationNoContraction);
|
||||
|
||||
// Store the position converted to the host.
|
||||
spv::Id position;
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xyz);
|
||||
composite_construct_op->addIdOperand(position_w);
|
||||
position = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
// Write the point size.
|
||||
if (output_point_size_ != spv::NoResult) {
|
||||
spv::Id point_size;
|
||||
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b001) {
|
||||
assert_true(var_main_point_size_edge_flag_kill_vertex_ != spv::NoResult);
|
||||
id_vector_temp_.clear();
|
||||
// X vector component.
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
point_size = builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassFunction,
|
||||
var_main_point_size_edge_flag_kill_vertex_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
} else {
|
||||
// Not statically overridden - write a negative value.
|
||||
point_size = builder_->makeFloatConstant(-1.0f);
|
||||
}
|
||||
builder_->createStore(point_size, output_point_size_);
|
||||
}
|
||||
|
||||
Modification shader_modification = GetSpirvShaderModification();
|
||||
|
||||
// Expand the point sprite.
|
||||
if (shader_modification.vertex.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||
// Top-left, bottom-left, top-right, bottom-right order (chosen arbitrarily,
|
||||
// simply based on counterclockwise meaning front with
|
||||
// frontFace = VkFrontFace(0), but faceness is ignored for non-polygon
|
||||
// primitive types).
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(2);
|
||||
id_vector_temp_.push_back(builder_->makeUintConstant(0b10));
|
||||
id_vector_temp_.push_back(builder_->makeUintConstant(0b01));
|
||||
spv::Id point_vertex_positive = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool2_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint2_,
|
||||
builder_->smearScalar(spv::NoPrecision,
|
||||
builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_uint_,
|
||||
builder_->createLoad(input_vertex_index_,
|
||||
spv::NoPrecision)),
|
||||
type_uint2_),
|
||||
builder_->createCompositeConstruct(type_uint2_, id_vector_temp_)),
|
||||
SpirvSmearScalarResultOrConstant(const_uint_0_, type_uint2_));
|
||||
|
||||
// Load the point diameter in guest pixels, with the override from the
|
||||
// vertex shader if provided.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantPointConstantDiameter));
|
||||
spv::Id point_guest_diameter = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b001) {
|
||||
assert_true(var_main_point_size_edge_flag_kill_vertex_ != spv::NoResult);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
spv::Id point_vertex_diameter = builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassFunction,
|
||||
var_main_point_size_edge_flag_kill_vertex_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
// The vertex shader's header writes -1.0 to point_size by default, so any
|
||||
// non-negative value means that it was overwritten by the translated
|
||||
// vertex shader, and needs to be used instead of the constant size. The
|
||||
// per-vertex diameter has already been clamped earlier in translation
|
||||
// (combined with making it non-negative).
|
||||
point_guest_diameter = builder_->createTriOp(
|
||||
spv::OpSelect, type_float2_,
|
||||
builder_->smearScalar(
|
||||
spv::NoPrecision,
|
||||
builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_,
|
||||
point_vertex_diameter, const_float_0_),
|
||||
type_bool2_),
|
||||
builder_->smearScalar(spv::NoPrecision, point_vertex_diameter,
|
||||
type_float2_),
|
||||
point_guest_diameter);
|
||||
}
|
||||
// Transform the diameter in the guest screen coordinates to radius in the
|
||||
// normalized device coordinates.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(
|
||||
kSystemConstantPointScreenDiameterToNdcRadius));
|
||||
spv::Id point_radius = builder_->createBinOp(
|
||||
spv::OpFMul, type_float2_, point_guest_diameter,
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision));
|
||||
builder_->addDecoration(point_radius, spv::DecorationNoContraction);
|
||||
// Transform the radius from the normalized device coordinates to the clip
|
||||
// space.
|
||||
point_radius = builder_->createBinOp(spv::OpVectorTimesScalar, type_float2_,
|
||||
point_radius, position_w);
|
||||
builder_->addDecoration(point_radius, spv::DecorationNoContraction);
|
||||
|
||||
// Apply the direction of expansion for the current host vertex.
|
||||
spv::Id point_radius_negative =
|
||||
builder_->createUnaryOp(spv::OpFNegate, type_float2_, point_radius);
|
||||
builder_->addDecoration(point_radius_negative,
|
||||
spv::DecorationNoContraction);
|
||||
// Expand the point sprite.
|
||||
uint_vector_temp_.clear();
|
||||
uint_vector_temp_.reserve(2);
|
||||
uint_vector_temp_.push_back(0);
|
||||
uint_vector_temp_.push_back(1);
|
||||
spv::Id point_position_xy = builder_->createBinOp(
|
||||
spv::OpFAdd, type_float2_,
|
||||
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
|
||||
position_xyz, uint_vector_temp_),
|
||||
builder_->createTriOp(spv::OpSelect, type_float2_,
|
||||
point_vertex_positive, point_radius,
|
||||
point_radius_negative));
|
||||
builder_->addDecoration(point_position_xy, spv::DecorationNoContraction);
|
||||
// Store the position.
|
||||
spv::Id position;
|
||||
{
|
||||
// Bypass the `getNumTypeConstituents(typeId) == (int)constituents.size()`
|
||||
// assertion in createCompositeConstruct, OpCompositeConstruct can
|
||||
// construct vectors not only from scalars, but also from other vectors.
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(point_position_xy);
|
||||
composite_construct_op->addIdOperand(
|
||||
builder_->createCompositeExtract(position_xyz, type_float_, 2));
|
||||
composite_construct_op->addIdOperand(position_w);
|
||||
position = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
builder_->createStore(position, position_ptr);
|
||||
|
||||
// Write the point coordinates.
|
||||
if (output_point_coordinates_ != spv::NoResult) {
|
||||
builder_->createStore(
|
||||
builder_->createTriOp(spv::OpSelect, type_float2_,
|
||||
point_vertex_positive, const_float2_1_,
|
||||
const_float2_0_),
|
||||
output_point_coordinates_);
|
||||
}
|
||||
|
||||
// TODO(Triang3l): For points, handle ps_ucp_mode (take the guest clip space
|
||||
// coordinates instead of the host ones, calculate the distances to the user
|
||||
// clip planes, cull using the distance from the center for modes 0, 1 and
|
||||
// 2, cull and clip per-vertex for modes 2 and 3) in clip and cull
|
||||
// distances.
|
||||
} else {
|
||||
// Store the position converted to the host.
|
||||
spv::Id position;
|
||||
{
|
||||
// Bypass the `getNumTypeConstituents(typeId) == (int)constituents.size()`
|
||||
// assertion in createCompositeConstruct, OpCompositeConstruct can
|
||||
// construct vectors not only from scalars, but also from other vectors.
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(position_xyz);
|
||||
composite_construct_op->addIdOperand(position_w);
|
||||
position = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
builder_->createStore(position, position_ptr);
|
||||
}
|
||||
builder_->createStore(position, position_ptr);
|
||||
}
|
||||
|
||||
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||
// Interpolator inputs.
|
||||
Modification shader_modification = GetSpirvShaderModification();
|
||||
|
||||
uint32_t input_location = 0;
|
||||
|
||||
// Interpolator inputs.
|
||||
{
|
||||
uint32_t interpolator_location = 0;
|
||||
uint32_t interpolators_remaining = GetModificationInterpolatorMask();
|
||||
uint32_t interpolator_index;
|
||||
while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) {
|
||||
|
@ -1368,28 +1695,41 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
|||
fmt::format("xe_in_interpolator_{}", interpolator_index).c_str());
|
||||
input_output_interpolators_[interpolator_index] = interpolator;
|
||||
builder_->addDecoration(interpolator, spv::DecorationLocation,
|
||||
int(interpolator_location));
|
||||
int(input_location));
|
||||
if (shader_modification.pixel.interpolators_centroid &
|
||||
(UINT32_C(1) << interpolator_index)) {
|
||||
builder_->addDecoration(interpolator, spv::DecorationCentroid);
|
||||
}
|
||||
main_interface_.push_back(interpolator);
|
||||
++interpolator_location;
|
||||
++input_location;
|
||||
}
|
||||
}
|
||||
|
||||
bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX;
|
||||
|
||||
// Point coordinate input.
|
||||
if (shader_modification.pixel.param_gen_point) {
|
||||
if (param_gen_needed) {
|
||||
input_point_coordinates_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
|
||||
type_float2_, "xe_in_point_coordinates");
|
||||
builder_->addDecoration(input_point_coordinates_, spv::DecorationLocation,
|
||||
int(input_location));
|
||||
main_interface_.push_back(input_point_coordinates_);
|
||||
}
|
||||
++input_location;
|
||||
}
|
||||
|
||||
// Fragment coordinates.
|
||||
// TODO(Triang3l): More conditions - fragment shader interlock render backend,
|
||||
// alpha to coverage (if RT 0 is written, and there's no early depth /
|
||||
// stencil), depth writing in the fragment shader (per-sample if supported).
|
||||
if (param_gen_needed) {
|
||||
input_fragment_coord_ = builder_->createVariable(
|
||||
input_fragment_coordinates_ = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord");
|
||||
builder_->addDecoration(input_fragment_coord_, spv::DecorationBuiltIn,
|
||||
builder_->addDecoration(input_fragment_coordinates_, spv::DecorationBuiltIn,
|
||||
spv::BuiltInFragCoord);
|
||||
main_interface_.push_back(input_fragment_coord_);
|
||||
main_interface_.push_back(input_fragment_coordinates_);
|
||||
}
|
||||
|
||||
// Is front facing.
|
||||
|
@ -1473,13 +1813,14 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
|||
spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
|
||||
// TODO(Triang3l): Resolution scale inversion.
|
||||
// X - pixel X .0 in the magnitude, is back-facing in the sign bit.
|
||||
assert_true(input_fragment_coord_ != spv::NoResult);
|
||||
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
spv::Id param_gen_x = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassInput,
|
||||
input_fragment_coord_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
spv::Id param_gen_x =
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassInput,
|
||||
input_fragment_coordinates_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(param_gen_x);
|
||||
param_gen_x = builder_->createBuiltinCall(
|
||||
|
@ -1514,10 +1855,11 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
|||
// Y - pixel Y .0 in the magnitude, is point in the sign bit.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(1));
|
||||
spv::Id param_gen_y = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassInput,
|
||||
input_fragment_coord_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
spv::Id param_gen_y =
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassInput,
|
||||
input_fragment_coordinates_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(param_gen_y);
|
||||
param_gen_y = builder_->createBuiltinCall(
|
||||
|
@ -1535,10 +1877,25 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
|||
const_sign_bit));
|
||||
}
|
||||
// Z - point S in the magnitude, is line in the sign bit.
|
||||
spv::Id param_gen_z;
|
||||
// W - point T in the magnitude.
|
||||
spv::Id param_gen_z, param_gen_w;
|
||||
if (modification.pixel.param_gen_point) {
|
||||
// TODO(Triang3l): Point coordinates.
|
||||
param_gen_z = const_float_0_;
|
||||
assert_true(input_point_coordinates_ != spv::NoResult);
|
||||
// Saturate to avoid negative point coordinates if the center of the pixel
|
||||
// is not covered, and extrapolation is done.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(3);
|
||||
id_vector_temp_.push_back(
|
||||
builder_->createLoad(input_point_coordinates_, spv::NoPrecision));
|
||||
id_vector_temp_.push_back(const_float2_0_);
|
||||
id_vector_temp_.push_back(const_float2_1_);
|
||||
spv::Id param_gen_point_coordinates =
|
||||
builder_->createBuiltinCall(type_float2_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450NClamp, id_vector_temp_);
|
||||
param_gen_z = builder_->createCompositeExtract(
|
||||
param_gen_point_coordinates, type_float_, 0);
|
||||
param_gen_w = builder_->createCompositeExtract(
|
||||
param_gen_point_coordinates, type_float_, 1);
|
||||
} else {
|
||||
param_gen_z = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_float_,
|
||||
|
@ -1552,10 +1909,8 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
|||
builder_->makeUintConstant(kSysFlag_PrimitiveLine)),
|
||||
const_uint_0_),
|
||||
const_sign_bit, const_uint_0_));
|
||||
param_gen_w = const_float_0_;
|
||||
}
|
||||
// W - point T in the magnitude.
|
||||
// TODO(Triang3l): Point coordinates.
|
||||
spv::Id param_gen_w = const_float_0_;
|
||||
// Store the pixel parameters.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(4);
|
||||
|
@ -1927,15 +2282,20 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
target_pointer = input_output_interpolators_[result.storage_index];
|
||||
// Unused interpolators are spv::NoResult in input_output_interpolators_.
|
||||
} break;
|
||||
case InstructionStorageTarget::kPosition:
|
||||
case InstructionStorageTarget::kPosition: {
|
||||
assert_true(is_vertex_shader());
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.push_back(
|
||||
builder_->makeIntConstant(kOutputPerVertexMemberPosition));
|
||||
target_pointer = builder_->createAccessChain(
|
||||
spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_);
|
||||
break;
|
||||
case InstructionStorageTarget::kColor:
|
||||
} break;
|
||||
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: {
|
||||
assert_true(is_vertex_shader());
|
||||
assert_zero(used_write_mask & 0b1000);
|
||||
target_pointer = var_main_point_size_edge_flag_kill_vertex_;
|
||||
} break;
|
||||
case InstructionStorageTarget::kColor: {
|
||||
assert_true(is_pixel_shader());
|
||||
assert_not_zero(used_write_mask);
|
||||
assert_true(current_shader().writes_color_target(result.storage_index));
|
||||
|
@ -1944,7 +2304,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
// an empty write mask without independent blending.
|
||||
// TODO(Triang3l): Store the alpha of the first output in this case for
|
||||
// alpha test and alpha to coverage.
|
||||
break;
|
||||
} break;
|
||||
default:
|
||||
// TODO(Triang3l): All storage targets.
|
||||
break;
|
||||
|
@ -2179,6 +2539,57 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (result.storage_target ==
|
||||
InstructionStorageTarget::kPointSizeEdgeFlagKillVertex &&
|
||||
used_write_mask & 0b001) {
|
||||
// Make the point size non-negative as negative is used to indicate that the
|
||||
// default size must be used, and also clamp it to the bounds the way the
|
||||
// R400 (Adreno 200, to be more precise) hardware clamps it (functionally
|
||||
// like a signed 32-bit integer, -NaN and -Infinity...-0 to the minimum,
|
||||
// +NaN to the maximum).
|
||||
spv::Id point_size = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createCompositeExtract(value_to_store, type_float_, 0));
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantPointVertexDiameterMin));
|
||||
spv::Id point_vertex_diameter_min = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_,
|
||||
id_vector_temp_util_),
|
||||
spv::NoPrecision));
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.reserve(2);
|
||||
id_vector_temp_util_.push_back(point_vertex_diameter_min);
|
||||
id_vector_temp_util_.push_back(point_size);
|
||||
point_size =
|
||||
builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450SMax, id_vector_temp_util_);
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantPointVertexDiameterMax));
|
||||
spv::Id point_vertex_diameter_max = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants_,
|
||||
id_vector_temp_util_),
|
||||
spv::NoPrecision));
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.reserve(2);
|
||||
id_vector_temp_util_.push_back(point_vertex_diameter_max);
|
||||
id_vector_temp_util_.push_back(point_size);
|
||||
point_size =
|
||||
builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450SMin, id_vector_temp_util_);
|
||||
value_to_store = builder_->createCompositeInsert(
|
||||
builder_->createUnaryOp(spv::OpBitcast, type_float_, point_size),
|
||||
value_to_store, type_float3_, 0);
|
||||
}
|
||||
|
||||
builder_->createStore(value_to_store, target_pointer);
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
|
||||
// prototyping stage (easier to do small granular updates with an
|
||||
// incremental counter).
|
||||
static constexpr uint32_t kVersion = 5;
|
||||
static constexpr uint32_t kVersion = 6;
|
||||
|
||||
enum class DepthStencilMode : uint32_t {
|
||||
kNoModifiers,
|
||||
|
@ -50,6 +50,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
// Interpolators written by the vertex shader and needed by the pixel
|
||||
// shader.
|
||||
uint32_t interpolator_mask : xenos::kMaxInterpolators;
|
||||
// For HostVertexShaderType kPointListAsTriangleStrip, whether to output
|
||||
// the point coordinates.
|
||||
// For other HostVertexShaderTypes (though truly reachable only for
|
||||
// kVertex), whether to output the point size.
|
||||
uint32_t output_point_parameters : 1;
|
||||
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
|
||||
uint32_t dynamic_addressable_register_count : 8;
|
||||
// Pipeline stage and input configuration.
|
||||
|
@ -145,10 +150,15 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
int32_t vertex_base_index;
|
||||
|
||||
float ndc_scale[3];
|
||||
uint32_t padding_ndc_scale;
|
||||
float point_vertex_diameter_min;
|
||||
|
||||
float ndc_offset[3];
|
||||
uint32_t padding_ndc_offset;
|
||||
float point_vertex_diameter_max;
|
||||
|
||||
float point_constant_diameter[2];
|
||||
// Diameter in guest screen coordinates to radius (0.5 * diameter) in the NDC
|
||||
// for the host viewport.
|
||||
float point_screen_diameter_to_ndc_radius[2];
|
||||
|
||||
// Each byte contains post-swizzle TextureSign values for each of the needed
|
||||
// components of each of the 32 used texture fetch constants.
|
||||
|
@ -603,7 +613,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
kSystemConstantVertexIndexEndian,
|
||||
kSystemConstantVertexBaseIndex,
|
||||
kSystemConstantNdcScale,
|
||||
kSystemConstantPointVertexDiameterMin,
|
||||
kSystemConstantNdcOffset,
|
||||
kSystemConstantPointVertexDiameterMax,
|
||||
kSystemConstantPointConstantDiameter,
|
||||
kSystemConstantPointScreenDiameterToNdcRadius,
|
||||
kSystemConstantTextureSwizzledSigns,
|
||||
kSystemConstantTextureSwizzles,
|
||||
kSystemConstantAlphaTestReference,
|
||||
|
@ -627,8 +641,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
spv::Id input_vertex_index_;
|
||||
// VS as TES only - int.
|
||||
spv::Id input_primitive_id_;
|
||||
// PS, only when needed - float2.
|
||||
spv::Id input_point_coordinates_;
|
||||
// PS, only when needed - float4.
|
||||
spv::Id input_fragment_coord_;
|
||||
spv::Id input_fragment_coordinates_;
|
||||
// PS, only when needed - bool.
|
||||
spv::Id input_front_facing_;
|
||||
|
||||
|
@ -643,6 +659,12 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
// all).
|
||||
std::array<spv::Id, xenos::kMaxInterpolators> input_output_interpolators_;
|
||||
|
||||
// VS, only for HostVertexShaderType::kPointListAsTriangleStrip when needed
|
||||
// for the PS - float2.
|
||||
spv::Id output_point_coordinates_;
|
||||
// VS, only when needed - float.
|
||||
spv::Id output_point_size_;
|
||||
|
||||
enum OutputPerVertexMember : unsigned int {
|
||||
kOutputPerVertexMemberPosition,
|
||||
kOutputPerVertexMemberCount,
|
||||
|
|
|
@ -1296,18 +1296,14 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
|
|||
builder_->addDecoration(face, spv::DecorationNoContraction);
|
||||
}
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(2);
|
||||
id_vector_temp_.reserve(3);
|
||||
id_vector_temp_.push_back(face);
|
||||
id_vector_temp_.push_back(const_float_0_);
|
||||
id_vector_temp_.push_back(face);
|
||||
face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450NMax, id_vector_temp_);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(2);
|
||||
id_vector_temp_.push_back(builder_->makeFloatConstant(5.0f));
|
||||
id_vector_temp_.push_back(face);
|
||||
face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450FMin, id_vector_temp_);
|
||||
face = builder_->createUnaryOp(spv::OpConvertFToU, type_uint_, face);
|
||||
face = builder_->createUnaryOp(
|
||||
spv::OpConvertFToU, type_uint_,
|
||||
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450NClamp, id_vector_temp_));
|
||||
// Split the face index into the axis and the sign.
|
||||
spv::Id const_uint_1 = builder_->makeUintConstant(1);
|
||||
spv::Id face_axis = builder_->createBinOp(
|
||||
|
|
|
@ -2171,7 +2171,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
// TODO(Triang3l): Tessellation, geometry-type-specific vertex shader,
|
||||
// vertex shader as compute.
|
||||
if (primitive_processing_result.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex) {
|
||||
Shader::HostVertexShaderType::kVertex &&
|
||||
primitive_processing_result.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2179,7 +2181,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
vertex_shader_modification =
|
||||
pipeline_cache_->GetCurrentVertexShaderModification(
|
||||
*vertex_shader, primitive_processing_result.host_vertex_shader_type,
|
||||
interpolator_mask);
|
||||
interpolator_mask, ps_param_gen_pos != UINT32_MAX);
|
||||
pixel_shader_modification =
|
||||
pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification(
|
||||
*pixel_shader, interpolator_mask, ps_param_gen_pos)
|
||||
|
@ -2348,6 +2350,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
}
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
|
||||
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
|
||||
const VkPhysicalDeviceLimits& device_limits =
|
||||
provider.device_properties().limits;
|
||||
|
||||
|
@ -2382,11 +2385,23 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
UpdateDynamicState(viewport_info, primitive_polygonal,
|
||||
normalized_depth_control);
|
||||
|
||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||
|
||||
// Whether to load the guest 32-bit (usually big-endian) vertex index
|
||||
// indirectly in the vertex shader if full 32-bit indices are not supported by
|
||||
// the host.
|
||||
bool shader_32bit_index_dma =
|
||||
!device_features.fullDrawIndexUint32 &&
|
||||
primitive_processing_result.index_buffer_type ==
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA &&
|
||||
vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32 &&
|
||||
primitive_processing_result.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
|
||||
// Update system constants before uploading them.
|
||||
bool vertex_shader_index_load;
|
||||
UpdateSystemConstantValues(primitive_polygonal, primitive_processing_result,
|
||||
viewport_info, used_texture_mask,
|
||||
vertex_shader_index_load);
|
||||
shader_32bit_index_dma, viewport_info,
|
||||
used_texture_mask);
|
||||
|
||||
// Update uniform buffers and descriptor sets after binding the pipeline with
|
||||
// the new layout.
|
||||
|
@ -2453,13 +2468,13 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
// Draw.
|
||||
if (primitive_processing_result.index_buffer_type ==
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kNone ||
|
||||
vertex_shader_index_load) {
|
||||
shader_32bit_index_dma) {
|
||||
deferred_command_buffer_.CmdVkDraw(
|
||||
primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
|
||||
} else {
|
||||
std::pair<VkBuffer, VkDeviceSize> index_buffer;
|
||||
switch (primitive_processing_result.index_buffer_type) {
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kGuest:
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA:
|
||||
index_buffer.first = shared_memory_->buffer();
|
||||
index_buffer.second = primitive_processing_result.guest_index_base;
|
||||
break;
|
||||
|
@ -2467,7 +2482,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
index_buffer = primitive_processor_->GetConvertedIndexBuffer(
|
||||
primitive_processing_result.host_index_buffer_handle);
|
||||
break;
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin:
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForAuto:
|
||||
case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForDMA:
|
||||
index_buffer = primitive_processor_->GetBuiltinIndexBuffer(
|
||||
primitive_processing_result.host_index_buffer_handle);
|
||||
break;
|
||||
|
@ -3342,8 +3358,8 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
|||
void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||
bool primitive_polygonal,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
|
||||
bool& vertex_shader_index_load_out) {
|
||||
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
||||
uint32_t used_texture_mask) {
|
||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
|
@ -3367,51 +3383,17 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
// Flags.
|
||||
uint32_t flags = 0;
|
||||
// Vertex index shader loading.
|
||||
bool vertex_shader_index_load = false;
|
||||
// Only for ProcessedIndexBufferType kGuest since kHostConverted indices may
|
||||
// not be loaded into the GPU memory (only read on the CPU), though
|
||||
// kHostConverted must never be used for point lists and rectangle lists
|
||||
// without geometry shaders anyway. For regular 32-bit index fetching without
|
||||
// fullDrawIndexUint32, kHostConverted indices are already byte-swapped and
|
||||
// truncated to 24 bits, so indirect fetch is not needed.
|
||||
if (shader_32bit_index_dma) {
|
||||
flags |= SpirvShaderTranslator::kSysFlag_VertexIndexLoad;
|
||||
}
|
||||
if (primitive_processing_result.index_buffer_type ==
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kGuest) {
|
||||
switch (primitive_processing_result.host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kVertex: {
|
||||
// For guest (usually big-endian) 32-bit indices when they're not
|
||||
// supported by the device.
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
|
||||
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
|
||||
const VkPhysicalDeviceFeatures& device_features =
|
||||
provider.device_features();
|
||||
if (!device_features.fullDrawIndexUint32) {
|
||||
vertex_shader_index_load = true;
|
||||
flags |= SpirvShaderTranslator::kSysFlag_VertexIndexLoad;
|
||||
}
|
||||
}
|
||||
} break;
|
||||
// kMemexportCompute never comes out of the PrimitiveProcessor, as
|
||||
// memexport compute shaders are executed alongside their vertex
|
||||
// counterparts, since they may still result in drawing.
|
||||
case Shader::HostVertexShaderType::kPointListAsTriangleStrip:
|
||||
case Shader::HostVertexShaderType::kRectangleListAsTriangleStrip: {
|
||||
// Always loading the guest index buffer indirectly if it's used, as
|
||||
// host indexing contains a part needed specifically for the host for
|
||||
// the construction of the primitive - host vertices don't map 1:1 to
|
||||
// guest ones.
|
||||
vertex_shader_index_load = true;
|
||||
flags |=
|
||||
SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad;
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
|
||||
flags |= SpirvShaderTranslator ::
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForDMA) {
|
||||
flags |= SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad;
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
|
||||
flags |= SpirvShaderTranslator ::
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit;
|
||||
}
|
||||
}
|
||||
vertex_shader_index_load_out = vertex_shader_index_load;
|
||||
// W0 division control.
|
||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
||||
|
@ -3466,9 +3448,9 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
|
||||
// Index or tessellation edge factor buffer endianness.
|
||||
dirty |= system_constants_.vertex_index_endian !=
|
||||
primitive_processing_result.host_index_endian;
|
||||
primitive_processing_result.host_shader_index_endian;
|
||||
system_constants_.vertex_index_endian =
|
||||
primitive_processing_result.host_index_endian;
|
||||
primitive_processing_result.host_shader_index_endian;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
|
@ -3482,6 +3464,49 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
|
||||
}
|
||||
|
||||
// Point size.
|
||||
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
|
||||
float point_vertex_diameter_min =
|
||||
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
|
||||
float point_vertex_diameter_max =
|
||||
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_x =
|
||||
float(pa_su_point_size.width) * (2.0f / 16.0f);
|
||||
float point_constant_diameter_y =
|
||||
float(pa_su_point_size.height) * (2.0f / 16.0f);
|
||||
dirty |= system_constants_.point_vertex_diameter_min !=
|
||||
point_vertex_diameter_min;
|
||||
dirty |= system_constants_.point_vertex_diameter_max !=
|
||||
point_vertex_diameter_max;
|
||||
dirty |= system_constants_.point_constant_diameter[0] !=
|
||||
point_constant_diameter_x;
|
||||
dirty |= system_constants_.point_constant_diameter[1] !=
|
||||
point_constant_diameter_y;
|
||||
system_constants_.point_vertex_diameter_min = point_vertex_diameter_min;
|
||||
system_constants_.point_vertex_diameter_max = point_vertex_diameter_max;
|
||||
system_constants_.point_constant_diameter[0] = point_constant_diameter_x;
|
||||
system_constants_.point_constant_diameter[1] = point_constant_diameter_y;
|
||||
// 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter
|
||||
// to radius conversion to avoid multiplying the per-vertex diameter by an
|
||||
// additional constant in the shader.
|
||||
float point_screen_diameter_to_ndc_radius_x =
|
||||
(/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_x())) /
|
||||
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
||||
float point_screen_diameter_to_ndc_radius_y =
|
||||
(/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_y())) /
|
||||
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] !=
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[0] =
|
||||
point_screen_diameter_to_ndc_radius_x;
|
||||
system_constants_.point_screen_diameter_to_ndc_radius[1] =
|
||||
point_screen_diameter_to_ndc_radius_y;
|
||||
}
|
||||
|
||||
// Texture signedness / gamma.
|
||||
{
|
||||
uint32_t textures_remaining = used_texture_mask;
|
||||
|
|
|
@ -436,8 +436,8 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
void UpdateSystemConstantValues(
|
||||
bool primitive_polygonal,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
|
||||
bool& vertex_shader_index_load_out);
|
||||
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
||||
uint32_t used_texture_mask);
|
||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||
const VulkanShader* pixel_shader);
|
||||
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet
|
||||
|
|
|
@ -118,7 +118,7 @@ VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
SpirvShaderTranslator::Modification
|
||||
VulkanPipelineCache::GetCurrentVertexShaderModification(
|
||||
const Shader& shader, Shader::HostVertexShaderType host_vertex_shader_type,
|
||||
uint32_t interpolator_mask) const {
|
||||
uint32_t interpolator_mask, bool ps_param_gen_used) const {
|
||||
assert_true(shader.type() == xenos::ShaderType::kVertex);
|
||||
assert_true(shader.is_ucode_analyzed());
|
||||
const auto& regs = register_file_;
|
||||
|
@ -133,6 +133,16 @@ VulkanPipelineCache::GetCurrentVertexShaderModification(
|
|||
|
||||
modification.vertex.interpolator_mask = interpolator_mask;
|
||||
|
||||
if (host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||
modification.vertex.output_point_parameters = uint32_t(ps_param_gen_used);
|
||||
} else {
|
||||
modification.vertex.output_point_parameters =
|
||||
uint32_t((shader.writes_point_size_edge_flag_kill_vertex() & 0b001) &&
|
||||
regs.Get<reg::VGT_DRAW_INITIATOR>().prim_type ==
|
||||
xenos::PrimitiveType::kPointList);
|
||||
}
|
||||
|
||||
return modification;
|
||||
}
|
||||
|
||||
|
@ -284,6 +294,8 @@ bool VulkanPipelineCache::ConfigurePipeline(
|
|||
if (GetGeometryShaderKey(
|
||||
description.geometry_shader,
|
||||
SpirvShaderTranslator::Modification(vertex_shader->modification()),
|
||||
SpirvShaderTranslator::Modification(
|
||||
pixel_shader ? pixel_shader->modification() : 0),
|
||||
geometry_shader_key)) {
|
||||
geometry_shader = GetGeometryShader(geometry_shader_key);
|
||||
if (geometry_shader == VK_NULL_HANDLE) {
|
||||
|
@ -496,6 +508,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
|||
PipelinePrimitiveTopology primitive_topology;
|
||||
switch (primitive_processing_result.host_primitive_type) {
|
||||
case xenos::PrimitiveType::kPointList:
|
||||
geometry_shader = PipelineGeometryShader::kPointList;
|
||||
primitive_topology = PipelinePrimitiveTopology::kPointList;
|
||||
break;
|
||||
case xenos::PrimitiveType::kLineList:
|
||||
|
@ -815,10 +828,22 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet(
|
|||
bool VulkanPipelineCache::GetGeometryShaderKey(
|
||||
PipelineGeometryShader geometry_shader_type,
|
||||
SpirvShaderTranslator::Modification vertex_shader_modification,
|
||||
SpirvShaderTranslator::Modification pixel_shader_modification,
|
||||
GeometryShaderKey& key_out) {
|
||||
if (geometry_shader_type == PipelineGeometryShader::kNone) {
|
||||
return false;
|
||||
}
|
||||
// For kPointListAsTriangleStrip, output_point_parameters has a different
|
||||
// meaning (the coordinates, not the size). However, the AsTriangleStrip host
|
||||
// vertex shader types are needed specifically when geometry shaders are not
|
||||
// supported as fallbacks.
|
||||
if (vertex_shader_modification.vertex.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kPointListAsTriangleStrip ||
|
||||
vertex_shader_modification.vertex.host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kRectangleListAsTriangleStrip) {
|
||||
assert_always();
|
||||
return false;
|
||||
}
|
||||
GeometryShaderKey key;
|
||||
key.type = geometry_shader_type;
|
||||
// TODO(Triang3l): Once all needed inputs and outputs are added, uncomment the
|
||||
|
@ -832,9 +857,8 @@ bool VulkanPipelineCache::GetGeometryShaderKey(
|
|||
key.has_vertex_kill_and =
|
||||
/* vertex_shader_modification.vertex.vertex_kill_and */ 0;
|
||||
key.has_point_size =
|
||||
/* vertex_shader_modification.vertex.output_point_size */ 0;
|
||||
key.has_point_coordinates =
|
||||
/* pixel_shader_modification.pixel.param_gen_point */ 0;
|
||||
vertex_shader_modification.vertex.output_point_parameters;
|
||||
key.has_point_coordinates = pixel_shader_modification.pixel.param_gen_point;
|
||||
key_out = key;
|
||||
return true;
|
||||
}
|
||||
|
@ -853,6 +877,13 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
spv::ExecutionMode output_primitive_execution_mode = spv::ExecutionMode(0);
|
||||
uint32_t output_max_vertices = 0;
|
||||
switch (key.type) {
|
||||
case PipelineGeometryShader::kPointList:
|
||||
// Point to a strip of 2 triangles.
|
||||
input_primitive_execution_mode = spv::ExecutionModeInputPoints;
|
||||
input_primitive_vertex_count = 1;
|
||||
output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip;
|
||||
output_max_vertices = 4;
|
||||
break;
|
||||
case PipelineGeometryShader::kRectangleList:
|
||||
// Triangle to a strip of 2 triangles.
|
||||
input_primitive_execution_mode = spv::ExecutionModeTriangles;
|
||||
|
@ -901,6 +932,7 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
spv::Id type_bool4 = builder.makeVectorType(type_bool, 4);
|
||||
spv::Id type_int = builder.makeIntType(32);
|
||||
spv::Id type_float = builder.makeFloatType(32);
|
||||
spv::Id type_float2 = builder.makeVectorType(type_float, 2);
|
||||
spv::Id type_float4 = builder.makeVectorType(type_float, 4);
|
||||
spv::Id type_clip_distances =
|
||||
clip_distance_count
|
||||
|
@ -912,9 +944,54 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
? builder.makeArrayType(
|
||||
type_float, builder.makeUintConstant(cull_distance_count), 0)
|
||||
: spv::NoType;
|
||||
spv::Id type_point_coordinates = key.has_point_coordinates
|
||||
? builder.makeVectorType(type_float, 2)
|
||||
: spv::NoType;
|
||||
|
||||
// System constants.
|
||||
// For points:
|
||||
// - float2 point_constant_diameter
|
||||
// - float2 point_screen_diameter_to_ndc_radius
|
||||
// Member indices of the XeSystemConstants struct type that is built for the
// point list geometry shader. Both indexed members are declared as float2,
// and these values are also used as the first index of the access chains that
// load them. kPointConstantCount is the number of members and sizes the
// member type list.
enum PointConstant : uint32_t {
|
||||
kPointConstantConstantDiameter,
|
||||
kPointConstantScreenDiameterToNdcRadius,
|
||||
kPointConstantCount,
|
||||
};
|
||||
spv::Id type_system_constants = spv::NoType;
|
||||
if (key.type == PipelineGeometryShader::kPointList) {
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.resize(kPointConstantCount);
|
||||
id_vector_temp[kPointConstantConstantDiameter] = type_float2;
|
||||
id_vector_temp[kPointConstantScreenDiameterToNdcRadius] = type_float2;
|
||||
type_system_constants =
|
||||
builder.makeStructType(id_vector_temp, "XeSystemConstants");
|
||||
builder.addMemberName(type_system_constants, kPointConstantConstantDiameter,
|
||||
"point_constant_diameter");
|
||||
builder.addMemberDecoration(
|
||||
type_system_constants, kPointConstantConstantDiameter,
|
||||
spv::DecorationOffset,
|
||||
int(offsetof(SpirvShaderTranslator::SystemConstants,
|
||||
point_constant_diameter)));
|
||||
builder.addMemberName(type_system_constants,
|
||||
kPointConstantScreenDiameterToNdcRadius,
|
||||
"point_screen_diameter_to_ndc_radius");
|
||||
builder.addMemberDecoration(
|
||||
type_system_constants, kPointConstantScreenDiameterToNdcRadius,
|
||||
spv::DecorationOffset,
|
||||
int(offsetof(SpirvShaderTranslator::SystemConstants,
|
||||
point_screen_diameter_to_ndc_radius)));
|
||||
}
|
||||
spv::Id uniform_system_constants = spv::NoResult;
|
||||
if (type_system_constants != spv::NoType) {
|
||||
builder.addDecoration(type_system_constants, spv::DecorationBlock);
|
||||
uniform_system_constants = builder.createVariable(
|
||||
spv::NoPrecision, spv::StorageClassUniform, type_system_constants,
|
||||
"xe_uniform_system_constants");
|
||||
builder.addDecoration(uniform_system_constants,
|
||||
spv::DecorationDescriptorSet,
|
||||
int(SpirvShaderTranslator::kDescriptorSetConstants));
|
||||
builder.addDecoration(uniform_system_constants, spv::DecorationBinding,
|
||||
int(SpirvShaderTranslator::kConstantBufferSystem));
|
||||
// Generating SPIR-V 1.0, no need to add bindings to the entry point's
|
||||
// interface until SPIR-V 1.4.
|
||||
}
|
||||
|
||||
// Inputs and outputs - matching glslang order, in gl_PerVertex gl_in[],
|
||||
// user-defined outputs, user-defined inputs, out gl_PerVertex.
|
||||
|
@ -977,6 +1054,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
type_array_in_gl_per_vertex, "gl_in");
|
||||
main_interface.push_back(in_gl_per_vertex);
|
||||
|
||||
uint32_t output_location = 0;
|
||||
|
||||
// Interpolators outputs.
|
||||
std::array<spv::Id, xenos::kMaxInterpolators> out_interpolators;
|
||||
for (uint32_t i = 0; i < key.interpolator_count; ++i) {
|
||||
|
@ -984,23 +1063,28 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
spv::NoPrecision, spv::StorageClassOutput, type_float4,
|
||||
fmt::format("xe_out_interpolator_{}", i).c_str());
|
||||
out_interpolators[i] = out_interpolator;
|
||||
builder.addDecoration(out_interpolator, spv::DecorationLocation, i);
|
||||
builder.addDecoration(out_interpolator, spv::DecorationLocation,
|
||||
int(output_location));
|
||||
builder.addDecoration(out_interpolator, spv::DecorationInvariant);
|
||||
main_interface.push_back(out_interpolator);
|
||||
++output_location;
|
||||
}
|
||||
|
||||
// Point coordinate output.
|
||||
spv::Id out_point_coordinates = spv::NoResult;
|
||||
if (key.has_point_coordinates) {
|
||||
out_point_coordinates = builder.createVariable(
|
||||
spv::NoPrecision, spv::StorageClassOutput, type_point_coordinates,
|
||||
"xe_out_point_coordinates");
|
||||
out_point_coordinates =
|
||||
builder.createVariable(spv::NoPrecision, spv::StorageClassOutput,
|
||||
type_float2, "xe_out_point_coordinates");
|
||||
builder.addDecoration(out_point_coordinates, spv::DecorationLocation,
|
||||
key.interpolator_count);
|
||||
int(output_location));
|
||||
builder.addDecoration(out_point_coordinates, spv::DecorationInvariant);
|
||||
main_interface.push_back(out_point_coordinates);
|
||||
++output_location;
|
||||
}
|
||||
|
||||
uint32_t input_location = 0;
|
||||
|
||||
// Interpolator inputs.
|
||||
std::array<spv::Id, xenos::kMaxInterpolators> in_interpolators;
|
||||
for (uint32_t i = 0; i < key.interpolator_count; ++i) {
|
||||
|
@ -1010,8 +1094,10 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
0),
|
||||
fmt::format("xe_in_interpolator_{}", i).c_str());
|
||||
in_interpolators[i] = in_interpolator;
|
||||
builder.addDecoration(in_interpolator, spv::DecorationLocation, i);
|
||||
builder.addDecoration(in_interpolator, spv::DecorationLocation,
|
||||
int(input_location));
|
||||
main_interface.push_back(in_interpolator);
|
||||
++input_location;
|
||||
}
|
||||
|
||||
// Point size input.
|
||||
|
@ -1023,8 +1109,9 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
0),
|
||||
"xe_in_point_size");
|
||||
builder.addDecoration(in_point_size, spv::DecorationLocation,
|
||||
key.interpolator_count);
|
||||
int(input_location));
|
||||
main_interface.push_back(in_point_size);
|
||||
++input_location;
|
||||
}
|
||||
|
||||
// out gl_PerVertex.
|
||||
|
@ -1198,6 +1285,231 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
}
|
||||
|
||||
switch (key.type) {
|
||||
case PipelineGeometryShader::kPointList: {
|
||||
// Expand the point sprite, with left-to-right, top-to-bottom UVs.
|
||||
|
||||
spv::Id const_int_0 = builder.makeIntConstant(0);
|
||||
spv::Id const_int_1 = builder.makeIntConstant(1);
|
||||
spv::Id const_float_0 = builder.makeFloatConstant(0.0f);
|
||||
|
||||
// Load the point diameter in guest pixels.
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(2);
|
||||
id_vector_temp.push_back(
|
||||
builder.makeIntConstant(int32_t(kPointConstantConstantDiameter)));
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
spv::Id point_guest_diameter_x = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants, id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
id_vector_temp.back() = const_int_1;
|
||||
spv::Id point_guest_diameter_y = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants, id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
if (key.has_point_size) {
|
||||
// The vertex shader's header writes -1.0 to point_size by default, so
|
||||
// any non-negative value means that it was overwritten by the
|
||||
// translated vertex shader, and needs to be used instead of the
|
||||
// constant size. The per-vertex diameter is already clamped in the
|
||||
// vertex shader (combined with making it non-negative).
|
||||
id_vector_temp.clear();
|
||||
// 0 is the input primitive vertex index.
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
spv::Id point_vertex_diameter = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassInput, in_point_size,
|
||||
id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
spv::Id point_vertex_diameter_written =
|
||||
builder.createBinOp(spv::OpFOrdGreaterThanEqual, type_bool,
|
||||
point_vertex_diameter, const_float_0);
|
||||
point_guest_diameter_x = builder.createTriOp(
|
||||
spv::OpSelect, type_float, point_vertex_diameter_written,
|
||||
point_vertex_diameter, point_guest_diameter_x);
|
||||
point_guest_diameter_y = builder.createTriOp(
|
||||
spv::OpSelect, type_float, point_vertex_diameter_written,
|
||||
point_vertex_diameter, point_guest_diameter_y);
|
||||
}
|
||||
|
||||
// 4D5307F1 has zero-size snowflakes, drop them quicker, and also drop
|
||||
// points with a constant size of zero since point lists may also be used
|
||||
// as just "compute" with memexport.
|
||||
spv::Id point_size_not_zero = builder.createBinOp(
|
||||
spv::OpLogicalAnd, type_bool,
|
||||
builder.createBinOp(spv::OpFOrdGreaterThan, type_bool,
|
||||
point_guest_diameter_x, const_float_0),
|
||||
builder.createBinOp(spv::OpFOrdGreaterThan, type_bool,
|
||||
point_guest_diameter_y, const_float_0));
|
||||
spv::Block& point_size_zero_predecessor = *builder.getBuildPoint();
|
||||
spv::Block& point_size_zero_then_block = builder.makeNewBlock();
|
||||
spv::Block& point_size_zero_merge_block = builder.makeNewBlock();
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> selection_merge_op(
|
||||
std::make_unique<spv::Instruction>(spv::OpSelectionMerge));
|
||||
selection_merge_op->addIdOperand(point_size_zero_merge_block.getId());
|
||||
selection_merge_op->addImmediateOperand(
|
||||
spv::SelectionControlDontFlattenMask);
|
||||
point_size_zero_predecessor.addInstruction(
|
||||
std::move(selection_merge_op));
|
||||
}
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> branch_conditional_op(
|
||||
std::make_unique<spv::Instruction>(spv::OpBranchConditional));
|
||||
branch_conditional_op->addIdOperand(point_size_not_zero);
|
||||
branch_conditional_op->addIdOperand(
|
||||
point_size_zero_merge_block.getId());
|
||||
branch_conditional_op->addIdOperand(point_size_zero_then_block.getId());
|
||||
branch_conditional_op->addImmediateOperand(2);
|
||||
branch_conditional_op->addImmediateOperand(1);
|
||||
point_size_zero_predecessor.addInstruction(
|
||||
std::move(branch_conditional_op));
|
||||
}
|
||||
point_size_zero_then_block.addPredecessor(&point_size_zero_predecessor);
|
||||
point_size_zero_merge_block.addPredecessor(&point_size_zero_predecessor);
|
||||
builder.setBuildPoint(&point_size_zero_then_block);
|
||||
builder.createNoResultOp(spv::OpReturn);
|
||||
builder.setBuildPoint(&point_size_zero_merge_block);
|
||||
|
||||
// Transform the diameter in the guest screen coordinates to radius in the
|
||||
// normalized device coordinates, and then to the clip space by
|
||||
// multiplying by W.
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(2);
|
||||
id_vector_temp.push_back(builder.makeIntConstant(
|
||||
int32_t(kPointConstantScreenDiameterToNdcRadius)));
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
spv::Id point_radius_x = builder.createBinOp(
|
||||
spv::OpFMul, type_float, point_guest_diameter_x,
|
||||
builder.createLoad(builder.createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants,
|
||||
id_vector_temp),
|
||||
spv::NoPrecision));
|
||||
builder.addDecoration(point_radius_x, spv::DecorationNoContraction);
|
||||
id_vector_temp.back() = const_int_1;
|
||||
spv::Id point_radius_y = builder.createBinOp(
|
||||
spv::OpFMul, type_float, point_guest_diameter_y,
|
||||
builder.createLoad(builder.createAccessChain(spv::StorageClassUniform,
|
||||
uniform_system_constants,
|
||||
id_vector_temp),
|
||||
spv::NoPrecision));
|
||||
builder.addDecoration(point_radius_y, spv::DecorationNoContraction);
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(2);
|
||||
// 0 is the input primitive vertex index.
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
id_vector_temp.push_back(const_member_in_gl_per_vertex_position);
|
||||
spv::Id point_position = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex,
|
||||
id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
spv::Id point_w =
|
||||
builder.createCompositeExtract(point_position, type_float, 3);
|
||||
point_radius_x =
|
||||
builder.createBinOp(spv::OpFMul, type_float, point_radius_x, point_w);
|
||||
builder.addDecoration(point_radius_x, spv::DecorationNoContraction);
|
||||
point_radius_y =
|
||||
builder.createBinOp(spv::OpFMul, type_float, point_radius_y, point_w);
|
||||
builder.addDecoration(point_radius_y, spv::DecorationNoContraction);
|
||||
|
||||
// Load the inputs for the guest point.
|
||||
// Interpolators.
|
||||
std::array<spv::Id, xenos::kMaxInterpolators> point_interpolators;
|
||||
id_vector_temp.clear();
|
||||
// 0 is the input primitive vertex index.
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
for (uint32_t i = 0; i < key.interpolator_count; ++i) {
|
||||
point_interpolators[i] = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassInput,
|
||||
in_interpolators[i], id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
}
|
||||
// Positions.
|
||||
spv::Id point_x =
|
||||
builder.createCompositeExtract(point_position, type_float, 0);
|
||||
spv::Id point_y =
|
||||
builder.createCompositeExtract(point_position, type_float, 1);
|
||||
std::array<spv::Id, 2> point_edge_x, point_edge_y;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
spv::Op point_radius_add_op = i ? spv::OpFAdd : spv::OpFSub;
|
||||
point_edge_x[i] = builder.createBinOp(point_radius_add_op, type_float,
|
||||
point_x, point_radius_x);
|
||||
builder.addDecoration(point_edge_x[i], spv::DecorationNoContraction);
|
||||
point_edge_y[i] = builder.createBinOp(point_radius_add_op, type_float,
|
||||
point_y, point_radius_y);
|
||||
builder.addDecoration(point_edge_y[i], spv::DecorationNoContraction);
|
||||
};
|
||||
spv::Id point_z =
|
||||
builder.createCompositeExtract(point_position, type_float, 2);
|
||||
// Clip distances.
|
||||
spv::Id point_clip_distances = spv::NoResult;
|
||||
if (clip_distance_count) {
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(2);
|
||||
// 0 is the input primitive vertex index.
|
||||
id_vector_temp.push_back(const_int_0);
|
||||
id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance);
|
||||
point_clip_distances = builder.createLoad(
|
||||
builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex,
|
||||
id_vector_temp),
|
||||
spv::NoPrecision);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// Same interpolators for the entire sprite.
|
||||
for (uint32_t j = 0; j < key.interpolator_count; ++j) {
|
||||
builder.createStore(point_interpolators[j], out_interpolators[j]);
|
||||
}
|
||||
// Top-left, bottom-left, top-right, bottom-right order (chosen
|
||||
// arbitrarily, simply based on counterclockwise meaning front with
|
||||
// frontFace = VkFrontFace(0), but faceness is ignored for non-polygon
|
||||
// primitive types).
|
||||
uint32_t point_vertex_x = i >> 1;
|
||||
uint32_t point_vertex_y = i & 1;
|
||||
// Point coordinates.
|
||||
if (key.has_point_coordinates) {
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(2);
|
||||
id_vector_temp.push_back(
|
||||
builder.makeFloatConstant(float(point_vertex_x)));
|
||||
id_vector_temp.push_back(
|
||||
builder.makeFloatConstant(float(point_vertex_y)));
|
||||
builder.createStore(
|
||||
builder.makeCompositeConstant(type_float2, id_vector_temp),
|
||||
out_point_coordinates);
|
||||
}
|
||||
// Position.
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.reserve(4);
|
||||
id_vector_temp.push_back(point_edge_x[point_vertex_x]);
|
||||
id_vector_temp.push_back(point_edge_y[point_vertex_y]);
|
||||
id_vector_temp.push_back(point_z);
|
||||
id_vector_temp.push_back(point_w);
|
||||
spv::Id point_vertex_position =
|
||||
builder.createCompositeConstruct(type_float4, id_vector_temp);
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.push_back(const_member_out_gl_per_vertex_position);
|
||||
builder.createStore(
|
||||
point_vertex_position,
|
||||
builder.createAccessChain(spv::StorageClassOutput,
|
||||
out_gl_per_vertex, id_vector_temp));
|
||||
// Clip distances.
|
||||
// TODO(Triang3l): Handle ps_ucp_mode properly, clip expanded points if
|
||||
// needed.
|
||||
if (clip_distance_count) {
|
||||
id_vector_temp.clear();
|
||||
id_vector_temp.push_back(
|
||||
const_member_out_gl_per_vertex_clip_distance);
|
||||
builder.createStore(
|
||||
point_clip_distances,
|
||||
builder.createAccessChain(spv::StorageClassOutput,
|
||||
out_gl_per_vertex, id_vector_temp));
|
||||
}
|
||||
// Emit the vertex.
|
||||
builder.createNoResultOp(spv::OpEmitVertex);
|
||||
}
|
||||
builder.createNoResultOp(spv::OpEndPrimitive);
|
||||
} break;
|
||||
|
||||
case PipelineGeometryShader::kRectangleList: {
|
||||
// Construct a strip with the fourth vertex generated by mirroring a
|
||||
// vertex across the longest edge (the diagonal).
|
||||
|
@ -1308,8 +1620,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
id_vector_temp.reserve(2);
|
||||
id_vector_temp.push_back(const_float_0);
|
||||
id_vector_temp.push_back(const_float_0);
|
||||
const_point_coordinates_zero = builder.makeCompositeConstant(
|
||||
type_point_coordinates, id_vector_temp);
|
||||
const_point_coordinates_zero =
|
||||
builder.makeCompositeConstant(type_float2, id_vector_temp);
|
||||
}
|
||||
|
||||
// Emit the triangle in the strip that consists of the original vertices.
|
||||
|
@ -1491,8 +1803,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) {
|
|||
id_vector_temp.reserve(2);
|
||||
id_vector_temp.push_back(const_float_0);
|
||||
id_vector_temp.push_back(const_float_0);
|
||||
const_point_coordinates_zero = builder.makeCompositeConstant(
|
||||
type_point_coordinates, id_vector_temp);
|
||||
const_point_coordinates_zero =
|
||||
builder.makeCompositeConstant(type_float2, id_vector_temp);
|
||||
}
|
||||
|
||||
// Build the triangle strip from the original quad vertices in the
|
||||
|
|
|
@ -71,7 +71,7 @@ class VulkanPipelineCache {
|
|||
SpirvShaderTranslator::Modification GetCurrentVertexShaderModification(
|
||||
const Shader& shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type,
|
||||
uint32_t interpolator_mask) const;
|
||||
uint32_t interpolator_mask, bool ps_param_gen_used) const;
|
||||
SpirvShaderTranslator::Modification GetCurrentPixelShaderModification(
|
||||
const Shader& shader, uint32_t interpolator_mask,
|
||||
uint32_t param_gen_pos) const;
|
||||
|
@ -92,6 +92,7 @@ class VulkanPipelineCache {
|
|||
private:
|
||||
// Which geometry shader, if any, a pipeline description requests:
// - kNone: no geometry shader; GetGeometryShaderKey returns false for it.
// - kPointList: chosen for the kPointList host primitive type; the generated
//   shader takes a point and outputs a strip of 2 triangles (4 vertices).
// - kRectangleList: the generated shader takes a triangle and outputs a strip
//   of 2 triangles.
// - kQuadList: NOTE(review): presumably builds a triangle strip from the
//   original quad vertices - confirm in GetGeometryShader.
enum class PipelineGeometryShader : uint32_t {
|
||||
kNone,
|
||||
kPointList,
|
||||
|
||||
kRectangleList,
|
||||
kQuadList,
|
||||
};
|
||||
|
@ -267,6 +268,7 @@ class VulkanPipelineCache {
|
|||
// Builds the key describing the geometry shader needed for the given geometry
// shader type and the vertex / pixel shader modifications.
// Returns false when geometry_shader_type is kNone, and also (after
// assert_always) when the vertex shader modification uses the
// kPointListAsTriangleStrip or kRectangleListAsTriangleStrip host vertex
// shader type. On success, stores the key in key_out and returns true.
// NOTE(review): part of the definition is outside the reviewed diff hunks -
// confirm there are no other early returns.
static bool GetGeometryShaderKey(
|
||||
PipelineGeometryShader geometry_shader_type,
|
||||
SpirvShaderTranslator::Modification vertex_shader_modification,
|
||||
SpirvShaderTranslator::Modification pixel_shader_modification,
|
||||
GeometryShaderKey& key_out);
|
||||
VkShaderModule GetGeometryShader(GeometryShaderKey key);
|
||||
|
||||
|
|
|
@ -27,17 +27,18 @@ namespace vulkan {
|
|||
// Destructor: delegates teardown to Shutdown(true).
// NOTE(review): the meaning of the `true` argument is not visible here (the
// Initialize failure path calls Shutdown() without it) - confirm against the
// Shutdown declaration.
VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); }
|
||||
|
||||
bool VulkanPrimitiveProcessor::Initialize() {
|
||||
// TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch.
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
|
||||
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
|
||||
device_portability_subset_features =
|
||||
provider.device_portability_subset_features();
|
||||
if (!InitializeCommon(true,
|
||||
if (!InitializeCommon(device_features.fullDrawIndexUint32,
|
||||
!device_portability_subset_features ||
|
||||
device_portability_subset_features->triangleFans,
|
||||
false, device_features.geometryShader)) {
|
||||
false, device_features.geometryShader,
|
||||
device_features.geometryShader,
|
||||
device_features.geometryShader)) {
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
|
@ -128,9 +129,9 @@ void VulkanPrimitiveProcessor::EndFrame() {
|
|||
frame_index_buffers_.clear();
|
||||
}
|
||||
|
||||
bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
|
||||
assert_not_zero(index_count);
|
||||
bool VulkanPrimitiveProcessor::InitializeBuiltinIndexBuffer(
|
||||
size_t size_bytes, std::function<void(void*)> fill_callback) {
|
||||
assert_not_zero(size_bytes);
|
||||
assert_true(builtin_index_buffer_ == VK_NULL_HANDLE);
|
||||
assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE);
|
||||
assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE);
|
||||
|
@ -141,7 +142,7 @@ bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count);
|
||||
builtin_index_buffer_size_ = VkDeviceSize(size_bytes);
|
||||
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
|
||||
provider, builtin_index_buffer_size_,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
||||
|
@ -149,8 +150,8 @@ bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
builtin_index_buffer_memory_)) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to create the built-in index "
|
||||
"buffer GPU resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"buffer GPU resource with {} bytes",
|
||||
size_bytes);
|
||||
return false;
|
||||
}
|
||||
uint32_t upload_memory_type;
|
||||
|
@ -162,8 +163,8 @@ bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
&upload_memory_type)) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to create the built-in index "
|
||||
"buffer upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"buffer upload resource with {} bytes",
|
||||
size_bytes);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
builtin_index_buffer_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
|
@ -176,8 +177,8 @@ bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to map the built-in index buffer "
|
||||
"upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
"upload resource with {} bytes",
|
||||
size_bytes);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
builtin_index_buffer_upload_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
|
@ -188,7 +189,7 @@ bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
|||
builtin_index_buffer_memory_);
|
||||
return false;
|
||||
}
|
||||
fill_callback(reinterpret_cast<uint16_t*>(mapping));
|
||||
fill_callback(mapping);
|
||||
ui::vulkan::util::FlushMappedMemoryRange(
|
||||
provider, builtin_index_buffer_memory_, upload_memory_type);
|
||||
dfn.vkUnmapMemory(device, builtin_index_buffer_upload_memory_);
|
||||
|
|
|
@ -56,9 +56,8 @@ class VulkanPrimitiveProcessor final : public PrimitiveProcessor {
|
|||
}
|
||||
|
||||
protected:
|
||||
bool InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count,
|
||||
std::function<void(uint16_t*)> fill_callback) override;
|
||||
bool InitializeBuiltinIndexBuffer(
|
||||
size_t size_bytes, std::function<void(void*)> fill_callback) override;
|
||||
|
||||
void* RequestHostConvertedIndexBufferForCurrentFrame(
|
||||
xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
|
||||
|
|
Loading…
Reference in New Issue