[Vulkan] Handle primitive reset index on copy

This commit is contained in:
DrChat 2018-02-16 18:31:43 -06:00
parent 776b5eff36
commit 1dba231049
2 changed files with 98 additions and 15 deletions

View File

@ -22,6 +22,74 @@ namespace xe {
namespace gpu { namespace gpu {
namespace vulkan { namespace vulkan {
#if XE_ARCH_AMD64
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
uint16_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
__m128i cmpval = _mm_set1_epi16(cmp_value);
size_t i;
for (i = 0; i + 8 <= count; i += 8) {
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
__m128i mask = _mm_cmpeq_epi16(output, cmpval);
output = _mm_or_si128(output, mask);
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = byte_swap(src[i]);
}
}
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
uint32_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
__m128i cmpval = _mm_set1_epi32(cmp_value);
size_t i;
for (i = 0; i + 4 <= count; i += 4) {
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
__m128i mask = _mm_cmpeq_epi32(output, cmpval);
output = _mm_or_si128(output, mask);
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = byte_swap(src[i]);
}
}
#else
void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
uint16_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
for (size_t i = 0; i < count; ++i) {
uint16_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFF : value;
}
}
void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
uint32_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
for (size_t i = 0; i < count; ++i) {
uint32_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
}
}
#endif
using xe::ui::vulkan::CheckResult; using xe::ui::vulkan::CheckResult;
constexpr VkDeviceSize kConstantRegisterUniformRange = constexpr VkDeviceSize kConstantRegisterUniformRange =
@ -293,17 +361,36 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
const void* source_ptr = memory_->TranslatePhysical(source_addr); const void* source_ptr = memory_->TranslatePhysical(source_addr);
// Copy data into the buffer. uint32_t prim_reset_index =
// TODO(benvanik): get min/max indices and pass back? register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
bool prim_reset_enabled =
!!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21));
// Copy data into the buffer. If primitive reset is enabled, translate any
// primitive reset indices to something Vulkan understands.
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
if (format == IndexFormat::kInt16) { if (prim_reset_enabled) {
// Endian::k8in16, swap half-words. if (format == IndexFormat::kInt16) {
xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset, // Endian::k8in16, swap half-words.
source_ptr, source_length / 2); copy_cmp_swap_16_unaligned(
} else if (format == IndexFormat::kInt32) { transient_buffer_->host_base() + offset, source_ptr,
// Endian::k8in32, swap words. static_cast<uint16_t>(prim_reset_index), source_length / 2);
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset, } else if (format == IndexFormat::kInt32) {
source_ptr, source_length / 4); // Endian::k8in32, swap words.
copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset,
source_ptr, prim_reset_index,
source_length / 4);
}
} else {
if (format == IndexFormat::kInt16) {
// Endian::k8in16, swap half-words.
xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 2);
} else if (format == IndexFormat::kInt32) {
// Endian::k8in32, swap words.
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4);
}
} }
transient_buffer_->Flush(offset, source_length); transient_buffer_->Flush(offset, source_length);

View File

@ -1199,16 +1199,12 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState(
// glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); // glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
// } // }
// Primitive restart index is handled in the buffer cache.
if (regs.pa_su_sc_mode_cntl & (1 << 21)) { if (regs.pa_su_sc_mode_cntl & (1 << 21)) {
state_info.primitiveRestartEnable = VK_TRUE; state_info.primitiveRestartEnable = VK_TRUE;
} else { } else {
state_info.primitiveRestartEnable = VK_FALSE; state_info.primitiveRestartEnable = VK_FALSE;
} }
// TODO(benvanik): no way to specify in Vulkan?
assert_true(regs.multi_prim_ib_reset_index == 0xFFFF ||
regs.multi_prim_ib_reset_index == 0xFFFFFF ||
regs.multi_prim_ib_reset_index == 0xFFFFFFFF);
// glPrimitiveRestartIndex(regs.multi_prim_ib_reset_index);
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }