rsx: Fix endianness order when an immediate mode register is updated but later used for register lookup

- Simplify the code by unifying all the register-backed memory
This commit is contained in:
kd-11 2018-08-28 18:19:26 +03:00 committed by kd-11
parent 9acaee563b
commit 6399833182
7 changed files with 54 additions and 48 deletions

View File

@ -435,7 +435,7 @@ namespace
} }
} }
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride) void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness)
{ {
verify(HERE), (vector_element_count > 0); verify(HERE), (vector_element_count > 0);
const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count); const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count);
@ -460,6 +460,8 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
#if !DEBUG_VERTEX_STREAMING #if !DEBUG_VERTEX_STREAMING
if (swap_endianness)
{
if (real_count >= count || real_count == 1) if (real_count >= count || real_count == 1)
{ {
if (attribute_src_stride == dst_stride && src_read_stride == dst_stride) if (attribute_src_stride == dst_stride && src_read_stride == dst_stride)
@ -467,6 +469,7 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
else else
use_stream_with_stride = true; use_stream_with_stride = true;
} }
}
#endif #endif
@ -492,8 +495,10 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
stream_data_to_memory_swapped_u16(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride); stream_data_to_memory_swapped_u16(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride);
else if (use_stream_with_stride) else if (use_stream_with_stride)
stream_data_to_memory_swapped_u16_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride); stream_data_to_memory_swapped_u16_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride);
else else if (swap_endianness)
copy_whole_attribute_array<be_t<u16>, u16>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); copy_whole_attribute_array<be_t<u16>, u16>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count);
else
copy_whole_attribute_array<u16, u16>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count);
return; return;
} }
@ -503,8 +508,10 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
stream_data_to_memory_swapped_u32(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride); stream_data_to_memory_swapped_u32(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride);
else if (use_stream_with_stride) else if (use_stream_with_stride)
stream_data_to_memory_swapped_u32_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride); stream_data_to_memory_swapped_u32_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride);
else else if (swap_endianness)
copy_whole_attribute_array<be_t<u32>, u32>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); copy_whole_attribute_array<be_t<u32>, u32>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count);
else
copy_whole_attribute_array<u32, u32>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count);
return; return;
} }
@ -513,10 +520,11 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
gsl::span<u16> dst_span = as_span_workaround<u16>(raw_dst_span); gsl::span<u16> dst_span = as_span_workaround<u16>(raw_dst_span);
for (u32 i = 0; i < count; ++i) for (u32 i = 0; i < count; ++i)
{ {
be_t<u32> src_value; u32 src_value;
memcpy(&src_value, memcpy(&src_value, src_ptr.subspan(attribute_src_stride * i).data(), sizeof(u32));
src_ptr.subspan(attribute_src_stride * i).data(),
sizeof(be_t<u32>)); if (swap_endianness) src_value = se_storage<u32>::swap(src_value);
const auto& decoded_vector = decode_cmp_vector(src_value); const auto& decoded_vector = decode_cmp_vector(src_value);
dst_span[i * dst_stride / sizeof(u16)] = decoded_vector[0]; dst_span[i * dst_stride / sizeof(u16)] = decoded_vector[0];
dst_span[i * dst_stride / sizeof(u16) + 1] = decoded_vector[1]; dst_span[i * dst_stride / sizeof(u16) + 1] = decoded_vector[1];

View File

@ -10,7 +10,7 @@
* Write count vertex attributes from src_ptr. * Write count vertex attributes from src_ptr.
* src_ptr array layout is deduced from the type, vector element count and src_stride arguments. * src_ptr array layout is deduced from the type, vector element count and src_stride arguments.
*/ */
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride); void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness);
/* /*
* If primitive mode is not supported and needs to be emulated (using an index buffer), returns false.

View File

@ -167,7 +167,7 @@ namespace
gsl::span<gsl::byte> mapped_buffer_span = { gsl::span<gsl::byte> mapped_buffer_span = {
(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)}; (gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count,
vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size); vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));

View File

@ -1292,7 +1292,7 @@ namespace rsx
{ {
const rsx::data_array_format_info& info = state.vertex_arrays_info[index]; const rsx::data_array_format_info& info = state.vertex_arrays_info[index];
result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(), result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(),
get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index}); get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index, true});
continue; continue;
} }
@ -1302,7 +1302,7 @@ namespace rsx
const u8 element_size = info.size * sizeof(u32); const u8 element_size = info.size * sizeof(u32);
gsl::span<const gsl::byte> vertex_src = { (const gsl::byte*)vertex_push_buffers[index].data.data(), vertex_push_buffers[index].vertex_count * element_size }; gsl::span<const gsl::byte> vertex_src = { (const gsl::byte*)vertex_push_buffers[index].data.data(), vertex_push_buffers[index].vertex_count * element_size };
result.push_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index }); result.push_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index, false });
continue; continue;
} }
@ -2416,7 +2416,7 @@ namespace rsx
s32 size = 0; s32 size = 0;
s32 attributes = 0; s32 attributes = 0;
bool is_be_type = true; bool swap_u8_types = false;
if (layout.attribute_placement[index] == attribute_buffer_placement::transient) if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
{ {
@ -2429,12 +2429,14 @@ namespace rsx
attributes = layout.interleaved_blocks[0].attribute_stride; attributes = layout.interleaved_blocks[0].attribute_stride;
attributes |= default_frequency_mask | volatile_storage_mask; attributes |= default_frequency_mask | volatile_storage_mask;
is_be_type = false; // [NPEA90002] Grass is rendered via inline array
// Expects swapped bytes for u8 types
swap_u8_types = true;
} }
else else
{ {
//Data is either from an immediate render or register input // Data is either from an immediate render or register input
//Immediate data overrides register input // Immediate data overrides register input
if (rsx::method_registers.current_draw_clause.is_immediate_draw && if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
vertex_push_buffers[index].vertex_count > 1) vertex_push_buffers[index].vertex_count > 1)
@ -2446,11 +2448,13 @@ namespace rsx
attributes = rsx::get_vertex_type_size_on_host(type, size); attributes = rsx::get_vertex_type_size_on_host(type, size);
attributes |= default_frequency_mask | volatile_storage_mask; attributes |= default_frequency_mask | volatile_storage_mask;
is_be_type = true; // RDR intro contains text passed via immediate render mode
// Expects swapped bytes for u8 types
swap_u8_types = true;
} }
else else
{ {
//Register // Register
const auto& info = rsx::method_registers.register_vertex_info[index]; const auto& info = rsx::method_registers.register_vertex_info[index];
type = info.type; type = info.type;
size = info.size; size = info.size;
@ -2458,7 +2462,8 @@ namespace rsx
attributes = rsx::get_vertex_type_size_on_host(type, size); attributes = rsx::get_vertex_type_size_on_host(type, size);
attributes |= volatile_storage_mask; attributes |= volatile_storage_mask;
is_be_type = false; // Resistance intro expects u8 types in native order
// swap_u8_types = false;
} }
} }
} }
@ -2478,8 +2483,10 @@ namespace rsx
{ {
case 0: case 0:
case 1: case 1:
{
attributes |= default_frequency_mask; attributes |= default_frequency_mask;
break; break;
}
default: default:
{ {
if (modulo_mask & (1 << index)) if (modulo_mask & (1 << index))
@ -2487,25 +2494,31 @@ namespace rsx
attributes |= repeating_frequency_mask; attributes |= repeating_frequency_mask;
attributes |= (frequency << 13) & input_divisor_mask; attributes |= (frequency << 13) & input_divisor_mask;
break;
} }
} }
} }
} //end attribute placement check } //end attribute placement check
// If data is passed via registers, it is already received in little endian
const bool is_be_type = (layout.attribute_placement[index] != attribute_buffer_placement::transient);
bool to_swap_bytes = is_be_type;
switch (type) switch (type)
{ {
case rsx::vertex_base_type::cmp: case rsx::vertex_base_type::cmp:
// Compressed 4 components into one 4-byte value
size = 1; size = 1;
//fall through
default:
if (is_be_type) attributes |= swap_storage_mask;
break; break;
case rsx::vertex_base_type::ub: case rsx::vertex_base_type::ub:
case rsx::vertex_base_type::ub256: case rsx::vertex_base_type::ub256:
if (!is_be_type) attributes |= swap_storage_mask; // These are single byte formats, but inverted order (BGRA vs ARGB) when passed via registers
to_swap_bytes = swap_u8_types;
break; break;
} }
if (to_swap_bytes) attributes |= swap_storage_mask;
buffer[index * 4 + 0] = static_cast<s32>(type); buffer[index * 4 + 0] = static_cast<s32>(type);
buffer[index * 4 + 1] = size; buffer[index * 4 + 1] = size;
buffer[index * 4 + 2] = offset_in_block[index]; buffer[index * 4 + 2] = offset_in_block[index];
@ -2529,7 +2542,7 @@ namespace rsx
return; return;
} }
//NOTE: Order is important! Transient ayout is always push_buffers followed by register data //NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (draw_call.is_immediate_draw) if (draw_call.is_immediate_draw)
{ {
//NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory //NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory

View File

@ -138,6 +138,7 @@ namespace rsx
u8 stride; u8 stride;
gsl::span<const gsl::byte> data; gsl::span<const gsl::byte> data;
u8 index; u8 index;
bool is_be;
}; };
struct vertex_array_register struct vertex_array_register

View File

@ -226,7 +226,10 @@ namespace rsx
const auto vtype = vertex_data_type_from_element_type<type>::type; const auto vtype = vertex_data_type_from_element_type<type>::type;
if (rsx->in_begin_end) if (rsx->in_begin_end)
{
// Update to immediate mode register/array, aliasing with the register view
rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg); rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg);
}
auto& info = rsx::method_registers.register_vertex_info[attribute_index]; auto& info = rsx::method_registers.register_vertex_info[attribute_index];

View File

@ -106,26 +106,7 @@ struct push_buffer_vertex_info
attribute_mask |= element_mask; attribute_mask |= element_mask;
void* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index; void* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index;
//NOTE: Endianness on wide types is converted to BE here because unified upload code assumes input in BE
//TODO: Implement fast LE source inputs and remove the byteswap
switch (type)
{
case vertex_base_type::f:
*(u32*)dst = se_storage<u32>::swap(arg);
break;
case vertex_base_type::ub:
case vertex_base_type::ub256:
*(u32*)dst = arg; *(u32*)dst = arg;
break;
case vertex_base_type::s1:
case vertex_base_type::s32k:
((u16*)dst)[0] = se_storage<u16>::swap((u16)(arg & 0xffff));
((u16*)dst)[1] = se_storage<u16>::swap((u16)(arg >> 16));
break;
default:
fmt::throw_exception("Unsupported vertex base type %d", (u8)type);
}
} }
}; };