Merge pull request #1383 from vlj/rsx

rsx/common/d3d12/gl: Use separate vertex array/vertex register states.
This commit is contained in:
Ivan 2015-12-31 11:33:55 +03:00
commit 6811195345
5 changed files with 140 additions and 193 deletions

View File

@ -6,7 +6,7 @@
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
{
assert(vertex_array_desc.array);
assert(vertex_array_desc.size > 0);
if (vertex_array_desc.frequency > 1)
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index);

View File

@ -45,93 +45,93 @@ void D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u3
for (const auto &pair : vertex_ranges)
vertex_count += pair.second;
// First array attribute
u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK];
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (!info.array) // disabled or not a vertex array
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
continue;
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t buffer_size = element_size * vertex_count;
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
for (const auto &range : vertex_ranges)
if (vertex_arrays_info[index].size > 0)
{
write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info);
mapped_buffer = (char*)mapped_buffer + range.second * element_size;
// Active vertex array
const rsx::data_array_format_info &info = vertex_arrays_info[index];
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t buffer_size = element_size * vertex_count;
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
for (const auto &range : vertex_ranges)
{
write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info);
mapped_buffer = (char*)mapped_buffer + range.second * element_size;
}
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
m_timers.m_buffer_upload_size += buffer_size;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_IASet.push_back(IAElement);
}
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
else if (register_vertex_info[index].size > 0)
{
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
// In register vertex attribute
const rsx::data_array_format_info &info = register_vertex_info[index];
m_timers.m_buffer_upload_size += buffer_size;
const std::vector<u8> &data = register_vertex_data[index];
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_IASet.push_back(IAElement);
}
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
// Now immediate vertex buffer
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
size_t buffer_size = data.size();
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
if (info.array)
continue;
if (!info.size) // disabled
continue;
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
memcpy(mapped_buffer, data.data(), data.size());
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
auto &data = vertex_arrays[index];
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t buffer_size = data.size();
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
memcpy(mapped_buffer, data.data(), data.size());
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
}
}
}

View File

@ -366,133 +366,58 @@ void GLGSRender::end()
std::vector<u8> vertex_arrays_data;
size_t vertex_arrays_offsets[rsx::limits::vertex_count];
#if DUMP_VERTEX_DATA
fs::file dump(fs::get_config_dir() + "VertexDataArray.dump", fom::rewrite);
Emu.Pause();
#endif
for (int index = 0; index < rsx::limits::vertex_count; ++index)
const std::string reg_table[] =
{
size_t position = vertex_arrays_data.size();
vertex_arrays_offsets[index] = position;
"in_pos", "in_weight", "in_normal",
"in_diff_color", "in_spec_color",
"in_fog",
"in_point_size", "in_7",
"in_tc0", "in_tc1", "in_tc2", "in_tc3",
"in_tc4", "in_tc5", "in_tc6", "in_tc7"
};
if (vertex_arrays[index].empty())
continue;
size_t size = vertex_arrays[index].size();
vertex_arrays_data.resize(position + size);
memcpy(vertex_arrays_data.data() + position, vertex_arrays[index].data(), size);
#if DUMP_VERTEX_DATA
auto &vertex_info = vertex_arrays_info[index];
dump.write(fmt::format("VertexData[%d]:\n", index));
switch (vertex_info.type)
{
case CELL_GCM_VERTEX_S1:
for (u32 j = 0; j < vertex_arrays[index].size(); j += 2)
{
dump.write(fmt::format("%d\n", *(u16*)&vertex_arrays[index][j]));
if (!(((j + 2) / 2) % vertex_info.size)) dump.write("\n");
}
break;
case CELL_GCM_VERTEX_F:
for (u32 j = 0; j < vertex_arrays[index].size(); j += 4)
{
dump.write(fmt::format("%.01f\n", *(float*)&vertex_arrays[index][j]));
if (!(((j + 4) / 4) % vertex_info.size)) dump.write("\n");
}
break;
case CELL_GCM_VERTEX_SF:
for (u32 j = 0; j < vertex_arrays[index].size(); j += 2)
{
dump.write(fmt::format("%.01f\n", *(float*)&vertex_arrays[index][j]));
if (!(((j + 2) / 2) % vertex_info.size)) dump.write("\n");
}
break;
case CELL_GCM_VERTEX_UB:
for (u32 j = 0; j < vertex_arrays[index].size(); ++j)
{
dump.write(fmt::format("%d\n", vertex_arrays[index][j]));
if (!((j + 1) % vertex_info.size)) dump.write("\n");
}
break;
case CELL_GCM_VERTEX_S32K:
for (u32 j = 0; j < vertex_arrays[index].size(); j += 2)
{
dump.write(fmt::format("%d\n", *(u16*)&vertex_arrays[index][j]));
if (!(((j + 2) / 2) % vertex_info.size)) dump.write("\n");
}
break;
// case CELL_GCM_VERTEX_CMP:
case CELL_GCM_VERTEX_UB256:
for (u32 j = 0; j < vertex_arrays[index].size(); ++j)
{
dump.write(fmt::format("%d\n", vertex_arrays[index][j]));
if (!((j + 1) % vertex_info.size)) dump.write("\n");
}
break;
}
dump.write("\n");
#endif
}
m_vbo.data(vertex_arrays_data.size(), vertex_arrays_data.data());
u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK];
m_vao.bind();
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
auto &vertex_info = vertex_arrays_info[index];
if (!vertex_info.size)
{
//disabled
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
continue;
}
if (vertex_info.type < 1 || vertex_info.type > 7)
{
LOG_ERROR(RSX, "GLGSRender::EnableVertexData: Bad vertex data type (%d)!", vertex_info.type);
continue;
}
static const std::string reg_table[] =
{
"in_pos", "in_weight", "in_normal",
"in_diff_color", "in_spec_color",
"in_fog",
"in_point_size", "in_7",
"in_tc0", "in_tc1", "in_tc2", "in_tc3",
"in_tc4", "in_tc5", "in_tc6", "in_tc7"
};
int location;
//TODO: use attrib input mask register
if (!m_program->attribs.has_location(reg_table[index], &location))
continue;
if (vertex_info.array)
if (vertex_arrays_info[index].size > 0)
{
auto &vertex_info = vertex_arrays_info[index];
// Active vertex array
size_t position = vertex_arrays_data.size();
vertex_arrays_offsets[index] = position;
if (vertex_arrays[index].empty())
continue;
size_t size = vertex_arrays[index].size();
vertex_arrays_data.resize(position + size);
memcpy(vertex_arrays_data.data() + position, vertex_arrays[index].data(), size);
__glcheck m_program->attribs[location] =
(m_vao + vertex_arrays_offsets[index])
.config(gl_types[vertex_info.type], vertex_info.size, gl_normalized[vertex_info.type]);
}
else
else if (register_vertex_info[index].size > 0)
{
auto &vertex_data = vertex_arrays[index];
auto &vertex_data = register_vertex_data[index];
auto &vertex_info = register_vertex_info[index];
switch (vertex_info.type)
{
case CELL_GCM_VERTEX_F:
switch (vertex_info.size)
switch (register_vertex_info[index].size)
{
case 1: apply_attrib_array<f32, 1>(*m_program, location, vertex_data); break;
case 2: apply_attrib_array<f32, 2>(*m_program, location, vertex_data); break;
@ -507,6 +432,8 @@ void GLGSRender::end()
}
}
}
m_vbo.data(vertex_arrays_data.size(), vertex_arrays_data.data());
if (vertex_index_array.empty())
{

View File

@ -82,20 +82,19 @@ namespace rsx
static const size_t element_size = (count * sizeof(type));
static const size_t element_size_in_words = element_size / sizeof(u32);
auto& info = rsx->vertex_arrays_info[index];
auto& info = rsx->register_vertex_info[index];
info.type = vertex_data_type_from_element_type<type>::type;
info.size = count;
info.frequency = 0;
info.stride = 0;
info.array = false;
auto& entry = rsx->vertex_arrays[index];
auto& entry = rsx->register_vertex_data[index];
//find begin of data
size_t begin = id + index * element_size_in_words;
size_t position = entry.size();
size_t position = 0;//entry.size();
entry.resize(position + element_size);
memcpy(entry.data() + position, method_registers + begin, element_size);
@ -170,8 +169,7 @@ namespace rsx
force_inline static void impl(thread* rsx, u32 arg)
{
auto& info = rsx->vertex_arrays_info[index];
info.unpack(arg);
info.array = info.size > 0;
info.unpack_array(arg);
}
};
@ -237,7 +235,7 @@ namespace rsx
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (rsx->vertex_arrays_info[i].array)
if (rsx->vertex_arrays_info[i].size > 0)
{
has_array = true;
break;
@ -250,11 +248,11 @@ namespace rsx
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (!rsx->vertex_arrays_info[i].size)
if (!rsx->register_vertex_info[i].size)
continue;
u32 count = u32(rsx->vertex_arrays[i].size()) /
rsx::get_vertex_type_size(rsx->vertex_arrays_info[i].type) * rsx->vertex_arrays_info[i].size;
u32 count = u32(rsx->register_vertex_data[i].size()) /
rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size;
if (count < min_count)
min_count = count;
@ -933,7 +931,7 @@ namespace rsx
{
const auto &info = vertex_arrays_info[index];
if (!info.array) // disabled or not a vertex array
if (info.size == 0) // disabled
continue;
auto &data = vertex_arrays[index];
@ -1270,6 +1268,10 @@ namespace rsx
method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] = 0xffffffff;
// Reset vertex attrib array
for (int i = 0; i < limits::vertex_count; i++)
vertex_arrays_info[i].size = 0;
// Construct Textures
for (int i = 0; i < limits::textures_count; i++)
{

View File

@ -334,9 +334,8 @@ namespace rsx
u8 stride = 0;
u8 size = 0;
u8 type = CELL_GCM_VERTEX_F;
bool array = false;
void unpack(u32 data_array_format)
void unpack_array(u32 data_array_format)
{
frequency = data_array_format >> 16;
stride = (data_array_format >> 8) & 0xff;
@ -363,6 +362,25 @@ namespace rsx
rsx::texture textures[limits::textures_count];
rsx::vertex_texture vertex_textures[limits::vertex_textures_count];
/**
* RSX can source vertex attributes from 2 places:
* - Immediate values passed by NV4097_SET_VERTEX_DATA*_M + ARRAY_ID write.
* For a given ARRAY_ID the last command of this type defines the actual type of the immediate value.
* Since there can be only a single value per ARRAY_ID passed this way, all vertices in the draw call
* share it.
* - Vertex array values passed by offset/stride/size/format description.
*
* A given ARRAY_ID can have both an immediate value and a vertex array enabled at the same time
* (See After Burner Climax intro cutscene). In such case the vertex array has precedence over the
* immediate value. As soon as the vertex array is disabled (size set to 0) the immediate value
* must be used if the vertex attrib mask requests it.
*
* Note that the behavior when both the vertex array and the immediate value systems are disabled but the vertex attrib mask
* requests inputs is unknown.
*/
data_array_format_info register_vertex_info[limits::vertex_count];
std::vector<u8> register_vertex_data[limits::vertex_count];
data_array_format_info vertex_arrays_info[limits::vertex_count];
std::vector<u8> vertex_arrays[limits::vertex_count];
std::vector<u8> vertex_index_array;