diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index c80a62b1d..a4c8fbb4d 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -600,11 +600,21 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); } if (indexed) { + uint32_t index_base = index_buffer_info->guest_base & 0x1FFFFFFF; uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t); - shared_memory_->UseRange(index_buffer_info->guest_base, - index_buffer_info->count * index_size); + index_base &= ~(index_size - 1); + uint32_t index_buffer_size = index_buffer_info->count * index_size; + shared_memory_->UseRange(index_base, index_buffer_size); + D3D12_INDEX_BUFFER_VIEW index_buffer_view; + index_buffer_view.BufferLocation = + shared_memory_->GetGPUAddress() + index_base; + index_buffer_view.SizeInBytes = index_buffer_size; + index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32 + ? DXGI_FORMAT_R32_UINT + : DXGI_FORMAT_R16_UINT; + command_list->IASetIndexBuffer(&index_buffer_view); command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0); } else { command_list->DrawInstanced(index_count, 1, 0, 0); @@ -881,11 +891,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) { // different register. bool gl_clip_space_def = !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); - float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f / 1280.0f : 1.0f; - float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f / 1280.0f : 1.0f; + float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f : 1.0f / 1280.0f; + float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f : 1.0f / 1280.0f; float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; - float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 
-1.0f : 0.0f; - float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? -1.0f : 0.0f; + float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; + float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : -1.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; float pixel_half_pixel_offset = 0.0f; if (pa_su_vtx_cntl & (1 << 0)) { @@ -978,14 +988,23 @@ bool D3D12CommandProcessor::UpdateBindings( write_common_constant_views = true; } if (!cbuffer_bindings_bool_loop_.up_to_date) { - uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull( - 256, nullptr, nullptr, &cbuffer_bindings_bool_loop_.buffer_address); + uint32_t* bool_loop_constants = + reinterpret_cast<uint32_t*>(constant_buffer_pool_->RequestFull( + 768, nullptr, nullptr, + &cbuffer_bindings_bool_loop_.buffer_address)); if (bool_loop_constants == nullptr) { return false; } - std::memcpy(bool_loop_constants, - &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - 40 * sizeof(uint32_t)); + // Bool and loop constants are quadrupled to allow dynamic indexing. + for (uint32_t i = 0; i < 40; ++i) { + uint32_t bool_loop_constant = + regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32; + uint32_t* bool_loop_constant_vector = bool_loop_constants + (i << 2); + bool_loop_constant_vector[0] = bool_loop_constant; + bool_loop_constant_vector[1] = bool_loop_constant; + bool_loop_constant_vector[2] = bool_loop_constant; + bool_loop_constant_vector[3] = bool_loop_constant; + } cbuffer_bindings_bool_loop_.up_to_date = true; write_common_constant_views = true; } @@ -1080,7 +1099,7 @@ bool D3D12CommandProcessor::UpdateBindings( // Bool/loop constants (b1). 
constant_buffer_desc.BufferLocation = cbuffer_bindings_bool_loop_.buffer_address; - constant_buffer_desc.SizeInBytes = 256; + constant_buffer_desc.SizeInBytes = 768; device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += view_handle_size; view_gpu_handle.ptr += view_handle_size; diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 75e07bcf0..4850686d8 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -165,6 +165,8 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // Common declarations. // Only up to 14 constant buffers can be used on binding tiers 1 and 2. + // Bool and loop constants are quadrupled to allow dynamic indexing (constant + // registers are vectors). source.Append( "cbuffer xe_system_constants : register(b0) {\n" " float3 xe_mul_rcp_w;\n" @@ -179,8 +181,8 @@ std::vector HlslShaderTranslator::CompleteTranslation() { "};\n" "\n" "cbuffer xe_loop_bool_constants : register(b1) {\n" - " uint xe_bool_constants[8];\n" - " uint xe_loop_constants[32];\n" + " uint4 xe_bool_constants[8];\n" + " uint4 xe_loop_constants[32];\n" "};\n" "\n" "struct XeFloatConstantPage {\n" @@ -381,7 +383,7 @@ void HlslShaderTranslator::ProcessExecInstructionBegin( EmitSourceDepth("{\n"); break; case ParsedExecInstruction::Type::kConditional: - EmitSourceDepth("if ((xe_bool_constants[%u] & (1u << %uu)) %c= 0u) {\n", + EmitSourceDepth("if ((xe_bool_constants[%u].x & (1u << %uu)) %c= 0u) {\n", instr.bool_constant_index >> 5, instr.bool_constant_index & 31, instr.condition ? '!' : '='); @@ -413,14 +415,14 @@ void HlslShaderTranslator::ProcessLoopStartInstruction( // Setup counter. 
EmitSourceDepth("xe_loop_count.yzw = xe_loop_count.xyz;\n"); - EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n", + EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u].x & 0xFFu;\n", instr.loop_constant_index); // Setup relative indexing. EmitSourceDepth("xe_aL = xe_aL.xxyz;\n"); if (!instr.is_repeat) { // Push new loop starting index if not reusing the current one. - EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n", + EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u].x >> 8u) & 0xFFu);\n", instr.loop_constant_index); } @@ -465,7 +467,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction( Indent(); // Still looping. Adjust index and jump back to body. - EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u] << 8u) >> 24;\n", + EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u].x << 8u) >> 24;\n", instr.loop_constant_index); EmitSourceDepth("xe_pc = %uu; // Loop back to body L%u\n", instr.loop_body_address, instr.loop_body_address); @@ -503,7 +505,7 @@ void HlslShaderTranslator::ProcessJumpInstruction( EmitSourceDepth("{\n"); break; case ParsedJumpInstruction::Type::kConditional: - EmitSourceDepth("if ((xe_bool_constants[%u] & (1u << %uu)) %c= 0u) {\n", + EmitSourceDepth("if ((xe_bool_constants[%u].x & (1u << %uu)) %c= 0u) {\n", instr.bool_constant_index >> 5, instr.bool_constant_index & 31, instr.condition ? '!' 
: '='); @@ -605,10 +607,10 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index, op.storage_index & 31); break; case InstructionStorageSource::kConstantInt: - EmitSource("xe_loop_constants[%u]", op.storage_index); + EmitSource("xe_loop_constants[%u].x", op.storage_index); break; case InstructionStorageSource::kConstantBool: - EmitSource("float((xe_bool_constants[%u] >> %uu) & 1u)", + EmitSource("float((xe_bool_constants[%u].x >> %uu) & 1u)", op.storage_index >> 5, op.storage_index & 31); break; default: @@ -625,11 +627,12 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index, "xe_float_constants[xe_src_index >> 5u].c[xe_src_index & 31u]"); break; case InstructionStorageSource::kConstantInt: - EmitSource("xe_loop_constants[xe_src_index]"); + EmitSource("xe_loop_constants[xe_src_index].x"); break; case InstructionStorageSource::kConstantBool: - EmitSource("float((xe_bool_constants[xe_src_index >> 5u] >> " - "(xe_src_index & 31u)) & 1u)"); + EmitSource( + "float((xe_bool_constants[xe_src_index >> 5u].x >> " + "(xe_src_index & 31u)) & 1u)"); break; default: assert_always(); @@ -858,7 +861,7 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( } EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n", load_swizzle, load_function_suffix); - EmitSourceDepth(" ((xe_vertex_fetch[%uu].%c << 2u) & 0x1FFFFFFCu)", + EmitSourceDepth(" (xe_vertex_fetch[%uu].%c & 0x1FFFFFFCu)", vfetch_index >> 1, (vfetch_index & 1) ? 
'z' : 'x'); if (instr.attributes.stride != 0) { EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4); @@ -894,9 +897,8 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( EmitSourceDepth( " uint4(0u, 10u, 20u, 30u)) & uint4((1023u).xxx, 3u);\n"); if (instr.attributes.is_signed) { - EmitSourceDepth( - "xe_pv = float4(int4(xe_vertex_element << uint4((22u).xxx, 3u))\n"); - EmitSourceDepth(" >> int4((22).xxx, 3));\n"); + EmitSourceDepth("xe_pv = float4(int4(xe_vertex_element <<\n"); + EmitSourceDepth(" uint4((22u).xxx, 30u)) >> int4((22).xxx, 30));\n"); } else { EmitSourceDepth("xe_pv = float4(xe_vertex_element);\n"); }