diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 8db6f1626..ff9041fbd 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1996,15 +1996,44 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, current_external_pipeline_ = nullptr; } + // Get dynamic rasterizer state. + // Supersampling replacing multisampling due to difficulties of emulating + // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also + // resolution scale. + uint32_t pixel_size_x, pixel_size_y; + if (edram_rov_used_) { + pixel_size_x = 1; + pixel_size_y = 1; + } else { + xenos::MsaaSamples msaa_samples = + regs.Get().msaa_samples; + pixel_size_x = msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1; + pixel_size_y = msaa_samples >= xenos::MsaaSamples::k2X ? 2 : 1; + } + if (texture_cache_->IsResolutionScale2X()) { + pixel_size_x *= 2; + pixel_size_y *= 2; + } + draw_util::ViewportInfo viewport_info; + draw_util::GetHostViewportInfo(regs, float(pixel_size_x), float(pixel_size_y), + true, float(D3D12_VIEWPORT_BOUNDS_MAX), false, + viewport_info); + draw_util::Scissor scissor; + draw_util::GetScissor(regs, scissor); + scissor.left *= pixel_size_x; + scissor.top *= pixel_size_y; + scissor.width *= pixel_size_x; + scissor.height *= pixel_size_y; + // Update viewport, scissor, blend factor and stencil reference. - UpdateFixedFunctionState(primitive_two_faced); + UpdateFixedFunctionState(viewport_info, scissor, primitive_two_faced); // Update system constants before uploading them. UpdateSystemConstantValues( memexport_used, primitive_two_faced, line_loop_closing_index, indexed ? 
index_buffer_info->endianness : xenos::Endian::kNone, - used_texture_mask, early_z, GetCurrentColorMask(pixel_shader), - pipeline_render_targets); + viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, early_z, + GetCurrentColorMask(pixel_shader), pipeline_render_targets); // Update constant buffers, descriptors and root parameters. if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { @@ -2753,87 +2782,21 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { command_allocator_writable_last_ = nullptr; } -void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { +void D3D12CommandProcessor::UpdateFixedFunctionState( + const draw_util::ViewportInfo& viewport_info, + const draw_util::Scissor& scissor, bool primitive_two_faced) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES - const RegisterFile& regs = *register_file_; - - // Window parameters. - // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h - // See r200UpdateWindow: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - auto pa_sc_window_offset = regs.Get(); - - // Supersampling replacing multisampling due to difficulties of emulating - // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also - // resolution scale. - uint32_t pixel_size_x, pixel_size_y; - if (edram_rov_used_) { - pixel_size_x = 1; - pixel_size_y = 1; - } else { - xenos::MsaaSamples msaa_samples = - regs.Get().msaa_samples; - pixel_size_x = msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1; - pixel_size_y = msaa_samples >= xenos::MsaaSamples::k2X ? 2 : 1; - } - if (texture_cache_->IsResolutionScale2X()) { - pixel_size_x *= 2; - pixel_size_y *= 2; - } - // Viewport. - // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. 
- // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // In games, either all are enabled (for regular drawing) or none are (for - // rectangle lists usually). - // - // If scale/offset is enabled, the Xenos shader is writing (neglecting W - // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) - // box. If it's not, the position is in screen space. Since we can only use - // the NDC in PC APIs, we use a viewport of the largest possible size, and - // divide the position by it in translated shaders. - auto pa_cl_vte_cntl = regs.Get(); - float viewport_scale_x = - pa_cl_vte_cntl.vport_x_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) - : 4096.0f; - float viewport_scale_y = - pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) - : 4096.0f; - float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 - : 1.0f; - float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 - : std::abs(viewport_scale_x); - float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : std::abs(viewport_scale_y); - float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena - ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 - : 0.0f; - if (regs.Get().vtx_window_offset_enable) { - viewport_offset_x += float(pa_sc_window_offset.window_x_offset); - viewport_offset_y += float(pa_sc_window_offset.window_y_offset); - } D3D12_VIEWPORT viewport; - viewport.TopLeftX = - (viewport_offset_x - viewport_scale_x) * float(pixel_size_x); - viewport.TopLeftY = - (viewport_offset_y - viewport_scale_y) * float(pixel_size_y); - viewport.Width = viewport_scale_x * 2.0f * float(pixel_size_x); - viewport.Height = viewport_scale_y * 2.0f * float(pixel_size_y); - viewport.MinDepth = viewport_offset_z; - viewport.MaxDepth = viewport_offset_z + viewport_scale_z; - if (viewport_scale_z < 0.0f) { - // MinDepth > MaxDepth doesn't work on Nvidia, emulating it in vertex - // shaders and when applying polygon offset. - std::swap(viewport.MinDepth, viewport.MaxDepth); - } + viewport.TopLeftX = viewport_info.left; + viewport.TopLeftY = viewport_info.top; + viewport.Width = viewport_info.width; + viewport.Height = viewport_info.height; + viewport.MinDepth = viewport_info.z_min; + viewport.MaxDepth = viewport_info.z_max; ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX; ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY; ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width; @@ -2847,13 +2810,11 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } // Scissor. 
- draw_util::Scissor scissor; - draw_util::GetScissor(regs, scissor); D3D12_RECT scissor_rect; - scissor_rect.left = LONG(scissor.left * pixel_size_x); - scissor_rect.top = LONG(scissor.top * pixel_size_y); - scissor_rect.right = LONG((scissor.left + scissor.width) * pixel_size_x); - scissor_rect.bottom = LONG((scissor.top + scissor.height) * pixel_size_y); + scissor_rect.left = LONG(scissor.left); + scissor_rect.top = LONG(scissor.top); + scissor_rect.right = LONG(scissor.left + scissor.width); + scissor_rect.bottom = LONG(scissor.top + scissor.height); ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left; ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top; ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right; @@ -2865,6 +2826,8 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } if (!edram_rov_used_) { + const RegisterFile& regs = *register_file_; + // Blend factor. ff_blend_factor_update_needed_ |= ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; @@ -2908,7 +2871,9 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { void D3D12CommandProcessor::UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, xenos::Endian index_endian, - uint32_t used_texture_mask, bool early_z, uint32_t color_mask, + const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, + uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, + uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); @@ -2920,7 +2885,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( auto pa_su_point_minmax = regs.Get(); auto pa_su_point_size = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - auto pa_su_vtx_cntl = regs.Get(); float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_colorcontrol = 
regs.Get(); auto rb_depth_info = regs.Get(); @@ -2986,11 +2950,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } } - // Get viewport Z scale - needed for flags and ROV output. - float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 - : 1.0f; - bool dirty = false; // Flags. @@ -3023,10 +2982,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= (pa_cl_clip_cntl.value & 0b111111) << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; } - // Reversed depth. - if (viewport_scale_z < 0.0f) { - flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; - } // Whether SV_IsFrontFace matters. if (primitive_two_faced) { flags |= DxbcShaderTranslator::kSysFlag_PrimitiveTwoFaced; @@ -3122,81 +3077,24 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } // Conversion to Direct3D 12 normalized device coordinates. - // See viewport configuration in UpdateFixedFunctionState for explanations. - // X and Y scale/offset is to convert unnormalized coordinates generated by - // shaders (for rectangle list drawing, for instance) to the viewport of the - // largest possible render target size that is used to emulate unnormalized - // coordinates. Z scale/offset is to convert from OpenGL NDC to Direct3D NDC - // if needed. Also apply half-pixel offset to reproduce Direct3D 9 - // rasterization rules - must be done before clipping, not through the - // viewport, for SSAA and resolution scale to work correctly. - float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; - float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // Kill all primitives if multipass or both faces are culled, but still need // to do memexport. 
if (sq_program_cntl.vs_export_mode == xenos::VertexShaderExportMode::kMultipass || (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back)) { - dirty |= !std::isnan(system_constants_.ndc_scale[0]); - dirty |= !std::isnan(system_constants_.ndc_scale[1]); - dirty |= !std::isnan(system_constants_.ndc_scale[2]); - dirty |= !std::isnan(system_constants_.ndc_offset[0]); - dirty |= !std::isnan(system_constants_.ndc_offset[1]); - dirty |= !std::isnan(system_constants_.ndc_offset[2]); float nan_value = std::nanf(""); - system_constants_.ndc_scale[0] = nan_value; - system_constants_.ndc_scale[1] = nan_value; - system_constants_.ndc_scale[2] = nan_value; - system_constants_.ndc_offset[0] = nan_value; - system_constants_.ndc_offset[1] = nan_value; - system_constants_.ndc_offset[2] = nan_value; - } else { - // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to - // be written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF - // isn't set in this case in draws in games. - bool gl_clip_space_def = - !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; - float ndc_scale_x = pa_cl_vte_cntl.vport_x_scale_ena - ? (viewport_scale_x >= 0.0f ? 1.0f : -1.0f) - : (1.0f / 4096.0f); - float ndc_scale_y = pa_cl_vte_cntl.vport_y_scale_ena - ? (viewport_scale_y >= 0.0f ? -1.0f : 1.0f) - : (-1.0f / 4096.0f); - float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; - float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; - float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; - float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; - if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) { - // Signs are hopefully correct here, tested in GTA IV on both clearing - // (without a viewport) and drawing things near the edges of the screen. 
- if (pa_cl_vte_cntl.vport_x_scale_ena) { - if (viewport_scale_x != 0.0f) { - ndc_offset_x += 0.5f / viewport_scale_x; - } - } else { - ndc_offset_x += 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - if (viewport_scale_y != 0.0f) { - ndc_offset_y += 0.5f / viewport_scale_y; - } - } else { - ndc_offset_y -= 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } + for (uint32_t i = 0; i < 3; ++i) { + dirty |= !std::isnan(system_constants_.ndc_scale[i]); + system_constants_.ndc_scale[i] = nan_value; + } + } else { + for (uint32_t i = 0; i < 3; ++i) { + dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i]; + dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i]; + system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i]; + system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i]; } - dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; - dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; - dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; - dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; - dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; - dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; - system_constants_.ndc_scale[0] = ndc_scale_x; - system_constants_.ndc_scale[1] = ndc_scale_y; - system_constants_.ndc_scale[2] = ndc_scale_z; - system_constants_.ndc_offset[0] = ndc_offset_x; - system_constants_.ndc_offset[1] = ndc_offset_y; - system_constants_.ndc_offset[2] = ndc_offset_z; } // Point size. @@ -3212,19 +3110,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.point_size[1] = point_size_y; system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[1] = point_size_max; - float point_screen_to_ndc_x, point_screen_to_ndc_y; - if (pa_cl_vte_cntl.vport_x_scale_ena) { - point_screen_to_ndc_x = - (viewport_scale_x != 0.0f) ? 
(0.5f / viewport_scale_x) : 0.0f; - } else { - point_screen_to_ndc_x = 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - point_screen_to_ndc_y = - (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; - } else { - point_screen_to_ndc_y = -1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } + float point_screen_to_ndc_x = + (0.5f * 2.0f * pixel_size_x) / viewport_info.width; + float point_screen_to_ndc_y = + (0.5f * 2.0f * pixel_size_y) / viewport_info.height; dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x; dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y; system_constants_.point_screen_to_ndc[0] = point_screen_to_ndc_x; @@ -3374,20 +3263,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; - // The Z range is reversed in the vertex shader if it's reverse - use the - // absolute value of the scale. - float depth_range_scale = std::abs(viewport_scale_z); + float depth_range_scale = viewport_info.z_max - viewport_info.z_min; dirty |= system_constants_.edram_depth_range_scale != depth_range_scale; system_constants_.edram_depth_range_scale = depth_range_scale; - float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 - : 0.0f; - if (viewport_scale_z < 0.0f) { - // Similar to MinDepth in fixed-function viewport calculation. 
- depth_range_offset += viewport_scale_z; - } - dirty |= system_constants_.edram_depth_range_offset != depth_range_offset; - system_constants_.edram_depth_range_offset = depth_range_offset; + dirty |= system_constants_.edram_depth_range_offset != viewport_info.z_min; + system_constants_.edram_depth_range_offset = viewport_info.z_min; // For non-polygons, front polygon offset is used, and it's enabled if // POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index ceffe5fd0..982f9eac5 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -26,6 +26,7 @@ #include "xenia/gpu/d3d12/primitive_converter.h" #include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/texture_cache.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" @@ -345,11 +346,15 @@ class D3D12CommandProcessor : public CommandProcessor { D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); - void UpdateFixedFunctionState(bool primitive_two_faced); + void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info, + const draw_util::Scissor& scissor, + bool primitive_two_faced); void UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, xenos::Endian index_endian, - uint32_t used_texture_mask, bool early_z, uint32_t color_mask, + const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, + uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, + uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso index b871af09c..13cca4e6c 100644 Binary files a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso and b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso differ diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h index 997fb892b..de5761c47 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h @@ -1,11 +1,11 @@ // generated from `xb buildhlsl` // source: primitive_point_list.gs.hlsl const uint8_t primitive_point_list_gs[] = { - 0x44, 0x58, 0x42, 0x43, 0x6F, 0x7A, 0xE0, 0xA0, 0x82, 0xF0, 0x8E, 0x77, - 0x2B, 0x62, 0x44, 0x00, 0xA3, 0x34, 0x47, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x0C, 0x1E, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x16, 0x84, 0x10, 0x1C, 0xE9, 0xAD, 0x76, 0xF9, + 0x92, 0xF2, 0xD5, 0x65, 0x7C, 0x8A, 0x5F, 0xC5, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x1E, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0xD0, 0x0A, 0x00, 0x00, 0x28, 0x0D, 0x00, 0x00, 0xAC, 0x0F, 0x00, 0x00, - 0x70, 0x1D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x94, 0x0A, 0x00, 0x00, + 0x84, 0x1D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x94, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x47, 0x00, 0x05, 0x00, 0x00, 0x6A, 0x0A, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, @@ -335,8 +335,8 @@ const uint8_t primitive_point_list_gs[] = { 0x54, 0x45, 0x58, 0x43, 0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x53, 0x56, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, - 0x00, 0xAB, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0xBC, 0x0D, 0x00, 0x00, - 0x51, 0x00, 0x02, 
0x00, 0x6F, 0x03, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, + 0x00, 0xAB, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0xD0, 0x0D, 0x00, 0x00, + 0x51, 0x00, 0x02, 0x00, 0x74, 0x03, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0xF2, 0x10, 0x20, 0x00, @@ -369,7 +369,7 @@ const uint8_t primitive_point_list_gs[] = { 0x13, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0x32, 0x10, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0x42, 0x10, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x5D, 0x08, 0x00, 0x01, + 0x68, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x5D, 0x08, 0x00, 0x01, 0x8F, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5C, 0x28, 0x00, 0x01, 0x65, 0x00, 0x00, 0x03, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xF2, 0x20, 0x10, 0x00, @@ -426,113 +426,13 @@ const uint8_t primitive_point_list_gs[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x18, 0x08, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF6, 0x1F, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x38, 0x00, 0x78, 0x0A, - 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x04, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0xBF, - 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0xBF, - 0x00, 0x00, 0x78, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x14, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 
0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x46, 0x1E, 
0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, - 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x60, 0x08, 0xC2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x04, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 
0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, - 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 
0x00, 0x00, - 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, - 0x12, 0x00, 0x00, 0x00, 0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x18, 0x09, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x38, 0x06, + 0x72, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x40, 0x05, + 0x82, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x08, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xC6, 0x09, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x46, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 
0x00, @@ -573,7 +473,7 @@ const uint8_t primitive_point_list_gs[] = { 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -613,6 +513,56 @@ const uint8_t primitive_point_list_gs[] = { 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, + 0xE6, 0x0A, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x11, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x08, + 0xA2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x04, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x36, 
0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, @@ -620,26 +570,78 @@ const uint8_t primitive_point_list_gs[] = { 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xE6, 0x0A, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xD6, 0x05, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, - 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 
0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x20, 0x05, + 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x08, 0x32, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 
0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, + 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, + 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 
0x10, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt index 7fdfc3a62..436c59923 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt @@ -130,7 +130,7 @@ dcl_input_siv v[1][18].xyzw, position dcl_input v[1][19].xyzw dcl_input v[1][20].xy dcl_input v[1][20].z -dcl_temps 2 +dcl_temps 3 dcl_inputprimitive point dcl_stream m0 dcl_outputtopology trianglestrip @@ 
-170,58 +170,9 @@ max [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].xxxx min [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].yyyy mul [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].zwzz mul [precise(xy)] r0.xy, r0.xyxx, v[0][18].wwww -mul [precise] r1.xyzw, r0.xyxy, l(-1.000000, 1.000000, 1.000000, -1.000000) -add [precise] r1.xyzw, r1.xyzw, v[0][18].xyxy -mov o0.xyzw, v[0][0].xyzw -mov o1.xyzw, v[0][1].xyzw -mov o2.xyzw, v[0][2].xyzw -mov o3.xyzw, v[0][3].xyzw -mov o4.xyzw, v[0][4].xyzw -mov o5.xyzw, v[0][5].xyzw -mov o6.xyzw, v[0][6].xyzw -mov o7.xyzw, v[0][7].xyzw -mov o8.xyzw, v[0][8].xyzw -mov o9.xyzw, v[0][9].xyzw -mov o10.xyzw, v[0][10].xyzw -mov o11.xyzw, v[0][11].xyzw -mov o12.xyzw, v[0][12].xyzw -mov o13.xyzw, v[0][13].xyzw -mov o14.xyzw, v[0][14].xyzw -mov o15.xyzw, v[0][15].xyzw -mov o16.xy, l(0,1.000000,0,0) -mov o16.z, v[0][16].z -mov o17.xy, v[0][17].xyxx -mov o18.xy, r1.xyxx -mov o18.zw, v[0][18].zzzw -mov o19.xyzw, v[0][19].xyzw -mov o20.xy, v[0][20].xyxx -emit_stream m0 -add [precise(zw)] r0.zw, r0.xxxy, v[0][18].xxxy -mov o0.xyzw, v[0][0].xyzw -mov o1.xyzw, v[0][1].xyzw -mov o2.xyzw, v[0][2].xyzw -mov o3.xyzw, v[0][3].xyzw -mov o4.xyzw, v[0][4].xyzw -mov o5.xyzw, v[0][5].xyzw -mov o6.xyzw, v[0][6].xyzw -mov o7.xyzw, v[0][7].xyzw -mov o8.xyzw, v[0][8].xyzw -mov o9.xyzw, v[0][9].xyzw -mov o10.xyzw, v[0][10].xyzw -mov o11.xyzw, v[0][11].xyzw -mov o12.xyzw, v[0][12].xyzw -mov o13.xyzw, v[0][13].xyzw -mov o14.xyzw, v[0][14].xyzw -mov o15.xyzw, v[0][15].xyzw -mov o16.xy, l(1.000000,1.000000,0,0) -mov o16.z, v[0][16].z -mov o17.xy, v[0][17].xyxx -mov o18.xy, r0.zwzz -mov o18.zw, v[0][18].zzzw -mov o19.xyzw, v[0][19].xyzw -mov o20.xy, v[0][20].xyxx -emit_stream m0 -add [precise(xy)] r0.xy, -r0.xyxx, v[0][18].xyxx +mov [precise(xyz)] r1.xyz, -r0.xxyx +mov [precise(w)] r1.w, r0.y +add [precise] r2.xyzw, r1.xwyz, v[0][18].xyxy mov o0.xyzw, v[0][0].xyzw mov o1.xyzw, v[0][1].xyzw mov o2.xyzw, v[0][2].xyzw @@ -241,7 +192,7 @@ mov o15.xyzw, v[0][15].xyzw mov o16.xy, l(0,0,0,0) mov 
o16.z, v[0][16].z mov o17.xy, v[0][17].xyxx -mov o18.xy, r0.xyxx +mov o18.xy, r2.xyxx mov o18.zw, v[0][18].zzzw mov o19.xyzw, v[0][19].xyzw mov o20.xy, v[0][20].xyxx @@ -262,14 +213,65 @@ mov o12.xyzw, v[0][12].xyzw mov o13.xyzw, v[0][13].xyzw mov o14.xyzw, v[0][14].xyzw mov o15.xyzw, v[0][15].xyzw +mov o16.xy, l(0,1.000000,0,0) +mov o16.z, v[0][16].z +mov o17.xy, v[0][17].xyxx +mov o18.xy, r2.zwzz +mov o18.zw, v[0][18].zzzw +mov o19.xyzw, v[0][19].xyzw +mov o20.xy, v[0][20].xyxx +emit_stream m0 +add [precise(yw)] r0.yw, r0.xxxy, v[0][18].xxxy +mov o0.xyzw, v[0][0].xyzw +mov o1.xyzw, v[0][1].xyzw +mov o2.xyzw, v[0][2].xyzw +mov o3.xyzw, v[0][3].xyzw +mov o4.xyzw, v[0][4].xyzw +mov o5.xyzw, v[0][5].xyzw +mov o6.xyzw, v[0][6].xyzw +mov o7.xyzw, v[0][7].xyzw +mov o8.xyzw, v[0][8].xyzw +mov o9.xyzw, v[0][9].xyzw +mov o10.xyzw, v[0][10].xyzw +mov o11.xyzw, v[0][11].xyzw +mov o12.xyzw, v[0][12].xyzw +mov o13.xyzw, v[0][13].xyzw +mov o14.xyzw, v[0][14].xyzw +mov o15.xyzw, v[0][15].xyzw mov o16.xy, l(1.000000,0,0,0) mov o16.z, v[0][16].z mov o17.xy, v[0][17].xyxx -mov o18.xy, r1.zwzz +mov o18.xy, r0.ywyy +mov o18.zw, v[0][18].zzzw +mov o19.xyzw, v[0][19].xyzw +mov o20.xy, v[0][20].xyxx +emit_stream m0 +mov [precise(z)] r0.z, r1.z +add [precise(xy)] r0.xy, r0.xzxx, v[0][18].xyxx +mov o0.xyzw, v[0][0].xyzw +mov o1.xyzw, v[0][1].xyzw +mov o2.xyzw, v[0][2].xyzw +mov o3.xyzw, v[0][3].xyzw +mov o4.xyzw, v[0][4].xyzw +mov o5.xyzw, v[0][5].xyzw +mov o6.xyzw, v[0][6].xyzw +mov o7.xyzw, v[0][7].xyzw +mov o8.xyzw, v[0][8].xyzw +mov o9.xyzw, v[0][9].xyzw +mov o10.xyzw, v[0][10].xyzw +mov o11.xyzw, v[0][11].xyzw +mov o12.xyzw, v[0][12].xyzw +mov o13.xyzw, v[0][13].xyzw +mov o14.xyzw, v[0][14].xyzw +mov o15.xyzw, v[0][15].xyzw +mov o16.xy, l(1.000000,1.000000,0,0) +mov o16.z, v[0][16].z +mov o17.xy, v[0][17].xyxx +mov o18.xy, r0.xyxx mov o18.zw, v[0][18].zzzw mov o19.xyzw, v[0][19].xyzw mov o20.xy, v[0][20].xyxx emit_stream m0 cut_stream m0 ret -// Approximately 116 instruction slots 
used +// Approximately 118 instruction slots used diff --git a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl index f9b0e6753..33d5a5c48 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl @@ -26,19 +26,22 @@ void main(point XeVertexPreGS xe_in[1], clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) * xe_point_screen_to_ndc * xe_in[0].post_gs.position.w; - xe_out.point_params.xy = float2(0.0, 1.0); - xe_out.position.xy = - xe_in[0].post_gs.position.xy + float2(-1.0, 1.0) * point_size; - xe_stream.Append(xe_out); - xe_out.point_params.xy = float2(1.0, 1.0); - xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; - xe_stream.Append(xe_out); xe_out.point_params.xy = float2(0.0, 0.0); + // TODO(Triang3l): On Vulkan, sign of Y needs to be inverted because of + // upper-left origin. + // TODO(Triang3l): Investigate the true signs of point sprites.
+ xe_out.position.xy = + xe_in[0].post_gs.position.xy + float2(-point_size.x, point_size.y); + xe_stream.Append(xe_out); + xe_out.point_params.xy = float2(0.0, 1.0); xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size; xe_stream.Append(xe_out); xe_out.point_params.xy = float2(1.0, 0.0); + xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; + xe_stream.Append(xe_out); + xe_out.point_params.xy = float2(1.0, 1.0); xe_out.position.xy = - xe_in[0].post_gs.position.xy + float2(1.0, -1.0) * point_size; + xe_in[0].post_gs.position.xy + float2(point_size.x, -point_size.y); xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 202d34965..c78d5122a 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -111,6 +111,181 @@ int32_t FloatToD3D11Fixed16p8(float f32) { return result.s; } +void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, + float pixel_size_y, bool origin_bottom_left, + float xy_max, bool allow_reverse_z, + ViewportInfo& viewport_info_out) { + assert_true(pixel_size_x >= 1.0f); + assert_true(pixel_size_y >= 1.0f); + assert_true(xy_max >= 1.0f); + + // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // In games, either all are enabled (for regular drawing) or none are (for + // rectangle lists usually). + // + // If scale/offset is enabled, the Xenos shader is writing (neglecting W + // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) + // box. If it's not, the position is in screen space. Since we can only use + // the NDC in PC APIs, we use a viewport of the largest possible size, and + // divide the position by it in translated shaders.
+ + auto pa_cl_clip_cntl = regs.Get(); + auto pa_cl_vte_cntl = regs.Get(); + auto pa_su_sc_mode_cntl = regs.Get(); + auto pa_su_vtx_cntl = regs.Get(); + + float viewport_left, viewport_top; + float viewport_width, viewport_height; + float ndc_scale_x, ndc_scale_y; + float ndc_offset_x, ndc_offset_y; + // To avoid zero size viewports, which would harm division and aren't allowed + // on Vulkan. Nothing will ever be covered by a viewport of this size - this + // is 2 orders of magnitude smaller than a .8 subpixel, and thus shouldn't + // have any effect on rounding, n and n + 1 / 1024 would be rounded to the + // same .8 fixed-point value, thus in fixed-point, the viewport would have + // zero size. + const float size_min = 1.0f / 1024.0f; + + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + : 0.0f; + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + : 0.0f; + if (pa_su_sc_mode_cntl.vtx_window_offset_enable) { + auto pa_sc_window_offset = regs.Get(); + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); + } + + if (pa_cl_vte_cntl.vport_x_scale_ena) { + float pa_cl_vport_xscale = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; + float viewport_scale_x_abs = std::abs(pa_cl_vport_xscale) * pixel_size_x; + viewport_left = viewport_offset_x * pixel_size_x - viewport_scale_x_abs; + float viewport_right = viewport_left + viewport_scale_x_abs * 2.0f; + // Keep the viewport in the positive quarter-plane for simplicity of + // clamping to the maximum supported bounds.
+ float cutoff_left = std::fmax(-viewport_left, 0.0f); + float cutoff_right = std::fmax(viewport_right - xy_max, 0.0f); + viewport_left = std::fmax(viewport_left, 0.0f); + viewport_right = std::fmin(viewport_right, xy_max); + viewport_width = viewport_right - viewport_left; + if (viewport_width > size_min) { + ndc_scale_x = + (viewport_width + cutoff_left + cutoff_right) / viewport_width; + if (pa_cl_vport_xscale < 0.0f) { + ndc_scale_x = -ndc_scale_x; + } + ndc_offset_x = + ((cutoff_right - cutoff_left) * (0.5f * 2.0f)) / viewport_width; + } else { + // Empty viewport, but don't pass 0 because that's against the Vulkan + // specification. + viewport_left = 0.0f; + viewport_width = size_min; + ndc_scale_x = 0.0f; + ndc_offset_x = 0.0f; + } + } else { + // Drawing without a viewport and without clipping to one - use a viewport + // covering the entire potential guest render target or the positive part of + // the host viewport area, whichever is smaller, and apply the offset, if + // enabled, via the shader.
+ viewport_left = 0.0f; + viewport_width = std::min( + float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_x, xy_max); + ndc_scale_x = (2.0f * pixel_size_x) / viewport_width; + ndc_offset_x = viewport_offset_x * ndc_scale_x - 1.0f; + } + + if (pa_cl_vte_cntl.vport_y_scale_ena) { + float pa_cl_vport_yscale = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; + float viewport_scale_y_abs = std::abs(pa_cl_vport_yscale) * pixel_size_y; + viewport_top = viewport_offset_y * pixel_size_y - viewport_scale_y_abs; + float viewport_bottom = viewport_top + viewport_scale_y_abs * 2.0f; + float cutoff_top = std::fmax(-viewport_top, 0.0f); + float cutoff_bottom = std::fmax(viewport_bottom - xy_max, 0.0f); + viewport_top = std::fmax(viewport_top, 0.0f); + viewport_bottom = std::fmin(viewport_bottom, xy_max); + viewport_height = viewport_bottom - viewport_top; + if (viewport_height > size_min) { + ndc_scale_y = + (viewport_height + cutoff_top + cutoff_bottom) / viewport_height; + if (pa_cl_vport_yscale < 0.0f) { + ndc_scale_y = -ndc_scale_y; + } + ndc_offset_y = + ((cutoff_bottom - cutoff_top) * (0.5f * 2.0f)) / viewport_height; + } else { + // Empty viewport, but don't pass 0 because that's against the Vulkan + // specification. + viewport_top = 0.0f; + viewport_height = size_min; + ndc_scale_y = 0.0f; + ndc_offset_y = 0.0f; + } + } else { + // Same as for X when drawing without a viewport - start at the origin. + // viewport_top must be assigned, it's copied to viewport_info_out.top. + viewport_top = 0.0f; + viewport_height = std::min( + float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_y, xy_max); + ndc_scale_y = (2.0f * pixel_size_y) / viewport_height; + ndc_offset_y = viewport_offset_y * ndc_scale_y - 1.0f; + } + + // Apply the vertex half-pixel offset via the shader (it must not affect + // clipping, otherwise with SSAA or resolution scale, samples in the left/top + // half will never be covered).
+ if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) { + ndc_offset_x += (0.5f * 2.0f * pixel_size_x) / viewport_width; + ndc_offset_y += (0.5f * 2.0f * pixel_size_y) / viewport_height; + } + + if (origin_bottom_left) { + ndc_scale_y = -ndc_scale_y; + ndc_offset_y = -ndc_offset_y; + } + + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + : 1.0f; + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + : 0.0f; + // Vulkan requires the depth bounds to be in the 0 to 1 range without + // VK_EXT_depth_range_unrestricted (which isn't used on the Xbox 360). + float viewport_z_min = std::min(std::fmax(viewport_offset_z, 0.0f), 1.0f); + float viewport_z_max = + std::min(std::fmax(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f); + // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be + // written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't + // set in this case in draws in games. + bool gl_clip_space_def = + !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; + float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; + float ndc_offset_z = gl_clip_space_def ?
0.5f : 0.0f; + if (viewport_z_min > viewport_z_max && !allow_reverse_z) { + std::swap(viewport_z_min, viewport_z_max); + ndc_scale_z = -ndc_scale_z; + ndc_offset_z = 1.0f - ndc_offset_z; + } + + viewport_info_out.left = viewport_left; + viewport_info_out.top = viewport_top; + viewport_info_out.width = viewport_width; + viewport_info_out.height = viewport_height; + viewport_info_out.z_min = viewport_z_min; + viewport_info_out.z_max = viewport_z_max; + viewport_info_out.ndc_scale[0] = ndc_scale_x; + viewport_info_out.ndc_scale[1] = ndc_scale_y; + viewport_info_out.ndc_scale[2] = ndc_scale_z; + viewport_info_out.ndc_offset[0] = ndc_offset_x; + viewport_info_out.ndc_offset[1] = ndc_offset_y; + viewport_info_out.ndc_offset[2] = ndc_offset_z; +} + void GetScissor(const RegisterFile& regs, Scissor& scissor_out) { // FIXME(Triang3l): Screen scissor isn't applied here, but it seems to be // unused on Xbox 360 Direct3D 9. diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 7ef3186a0..2cee26de7 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -33,6 +33,28 @@ namespace draw_util { // for use with the top-left rasterization rule later. int32_t FloatToD3D11Fixed16p8(float f32); +struct ViewportInfo { + // The returned viewport will always be in the positive quarter-plane for + // simplicity of clamping to the maximum size supported by the host, negative + // offset will be applied via ndc_offset. + float left; + float top; + float width; + float height; + float z_min; + float z_max; + float ndc_scale[3]; + float ndc_offset[3]; +}; +// Converts the guest viewport (or fakes one if drawing without a viewport) to +// a viewport, plus values to multiply-add the returned position by, usable on +// host graphics APIs such as Direct3D 11+ and Vulkan, also forcing it to the +// Direct3D clip space with 0...W Z rather than -W...W.
+void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, + float pixel_size_y, bool origin_bottom_left, + float xy_max, bool allow_reverse_z, + ViewportInfo& viewport_info_out); + struct Scissor { uint32_t left; uint32_t top; diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 3f9140158..56278157d 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1044,10 +1044,9 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { DxbcOpEndIf(); } - // Apply scale for drawing without a viewport, and also remap from OpenGL - // Z clip space to Direct3D if needed. Also, if the vertex shader is - // multipass, the NDC scale constant can be used to set position to NaN to - // kill all primitives. + // Apply scale for guest to host viewport and clip space conversion. Also, if + // the vertex shader is multipass, the NDC scale constant can be used to set + // position to NaN to kill all primitives. system_constants_used_ |= 1ull << kSysConst_NDCScale_Index; DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0111), DxbcSrc::R(system_temp_position_), @@ -1056,16 +1055,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { kSysConst_NDCScale_Vec, kSysConst_NDCScale_Comp * 0b010101 + 0b100100)); - // Reverse Z (Z = W - Z) if the viewport depth is inverted. - DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_ReverseZ)); - DxbcOpIf(true, temp_x_src); - DxbcOpAdd(DxbcDest::R(system_temp_position_, 0b0100), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW), - -DxbcSrc::R(system_temp_position_, DxbcSrc::kZZZZ)); - DxbcOpEndIf(); - - // Apply offset (multiplied by W) for drawing without a viewport and for half - // pixel offset. + // Apply offset (multiplied by W) used for the same purposes. 
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index; DxbcOpMAd(DxbcDest::R(system_temp_position_, 0b0111), DxbcSrc::CB(cbuffer_index_system_constants_, diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index c45cfc4d9..997be5fe7 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -123,7 +123,6 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane3_Shift, kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5_Shift, - kSysFlag_ReverseZ_Shift, kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_PrimitiveTwoFaced_Shift, kSysFlag_AlphaPassIfLess_Shift, @@ -165,7 +164,6 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane3 = 1u << kSysFlag_UserClipPlane3_Shift, kSysFlag_UserClipPlane4 = 1u << kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5 = 1u << kSysFlag_UserClipPlane5_Shift, - kSysFlag_ReverseZ = 1u << kSysFlag_ReverseZ_Shift, kSysFlag_KillIfAnyVertexKilled = 1u << kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_PrimitiveTwoFaced = 1u << kSysFlag_PrimitiveTwoFaced_Shift, kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, @@ -220,8 +218,7 @@ class DxbcShaderTranslator : public ShaderTranslator { float point_size[2]; float point_size_min_max[2]; - // Inverse scale of the host viewport (but not supersampled), with signs - // pre-applied. + // Screen point size * 2 (but not supersampled) -> size in NDC. float point_screen_to_ndc[2]; float user_clip_planes[6][4];