diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 2c5820411..df917a532 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1821,7 +1821,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Exponent bias is in bits 20:25 of RB_COLOR_INFO. int32_t color_exp_bias = int32_t(color_info << 6) >> 26; ColorRenderTargetFormat color_format = - ColorRenderTargetFormat((color_info >> 16) & 0xF); + RenderTargetCache::GetBaseColorFormat( + ColorRenderTargetFormat((color_info >> 16) & 0xF)); if (color_format == ColorRenderTargetFormat::k_16_16 || color_format == ColorRenderTargetFormat::k_16_16_16_16) { // On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have @@ -1840,8 +1841,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; system_constants_.color_exp_bias[i] = color_exp_bias_scale; if (render_target_cache_->IsROVUsedForEDRAM()) { - uint32_t rt_pair_index = i >> 1; - uint32_t rt_pair_comp = (i & 1) << 1; uint32_t edram_base_dwords = (color_info & 0xFFF) * 1280; dirty |= system_constants_.edram_base_dwords[i] != edram_base_dwords; system_constants_.edram_base_dwords[i] = edram_base_dwords; @@ -1852,49 +1851,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( 80; dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles; system_constants_.edram_pitch_tiles = edram_pitch_tiles; - static const uint32_t kRTFormatFlags[16] = { - // k_8_8_8_8 - DxbcShaderTranslator::kRTFlag_FormatFixed, - // k_8_8_8_8_GAMMA - DxbcShaderTranslator::kRTFlag_FormatFixed, - // k_2_10_10_10 - DxbcShaderTranslator::kRTFlag_FormatFixed, - // k_2_10_10_10_FLOAT - DxbcShaderTranslator::kRTFlag_FormatFloat10, - // k_16_16 - DxbcShaderTranslator::kRTFlag_FormatFixed, - // k_16_16_16_16 - DxbcShaderTranslator::kRTFlag_Format64bpp | - 
DxbcShaderTranslator::kRTFlag_FormatFixed, - // k_16_16_FLOAT - DxbcShaderTranslator::kRTFlag_FormatFloat16, - // k_16_16_16_16_FLOAT - DxbcShaderTranslator::kRTFlag_Format64bpp | - DxbcShaderTranslator::kRTFlag_FormatFloat16, - // Unused - 0, - // Unused - 0, - // k_2_10_10_10_AS_16_16_16_16 - DxbcShaderTranslator::kRTFlag_FormatFixed, - // Unused. - 0, - // k_2_10_10_10_FLOAT_AS_16_16_16_16 - DxbcShaderTranslator::kRTFlag_FormatFloat10, - // Unused. - 0, - // k_32_FLOAT - 0, - // k_32_32_FLOAT - DxbcShaderTranslator::kRTFlag_Format64bpp, - }; static const uint32_t kRTFormatAllComponentsMask[16] = { 0b1111, 0b1111, 0b1111, 0b1111, 0b0011, 0b1111, 0b0011, 0b1111, 0b0000, 0b0000, 0b1111, 0b0000, 0b1111, 0b0000, 0b0001, 0b0011, }; uint32_t rt_mask_all = kRTFormatAllComponentsMask[uint32_t(color_format)]; uint32_t rt_mask = (rb_color_mask >> (i * 4)) & rt_mask_all; - uint32_t rt_flags = kRTFormatFlags[uint32_t(color_format)]; + uint32_t rt_flags = + DxbcShaderTranslator::GetColorFormatRTFlags(color_format); if (rt_mask != 0) { rt_flags |= DxbcShaderTranslator::kRTFlag_Used; if (rt_mask != rt_mask_all) { @@ -1906,6 +1870,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( blend_y)) { rt_flags |= DxbcShaderTranslator::kRTFlag_Load | DxbcShaderTranslator::kRTFlag_Blend; + uint32_t rt_pair_index = i >> 1; + uint32_t rt_pair_comp = (i & 1) << 1; if (system_constants_ .edram_blend_rt01_rt23[rt_pair_index][rt_pair_comp] != blend_x) { @@ -1927,143 +1893,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_rt_flags[i] = rt_flags; if (system_constants_color_formats_[i] != color_format) { dirty = true; - uint32_t color_mask = UINT32_MAX, alpha_mask = UINT32_MAX; - // Initialize min/max to Infinity. 
- uint32_t color_min = 0xFF800000u, alpha_min = 0xFF800000u; - uint32_t color_max = 0x7F800000u, alpha_max = 0x7F800000u; - float color_load_scale = 1.0f, alpha_load_scale = 1.0f; - float color_store_scale = 1.0f, alpha_store_scale = 1.0f; - switch (color_format) { - case ColorRenderTargetFormat::k_8_8_8_8: - case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: - system_constants_.edram_rt_pack_width_low[i][0] = 8; - system_constants_.edram_rt_pack_width_low[i][1] = 8; - system_constants_.edram_rt_pack_width_low[i][2] = 8; - system_constants_.edram_rt_pack_width_low[i][3] = 8; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 8; - system_constants_.edram_rt_pack_offset_low[i][2] = 16; - system_constants_.edram_rt_pack_offset_low[i][3] = 24; - color_mask = alpha_mask = 255; - color_min = alpha_min = 0; - color_max = alpha_max = 0x3F800000; - color_load_scale = alpha_load_scale = 1.0f / 255.0f; - color_store_scale = alpha_store_scale = 255.0f; - break; - case ColorRenderTargetFormat::k_2_10_10_10: - case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16: - system_constants_.edram_rt_pack_width_low[i][0] = 10; - system_constants_.edram_rt_pack_width_low[i][1] = 10; - system_constants_.edram_rt_pack_width_low[i][2] = 10; - system_constants_.edram_rt_pack_width_low[i][3] = 2; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 10; - system_constants_.edram_rt_pack_offset_low[i][2] = 20; - system_constants_.edram_rt_pack_offset_low[i][3] = 30; - color_mask = 1023; - alpha_mask = 3; - color_min = alpha_min = 0; - color_max = alpha_max = 0x3F800000; - color_load_scale = 1.0f / 1023.0f; - alpha_load_scale = 1.0f / 3.0f; - color_store_scale = 1023.0f; - alpha_store_scale = 3.0f; - break; - case ColorRenderTargetFormat::k_2_10_10_10_FLOAT: - case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: - system_constants_.edram_rt_pack_width_low[i][0] = 10; - 
system_constants_.edram_rt_pack_width_low[i][1] = 10; - system_constants_.edram_rt_pack_width_low[i][2] = 10; - system_constants_.edram_rt_pack_width_low[i][3] = 2; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 10; - system_constants_.edram_rt_pack_offset_low[i][2] = 20; - system_constants_.edram_rt_pack_offset_low[i][3] = 30; - color_mask = 1023; - alpha_mask = 3; - color_min = alpha_min = 0; - // 31.875. - color_max = 0x41FF0000; - alpha_max = 0x3F800000; - alpha_load_scale = 1.0f / 3.0f; - alpha_store_scale = 3.0f; - break; - case ColorRenderTargetFormat::k_16_16: - case ColorRenderTargetFormat::k_16_16_16_16: - system_constants_.edram_rt_pack_width_low[i][0] = 16; - system_constants_.edram_rt_pack_width_low[i][1] = 16; - system_constants_.edram_rt_pack_width_low[i][2] = 0; - system_constants_.edram_rt_pack_width_low[i][3] = 0; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 16; - system_constants_.edram_rt_pack_offset_low[i][2] = 0; - system_constants_.edram_rt_pack_offset_low[i][3] = 0; - // TODO(Triang3l): 64bpp variant. - // Color and alpha mask UINT32_MAX because the format is signed. - // -32.0. - color_min = alpha_min = 0xC2000000u; - // 32.0. 
- color_max = alpha_max = 0x42000000u; - color_load_scale = alpha_load_scale = 32.0f / 32767.0f; - color_store_scale = alpha_store_scale = 32767.0f / 32.0f; - break; - case ColorRenderTargetFormat::k_16_16_FLOAT: - case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: - system_constants_.edram_rt_pack_width_low[i][0] = 16; - system_constants_.edram_rt_pack_width_low[i][1] = 16; - system_constants_.edram_rt_pack_width_low[i][2] = 0; - system_constants_.edram_rt_pack_width_low[i][3] = 0; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 16; - system_constants_.edram_rt_pack_offset_low[i][2] = 0; - system_constants_.edram_rt_pack_offset_low[i][3] = 0; - // TODO(Triang3l): 64bpp variant. - color_mask = alpha_mask = 0xFFFF; - break; - case ColorRenderTargetFormat::k_32_FLOAT: - case ColorRenderTargetFormat::k_32_32_FLOAT: - system_constants_.edram_rt_pack_width_low[i][0] = 32; - system_constants_.edram_rt_pack_width_low[i][1] = 0; - system_constants_.edram_rt_pack_width_low[i][2] = 0; - system_constants_.edram_rt_pack_width_low[i][3] = 0; - system_constants_.edram_rt_pack_offset_low[i][0] = 0; - system_constants_.edram_rt_pack_offset_low[i][1] = 0; - system_constants_.edram_rt_pack_offset_low[i][2] = 0; - system_constants_.edram_rt_pack_offset_low[i][3] = 0; - // TODO(Triang3l): 64bpp variant. 
- break; - default: - assert_always(); - break; - } - system_constants_ - .edram_load_mask_rt01_rt23[rt_pair_index][rt_pair_comp] = - color_mask; - system_constants_ - .edram_load_mask_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = - alpha_mask; - system_constants_ - .edram_load_scale_rt01_rt23[rt_pair_index][rt_pair_comp] = - color_load_scale; - system_constants_ - .edram_load_scale_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = - alpha_load_scale; - system_constants_ - .edram_store_min_rt01_rt23[rt_pair_index][rt_pair_comp] = color_min; - system_constants_ - .edram_store_min_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = - alpha_min; - system_constants_ - .edram_store_max_rt01_rt23[rt_pair_index][rt_pair_comp] = color_max; - system_constants_ - .edram_store_max_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = - alpha_max; - system_constants_ - .edram_store_scale_rt01_rt23[rt_pair_index][rt_pair_comp] = - color_store_scale; - system_constants_ - .edram_store_scale_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = - alpha_store_scale; + DxbcShaderTranslator::SetColorFormatSystemConstants(system_constants_, + i, color_format); system_constants_color_formats_[i] = color_format; } } else { diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso index 4350af743..ef0706558 100644 Binary files a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso and b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso differ diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h index b893cf413..3ef815eb4 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h @@ -1,14 +1,14 @@ // generated from `xb buildhlsl` // source: primitive_point_list.gs.hlsl const uint8_t primitive_point_list_gs[] = { - 0x44, 0x58, 0x42, 0x43, 0xF3, 0x5C, 0x0F, 0x58, 0xD6, 
0xC6, 0x30, 0x20, - 0x80, 0x1D, 0xFF, 0x38, 0x08, 0x52, 0x63, 0xAA, 0x01, 0x00, 0x00, 0x00, - 0xE8, 0x1B, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0xC4, 0x0B, 0x00, 0x00, 0x9C, 0x0D, 0x00, 0x00, 0xBC, 0x0F, 0x00, 0x00, - 0x4C, 0x1B, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x88, 0x0B, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x46, 0x61, 0x96, 0xA0, 0xFD, 0xA8, 0x4A, 0x8B, + 0xF7, 0x0D, 0x0E, 0x2B, 0x40, 0xC5, 0x2E, 0x8E, 0x01, 0x00, 0x00, 0x00, + 0xBC, 0x1A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x98, 0x0A, 0x00, 0x00, 0x70, 0x0C, 0x00, 0x00, 0x90, 0x0E, 0x00, 0x00, + 0x20, 0x1A, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x5C, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x47, 0x00, 0x05, 0x00, 0x00, - 0x5D, 0x0B, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0x31, 0x0A, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -16,213 +16,188 @@ const uint8_t primitive_point_list_gs[] = { 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6D, 0x5F, 0x63, 0x62, 0x75, 0x66, 0x66, 0x65, 0x72, 0x00, 0xAB, 0xAB, - 0x64, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, - 0xF0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA8, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xB8, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0xB0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x18, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xDC, 0x06, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB8, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3C, 0x06, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xF3, 0x06, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x53, 0x06, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xB8, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x18, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x07, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xB8, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x68, 0x06, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x19, 0x07, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x79, 0x06, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x54, 0x07, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xB4, 0x06, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x78, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xD8, 0x06, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x9C, 0x07, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x30, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFC, 0x06, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x90, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xAA, 0x07, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xBC, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x07, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1C, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x07, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x40, 0x07, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xF8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x58, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x1C, 0x08, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xF8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7C, 0x07, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x58, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x32, 0x08, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xF8, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x92, 0x07, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x58, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 
0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x49, 0x08, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xA9, 0x07, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xF8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x58, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x5B, 0x08, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xF8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xBB, 0x07, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x58, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x6F, 0x08, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB8, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xCF, 0x07, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x84, 0x08, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x07, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xC4, 0x08, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x08, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 
0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x09, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x64, 0x08, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x19, 0x09, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x79, 0x08, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x09, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x8B, 0x08, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x4A, 0x09, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xA6, 0x08, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x09, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xC2, 0x08, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 
0x00, 0x00, 0x00, - 0x88, 0x09, 0x00, 0x00, 0xD0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xDE, 0x08, 0x00, 0x00, 0xD0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xA7, 0x09, 0x00, 0x00, 0xE0, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFB, 0x08, 0x00, 0x00, 0xE0, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xC7, 0x09, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x13, 0x09, 0x00, 0x00, 0xF0, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xE7, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x0A, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x09, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x27, 0x0A, 0x00, 
0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x09, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x3F, 0x0A, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x71, 0x09, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x57, 0x0A, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x85, 0x09, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x70, 0x0A, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x09, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x89, 0x0A, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB5, 0x09, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x9D, 0x0A, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, - 0x10, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xCD, 0x09, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xB1, 0x0A, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xE5, 0x09, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xC9, 0x0A, 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFD, 0x09, 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xE1, 0x0A, 0x00, 0x00, 0xA0, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x0A, 0x00, 0x00, 0xA0, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x0A, 0x00, 0x00, - 0xB0, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x0B, 0x00, 0x00, 0xC0, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 
0x00, 0x29, 0x0B, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x08, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x43, 0x0B, 0x00, 0x00, - 0xE0, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xA0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, - 0x78, 0x65, 0x5F, 0x66, 0x6C, 0x61, 0x67, 0x73, 0x00, 0x64, 0x77, 0x6F, - 0x72, 0x64, 0x00, 0xAB, 0x00, 0x00, 0x13, 0x00, 0x01, 0x00, 0x01, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x66, + 0x6C, 0x61, 0x67, 0x73, 0x00, 0x64, 0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, + 0x00, 0x00, 0x13, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x06, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x5F, 0x69, 0x6E, + 0x64, 0x65, 0x78, 0x5F, 0x65, 0x6E, 0x64, 0x69, 0x61, 0x6E, 0x00, 0x78, + 0x65, 0x5F, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x5F, 0x62, 0x61, 0x73, + 0x65, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x00, 0x78, 0x65, 0x5F, 0x70, + 0x69, 0x78, 0x65, 0x6C, 0x5F, 0x70, 0x6F, 0x73, 0x5F, 0x72, 0x65, 0x67, + 0x00, 0x78, 0x65, 0x5F, 0x6E, 0x64, 0x63, 0x5F, 0x73, 0x63, 0x61, 0x6C, + 0x65, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x33, 0x00, 0xAB, 0xAB, 0xAB, + 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xB1, 0x06, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x76, 0x65, 0x72, 0x74, 0x65, - 0x78, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x5F, 0x65, 0x6E, 0x64, 0x69, - 0x61, 0x6E, 0x00, 0x78, 0x65, 0x5F, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x00, - 0x78, 0x65, 0x5F, 0x70, 
0x69, 0x78, 0x65, 0x6C, 0x5F, 0x70, 0x6F, 0x73, - 0x5F, 0x72, 0x65, 0x67, 0x00, 0x78, 0x65, 0x5F, 0x6E, 0x64, 0x63, 0x5F, - 0x73, 0x63, 0x61, 0x6C, 0x65, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x33, - 0x00, 0xAB, 0xAB, 0xAB, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x06, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x70, 0x69, 0x78, 0x65, 0x6C, 0x5F, 0x68, 0x61, 0x6C, + 0x66, 0x5F, 0x70, 0x69, 0x78, 0x65, 0x6C, 0x5F, 0x6F, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x00, 0xAB, 0xAB, 0xAB, + 0x00, 0x00, 0x03, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xCF, 0x06, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x6E, 0x64, 0x63, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x00, 0x78, 0x65, 0x5F, 0x61, 0x6C, 0x70, 0x68, 0x61, 0x5F, 0x74, + 0x65, 0x73, 0x74, 0x00, 0x69, 0x6E, 0x74, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x26, 0x07, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x70, 0x69, 0x78, 0x65, 0x6C, - 0x5F, 0x68, 0x61, 0x6C, 0x66, 0x5F, 0x70, 0x69, 0x78, 0x65, 0x6C, 0x5F, - 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, - 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x03, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x07, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x70, + 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x00, 0x66, 0x6C, + 0x6F, 0x61, 0x74, 0x32, 0x00, 0xAB, 0xAB, 0xAB, 0x01, 0x00, 0x03, 0x00, + 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4E, 0x07, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x70, + 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x5F, 0x6D, 0x69, + 0x6E, 0x5F, 0x6D, 0x61, 0x78, 
0x00, 0x78, 0x65, 0x5F, 0x70, 0x6F, 0x69, + 0x6E, 0x74, 0x5F, 0x73, 0x63, 0x72, 0x65, 0x65, 0x6E, 0x5F, 0x74, 0x6F, + 0x5F, 0x6E, 0x64, 0x63, 0x00, 0x78, 0x65, 0x5F, 0x73, 0x73, 0x61, 0x61, + 0x5F, 0x69, 0x6E, 0x76, 0x5F, 0x73, 0x63, 0x61, 0x6C, 0x65, 0x00, 0x78, + 0x65, 0x5F, 0x61, 0x6C, 0x70, 0x68, 0x61, 0x5F, 0x74, 0x65, 0x73, 0x74, + 0x5F, 0x72, 0x61, 0x6E, 0x67, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, + 0x72, 0x61, 0x6D, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x5F, 0x74, 0x69, + 0x6C, 0x65, 0x73, 0x00, 0x78, 0x65, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, + 0x5F, 0x65, 0x78, 0x70, 0x5F, 0x62, 0x69, 0x61, 0x73, 0x00, 0x66, 0x6C, + 0x6F, 0x61, 0x74, 0x34, 0x00, 0xAB, 0xAB, 0xAB, 0x01, 0x00, 0x03, 0x00, + 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x07, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x6E, 0x64, 0x63, 0x5F, 0x6F, - 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x61, 0x6C, 0x70, - 0x68, 0x61, 0x5F, 0x74, 0x65, 0x73, 0x74, 0x00, 0x69, 0x6E, 0x74, 0x00, - 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xF6, 0x07, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x63, + 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x6F, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5F, + 0x6D, 0x61, 0x70, 0x00, 0x75, 0x69, 0x6E, 0x74, 0x34, 0x00, 0xAB, 0xAB, + 0x01, 0x00, 0x13, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB8, 0x07, 0x00, 0x00, - 0x78, 0x65, 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, - 0x65, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x32, 0x00, 0xAB, 0xAB, 0xAB, - 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x07, 0x00, 0x00, - 0x78, 0x65, 0x5F, 0x70, 0x6F, 0x69, 
0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, - 0x65, 0x5F, 0x6D, 0x69, 0x6E, 0x5F, 0x6D, 0x61, 0x78, 0x00, 0x78, 0x65, - 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x63, 0x72, 0x65, 0x65, - 0x6E, 0x5F, 0x74, 0x6F, 0x5F, 0x6E, 0x64, 0x63, 0x00, 0x78, 0x65, 0x5F, - 0x73, 0x73, 0x61, 0x61, 0x5F, 0x69, 0x6E, 0x76, 0x5F, 0x73, 0x63, 0x61, - 0x6C, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x61, 0x6C, 0x70, 0x68, 0x61, 0x5F, - 0x74, 0x65, 0x73, 0x74, 0x5F, 0x72, 0x61, 0x6E, 0x67, 0x65, 0x00, 0x78, - 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x70, 0x69, 0x74, 0x63, - 0x68, 0x5F, 0x74, 0x69, 0x6C, 0x65, 0x73, 0x00, 0x78, 0x65, 0x5F, 0x63, - 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x65, 0x78, 0x70, 0x5F, 0x62, 0x69, 0x61, - 0x73, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x34, 0x00, 0xAB, 0xAB, 0xAB, - 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x96, 0x08, 0x00, 0x00, - 0x78, 0x65, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x6F, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x5F, 0x6D, 0x61, 0x70, 0x00, 0x75, 0x69, 0x6E, 0x74, - 0x34, 0x00, 0xAB, 0xAB, 0x01, 0x00, 0x13, 0x00, 0x01, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xD8, 0x08, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, - 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x64, 0x77, 0x6F, 0x72, 0x64, 0x73, - 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, - 0x5F, 0x66, 0x6C, 0x61, 0x67, 0x73, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, - 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, - 0x77, 0x69, 0x64, 0x74, 0x68, 0x5F, 0x6C, 0x6F, 0x77, 0x5F, 0x72, 0x74, - 0x30, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, - 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x77, 0x69, 0x64, 0x74, 0x68, - 0x5F, 0x6C, 0x6F, 0x77, 0x5F, 0x72, 
0x74, 0x31, 0x00, 0x78, 0x65, 0x5F, - 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, - 0x6B, 0x5F, 0x77, 0x69, 0x64, 0x74, 0x68, 0x5F, 0x6C, 0x6F, 0x77, 0x5F, - 0x72, 0x74, 0x32, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, - 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x77, 0x69, 0x64, - 0x74, 0x68, 0x5F, 0x6C, 0x6F, 0x77, 0x5F, 0x72, 0x74, 0x33, 0x00, 0x78, - 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, - 0x61, 0x63, 0x6B, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x5F, 0x6C, - 0x6F, 0x77, 0x5F, 0x72, 0x74, 0x30, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, - 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, - 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x5F, 0x6C, 0x6F, 0x77, 0x5F, 0x72, - 0x74, 0x31, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, - 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x6F, 0x66, 0x66, 0x73, - 0x65, 0x74, 0x5F, 0x6C, 0x6F, 0x77, 0x5F, 0x72, 0x74, 0x32, 0x00, 0x78, - 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, - 0x61, 0x63, 0x6B, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x5F, 0x6C, - 0x6F, 0x77, 0x5F, 0x72, 0x74, 0x33, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x08, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, + 0x65, 0x5F, 0x64, 0x77, 0x6F, 0x72, 0x64, 0x73, 0x00, 0x78, 0x65, 0x5F, + 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x66, 0x6C, 0x61, + 0x67, 0x73, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, + 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x77, 0x69, 0x64, 0x74, + 0x68, 0x5F, 0x6C, 0x6F, 0x77, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, + 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x6F, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x5F, 0x6C, 0x6F, 0x77, 0x00, 0x78, 0x65, + 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x70, 0x61, + 0x63, 0x6B, 0x5F, 0x77, 0x69, 0x64, 
0x74, 0x68, 0x5F, 0x68, 0x69, 0x67, + 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, + 0x74, 0x5F, 0x70, 0x61, 0x63, 0x6B, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x5F, 0x68, 0x69, 0x67, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x6D, 0x61, 0x73, 0x6B, 0x5F, 0x72, 0x74, 0x30, 0x31, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x6D, 0x61, 0x73, diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt index 3aa296021..03eabcef3 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt @@ -25,27 +25,23 @@ // uint4 xe_color_output_map; // Offset: 112 Size: 16 [unused] // uint4 xe_edram_base_dwords; // Offset: 128 Size: 16 [unused] // uint4 xe_edram_rt_flags; // Offset: 144 Size: 16 [unused] -// uint4 xe_edram_rt_pack_width_low_rt0;// Offset: 160 Size: 16 [unused] -// uint4 xe_edram_rt_pack_width_low_rt1;// Offset: 176 Size: 16 [unused] -// uint4 xe_edram_rt_pack_width_low_rt2;// Offset: 192 Size: 16 [unused] -// uint4 xe_edram_rt_pack_width_low_rt3;// Offset: 208 Size: 16 [unused] -// uint4 xe_edram_rt_pack_offset_low_rt0;// Offset: 224 Size: 16 [unused] -// uint4 xe_edram_rt_pack_offset_low_rt1;// Offset: 240 Size: 16 [unused] -// uint4 xe_edram_rt_pack_offset_low_rt2;// Offset: 256 Size: 16 [unused] -// uint4 xe_edram_rt_pack_offset_low_rt3;// Offset: 272 Size: 16 [unused] -// uint4 xe_edram_load_mask_rt01; // Offset: 288 Size: 16 [unused] -// uint4 xe_edram_load_mask_rt23; // Offset: 304 Size: 16 [unused] -// float4 xe_edram_load_scale_rt01; // Offset: 320 Size: 16 [unused] -// float4 xe_edram_load_scale_rt23; // Offset: 336 Size: 16 [unused] -// uint4 xe_edram_blend_rt01; // Offset: 352 Size: 16 [unused] -// uint4 xe_edram_blend_rt23; // Offset: 368 Size: 16 [unused] -// float4 
xe_edram_blend_constant; // Offset: 384 Size: 16 [unused] -// float4 xe_edram_store_min_rt01; // Offset: 400 Size: 16 [unused] -// float4 xe_edram_store_min_rt23; // Offset: 416 Size: 16 [unused] -// float4 xe_edram_store_max_rt01; // Offset: 432 Size: 16 [unused] -// float4 xe_edram_store_max_rt23; // Offset: 448 Size: 16 [unused] -// float4 xe_edram_store_scale_rt01; // Offset: 464 Size: 16 [unused] -// float4 xe_edram_store_scale_rt23; // Offset: 480 Size: 16 [unused] +// uint4 xe_edram_rt_pack_width_low; // Offset: 160 Size: 16 [unused] +// uint4 xe_edram_rt_pack_offset_low; // Offset: 176 Size: 16 [unused] +// uint4 xe_edram_rt_pack_width_high; // Offset: 192 Size: 16 [unused] +// uint4 xe_edram_rt_pack_offset_high;// Offset: 208 Size: 16 [unused] +// uint4 xe_edram_load_mask_rt01; // Offset: 224 Size: 16 [unused] +// uint4 xe_edram_load_mask_rt23; // Offset: 240 Size: 16 [unused] +// float4 xe_edram_load_scale_rt01; // Offset: 256 Size: 16 [unused] +// float4 xe_edram_load_scale_rt23; // Offset: 272 Size: 16 [unused] +// uint4 xe_edram_blend_rt01; // Offset: 288 Size: 16 [unused] +// uint4 xe_edram_blend_rt23; // Offset: 304 Size: 16 [unused] +// float4 xe_edram_blend_constant; // Offset: 320 Size: 16 [unused] +// float4 xe_edram_store_min_rt01; // Offset: 336 Size: 16 [unused] +// float4 xe_edram_store_min_rt23; // Offset: 352 Size: 16 [unused] +// float4 xe_edram_store_max_rt01; // Offset: 368 Size: 16 [unused] +// float4 xe_edram_store_max_rt23; // Offset: 384 Size: 16 [unused] +// float4 xe_edram_store_scale_rt01; // Offset: 400 Size: 16 [unused] +// float4 xe_edram_store_scale_rt23; // Offset: 416 Size: 16 [unused] // // } // diff --git a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli index 810c9d8bc..cdc333ce6 100644 --- a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli +++ b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli @@ -31,46 +31,38 @@ cbuffer xe_system_cbuffer : register(b0) { // vec4 9 uint4 
xe_edram_rt_flags; // vec4 10 - uint4 xe_edram_rt_pack_width_low_rt0; + uint4 xe_edram_rt_pack_width_low; // vec4 11 - uint4 xe_edram_rt_pack_width_low_rt1; + uint4 xe_edram_rt_pack_offset_low; // vec4 12 - uint4 xe_edram_rt_pack_width_low_rt2; + uint4 xe_edram_rt_pack_width_high; // vec4 13 - uint4 xe_edram_rt_pack_width_low_rt3; + uint4 xe_edram_rt_pack_offset_high; // vec4 14 - uint4 xe_edram_rt_pack_offset_low_rt0; - // vec4 15 - uint4 xe_edram_rt_pack_offset_low_rt1; - // vec4 16 - uint4 xe_edram_rt_pack_offset_low_rt2; - // vec4 17 - uint4 xe_edram_rt_pack_offset_low_rt3; - // vec4 18 uint4 xe_edram_load_mask_rt01; - // vec4 19 + // vec4 15 uint4 xe_edram_load_mask_rt23; - // vec4 20 + // vec4 16 float4 xe_edram_load_scale_rt01; - // vec4 21 + // vec4 17 float4 xe_edram_load_scale_rt23; - // vec4 22 + // vec4 18 uint4 xe_edram_blend_rt01; - // vec4 23 + // vec4 19 uint4 xe_edram_blend_rt23; - // vec4 24 + // vec4 20 float4 xe_edram_blend_constant; - // vec4 25 + // vec4 21 float4 xe_edram_store_min_rt01; - // vec4 26 + // vec4 22 float4 xe_edram_store_min_rt23; - // vec4 27 + // vec4 23 float4 xe_edram_store_max_rt01; - // vec4 28 + // vec4 24 float4 xe_edram_store_max_rt23; - // vec4 29 + // vec4 25 float4 xe_edram_store_scale_rt01; - // vec4 30 + // vec4 26 float4 xe_edram_store_scale_rt23; }; diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 888f7b606..4213ff9e6 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -71,6 +71,157 @@ DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used) } DxbcShaderTranslator::~DxbcShaderTranslator() = default; +uint32_t DxbcShaderTranslator::GetColorFormatRTFlags( + ColorRenderTargetFormat format) { + static const uint32_t kRTFormatFlags[16] = { + // k_8_8_8_8 + kRTFlag_FormatFixed, + // k_8_8_8_8_GAMMA + kRTFlag_FormatFixed, + // k_2_10_10_10 + kRTFlag_FormatFixed, + // k_2_10_10_10_FLOAT + kRTFlag_FormatFloat10, + 
// k_16_16 + kRTFlag_FormatFixed, + // k_16_16_16_16 + kRTFlag_FormatFixed, + // k_16_16_FLOAT + kRTFlag_FormatFloat16, + // k_16_16_16_16_FLOAT + kRTFlag_FormatFloat16, + // Unused + 0, + // Unused + 0, + // k_2_10_10_10_AS_16_16_16_16 + kRTFlag_FormatFixed, + // Unused. + 0, + // k_2_10_10_10_FLOAT_AS_16_16_16_16 + kRTFlag_FormatFloat10, + // Unused. + 0, + // k_32_FLOAT + 0, + // k_32_32_FLOAT + 0, + }; + return kRTFormatFlags[uint32_t(format)]; +} + +void DxbcShaderTranslator::SetColorFormatSystemConstants( + SystemConstants& constants, uint32_t rt_index, + ColorRenderTargetFormat format) { + constants.edram_rt_pack_width_high[rt_index] = 0; + constants.edram_rt_pack_offset_high[rt_index] = 0; + uint32_t color_mask = UINT32_MAX, alpha_mask = UINT32_MAX; + // Initialize min/max to Infinity. + uint32_t color_min = 0xFF800000u, alpha_min = 0xFF800000u; + uint32_t color_max = 0x7F800000u, alpha_max = 0x7F800000u; + float color_load_scale = 1.0f, alpha_load_scale = 1.0f; + float color_store_scale = 1.0f, alpha_store_scale = 1.0f; + switch (format) { + case ColorRenderTargetFormat::k_8_8_8_8: + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + constants.edram_rt_pack_width_low[rt_index] = + 8 | (8 << 8) | (8 << 16) | (8 << 24); + constants.edram_rt_pack_offset_low[rt_index] = + (8 << 8) | (16 << 16) | (24 << 24); + color_mask = alpha_mask = 255; + color_min = alpha_min = 0; + color_max = alpha_max = 0x3F800000; + color_load_scale = alpha_load_scale = 1.0f / 255.0f; + color_store_scale = alpha_store_scale = 255.0f; + break; + case ColorRenderTargetFormat::k_2_10_10_10: + case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16: + constants.edram_rt_pack_width_low[rt_index] = + 10 | (10 << 8) | (10 << 16) | (2 << 24); + constants.edram_rt_pack_offset_low[rt_index] = + (10 << 8) | (20 << 16) | (30 << 24); + color_mask = 1023; + alpha_mask = 3; + color_min = alpha_min = 0; + color_max = alpha_max = 0x3F800000; + color_load_scale = 1.0f / 1023.0f; + alpha_load_scale = 
1.0f / 3.0f; + color_store_scale = 1023.0f; + alpha_store_scale = 3.0f; + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + constants.edram_rt_pack_width_low[rt_index] = + 10 | (10 << 8) | (10 << 16) | (2 << 24); + constants.edram_rt_pack_offset_low[rt_index] = + (10 << 8) | (20 << 16) | (30 << 24); + color_mask = 1023; + alpha_mask = 3; + color_min = alpha_min = 0; + // 31.875. + color_max = 0x41FF0000; + alpha_max = 0x3F800000; + alpha_load_scale = 1.0f / 3.0f; + alpha_store_scale = 3.0f; + break; + case ColorRenderTargetFormat::k_16_16: + case ColorRenderTargetFormat::k_16_16_16_16: + constants.edram_rt_pack_width_low[rt_index] = 16 | (16 << 8); + constants.edram_rt_pack_offset_low[rt_index] = 16 << 8; + if (format == ColorRenderTargetFormat::k_16_16_16_16) { + constants.edram_rt_pack_width_high[rt_index] = (16 << 16) | (16 << 24); + constants.edram_rt_pack_offset_high[rt_index] = 16 << 24; + } + // -32.0. + color_min = alpha_min = 0xC2000000u; + // 32.0. 
+ color_max = alpha_max = 0x42000000u; + color_load_scale = alpha_load_scale = 32.0f / 32767.0f; + color_store_scale = alpha_store_scale = 32767.0f / 32.0f; + break; + case ColorRenderTargetFormat::k_16_16_FLOAT: + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + constants.edram_rt_pack_width_low[rt_index] = 16 | (16 << 8); + constants.edram_rt_pack_offset_low[rt_index] = 16 << 8; + if (format == ColorRenderTargetFormat::k_16_16_16_16_FLOAT) { + constants.edram_rt_pack_width_high[rt_index] = (16 << 16) | (16 << 24); + constants.edram_rt_pack_offset_high[rt_index] = 16 << 24; + } + color_mask = alpha_mask = 0xFFFF; + break; + case ColorRenderTargetFormat::k_32_FLOAT: + case ColorRenderTargetFormat::k_32_32_FLOAT: + constants.edram_rt_pack_width_low[rt_index] = 32; + constants.edram_rt_pack_offset_low[rt_index] = 0; + if (format == ColorRenderTargetFormat::k_32_32_FLOAT) { + constants.edram_rt_pack_width_high[rt_index] = 32; + } + break; + default: + assert_always(); + break; + } + uint32_t rt_pair_index = rt_index >> 1; + uint32_t rt_pair_comp = (rt_index & 1) << 1; + constants.edram_load_mask_rt01_rt23[rt_pair_index][rt_pair_comp] = color_mask; + constants.edram_load_mask_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = + alpha_mask; + constants.edram_load_scale_rt01_rt23[rt_pair_index][rt_pair_comp] = + color_load_scale; + constants.edram_load_scale_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = + alpha_load_scale; + constants.edram_store_min_rt01_rt23[rt_pair_index][rt_pair_comp] = color_min; + constants.edram_store_min_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = + alpha_min; + constants.edram_store_max_rt01_rt23[rt_pair_index][rt_pair_comp] = color_max; + constants.edram_store_max_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = + alpha_max; + constants.edram_store_scale_rt01_rt23[rt_pair_index][rt_pair_comp] = + color_store_scale; + constants.edram_store_scale_rt01_rt23[rt_pair_index][rt_pair_comp + 1] = + alpha_store_scale; +} + bool 
DxbcShaderTranslator::GetBlendConstants(uint32_t blend_control, uint32_t& blend_x_out, uint32_t& blend_y_out) { @@ -1188,8 +1339,52 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { PopSystemTemp(2); } +void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ExtractPackLayout( + uint32_t rt_index, bool high, uint32_t width_temp, uint32_t offset_temp) { + if (high) { + system_constants_used_ |= (1ull << kSysConst_EDRAMRTPackWidthHigh_Index) | + (1ull << kSysConst_EDRAMRTPackOffsetHigh_Index); + } else { + system_constants_used_ |= (1ull << kSysConst_EDRAMRTPackWidthLow_Index) | + (1ull << kSysConst_EDRAMRTPackOffsetLow_Index); + } + for (uint32_t i = 0; i < 2; ++i) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(i ? offset_temp : width_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(8); + shader_code_.push_back(8); + shader_code_.push_back(8); + shader_code_.push_back(8); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(8); + shader_code_.push_back(16); + shader_code_.push_back(24); + shader_code_.push_back(EncodeVectorReplicatedOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + if (i) { + shader_code_.push_back(high ? kSysConst_EDRAMRTPackOffsetHigh_Vec + : kSysConst_EDRAMRTPackOffsetLow_Vec); + } else { + shader_code_.push_back(high ? 
kSysConst_EDRAMRTPackWidthHigh_Vec + : kSysConst_EDRAMRTPackWidthLow_Vec); + } + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + } +} + void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( - uint32_t edram_dword_offset_temp, uint32_t rt_index, uint32_t target_temp) { + uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp, + uint32_t rt_index, uint32_t target_temp) { // For indexing of the format constants. uint32_t rt_pair_index = rt_index >> 1; uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000; @@ -1200,7 +1395,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3)); @@ -1209,10 +1404,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(kRTFlag_Format64bpp); shader_code_.push_back(kRTFlag_FormatFixed); shader_code_.push_back(kRTFlag_FormatFloat10); shader_code_.push_back(kRTFlag_FormatFloat16); + shader_code_.push_back(0); ++stat_.instruction_count; ++stat_.uint_instruction_count; @@ -1225,7 +1420,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( shader_code_.push_back(target_temp); shader_code_.push_back( EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); - shader_code_.push_back(edram_dword_offset_temp); + shader_code_.push_back(edram_dword_offset_low_temp); 
shader_code_.push_back(EncodeVectorSwizzledOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 2)); shader_code_.push_back(0); @@ -1235,29 +1430,28 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( // Unpack the low 32 bits, as signed because of k_16_16 and k_16_16_16_16 // (will be masked later if needed). - system_constants_used_ |= ((1ull << kSysConst_EDRAMRTPackWidthLowRT0_Index) | - (1ull << kSysConst_EDRAMRTPackOffsetLowRT0_Index)) - << rt_index; + uint32_t pack_width_low_temp = PushSystemTemp(); + uint32_t pack_offset_low_temp = PushSystemTemp(); + CompletePixelShader_WriteToROV_ExtractPackLayout( + rt_index, false, pack_width_low_temp, pack_offset_low_temp); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(target_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMRTPackWidthLowRT0_Vec + rt_index); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMRTPackOffsetLowRT0_Vec + rt_index); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(pack_width_low_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(pack_offset_low_temp); shader_code_.push_back( 
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1)); shader_code_.push_back(target_temp); ++stat_.instruction_count; ++stat_.int_instruction_count; + // Release pack_width_low_temp and pack_offset_low_temp. + PopSystemTemp(2); // Mask the components to differentiate between signed and unsigned. system_constants_used_ |= (1ull << kSysConst_EDRAMLoadMaskRT01_Index) @@ -1298,7 +1492,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(target_temp); shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); @@ -1323,7 +1517,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( D3D10_SB_INSTRUCTION_TEST_NONZERO) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); shader_code_.push_back(flags_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -1638,7 +1832,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(target_temp); shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); @@ -1946,8 +2140,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend( } void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( - uint32_t edram_dword_offset_temp, 
uint32_t rt_index, - uint32_t source_and_scratch_temp) { + uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp, + uint32_t rt_index, uint32_t source_and_scratch_temp) { // For indexing of the format constants. uint32_t rt_pair_index = rt_index >> 1; uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000; @@ -1958,7 +2152,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3)); @@ -1967,10 +2161,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(kRTFlag_Format64bpp); shader_code_.push_back(kRTFlag_FormatFixed); shader_code_.push_back(kRTFlag_FormatFloat10); shader_code_.push_back(kRTFlag_FormatFloat16); + shader_code_.push_back(0); ++stat_.instruction_count; ++stat_.uint_instruction_count; @@ -2048,7 +2242,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(source_and_scratch_temp); shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); @@ -2073,7 +2267,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( 
D3D10_SB_INSTRUCTION_TEST_NONZERO) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); shader_code_.push_back(flags_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -2380,7 +2574,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(source_and_scratch_temp); shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); shader_code_.push_back(flags_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); @@ -2394,46 +2588,46 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( PopSystemTemp(); // Pack and store the low 32 bits. - uint32_t pack_low_temp = PushSystemTemp(true); + uint32_t pack_temp = PushSystemTemp(true); + // 1) Insert color components into different vector components. 
- system_constants_used_ |= ((1ull << kSysConst_EDRAMRTPackWidthLowRT0_Index) | - (1ull << kSysConst_EDRAMRTPackOffsetLowRT0_Index)) - << rt_index; + uint32_t pack_width_low_temp = PushSystemTemp(); + uint32_t pack_offset_low_temp = PushSystemTemp(); + CompletePixelShader_WriteToROV_ExtractPackLayout( + rt_index, false, pack_width_low_temp, pack_offset_low_temp); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(pack_low_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMRTPackWidthLowRT0_Vec + rt_index); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMRTPackOffsetLowRT0_Vec + rt_index); + shader_code_.push_back(pack_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(pack_width_low_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(pack_offset_low_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(source_and_scratch_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); 
++stat_.instruction_count; ++stat_.uint_instruction_count; + // Release pack_width_low_temp and pack_offset_low_temp. + PopSystemTemp(2); // 2) Merge XY and ZW. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; // 3) Merge X and Y. @@ -2441,13 +2635,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; // 4) Write the low 32 bits. 
@@ -2460,13 +2654,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( shader_code_.push_back(0); shader_code_.push_back( EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); - shader_code_.push_back(edram_dword_offset_temp); + shader_code_.push_back(edram_dword_offset_low_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(pack_low_temp); + shader_code_.push_back(pack_temp); ++stat_.instruction_count; ++stat_.c_texture_store_instructions; - // Release pack_low_temp. + + // Release pack_temp. PopSystemTemp(); // TODO(Triang3l): 64bpp packing and storing. @@ -2480,51 +2675,51 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Calculate the offsets of the samples in the EDRAM. // *************************************************************************** - uint32_t edram_coord_temp = PushSystemTemp(); + uint32_t edram_coord_low_temp = PushSystemTemp(); - // Load SV_Position in edram_coord_temp.xy as an integer. + // Load SV_Position in edram_coord_low_temp.xy as an integer. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1)); shader_code_.push_back(kPSInPositionRegister); ++stat_.instruction_count; ++stat_.conversion_instruction_count; - // Load X tile index to edram_coord_temp.z, part 1 of the division by 80 - + // Load X tile index to edram_coord_low_temp.z, part 1 of the division by 80 - // get the high 32 bits of the result of the multiplication by 0xCCCCCCCD. 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMUL) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_0_COMPONENT) | ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_NULL) | ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_0D)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back(0xCCCCCCCDu); ++stat_.instruction_count; ++stat_.uint_instruction_count; - // Load tile index to edram_coord_temp.zw. Part 2 of the division by 80 - + // Load tile index to edram_coord_low_temp.zw. Part 2 of the division by 80 - // right shift the high bits of x*0xCCCCCCCD by 6. And divide by 16 by right // shifting by 4. 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1100, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01100100, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(0); @@ -2540,10 +2735,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b11101110, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(uint32_t(-80)); @@ -2552,7 +2747,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(0); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); ++stat_.instruction_count; ++stat_.int_instruction_count; @@ -2563,16 +2758,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Calculate the address in the EDRAM buffer. // 1) Multiply tile Y index by the pitch and add X tile index to it to - // edram_coord_temp.z. + // edram_coord_low_temp.z. 
system_constants_used_ |= 1ull << kSysConst_EDRAMPitchTiles_Index; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_EDRAMPitchTiles_Comp, 3)); @@ -2581,47 +2776,100 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(kSysConst_EDRAMPitchTiles_Vec); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; - // 2) Get dword offset within the tile to edram_coord_temp.x. + // TODO(Triang3l): For depth, swap 40-column groups into Y. + + // 2) Get dword offset within the tile to edram_coord_low_temp.x. 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back(80); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; // 3) Combine the tile offset and the offset within the tile to - // edram_coord_temp.x. + // edram_coord_low_temp.x. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back(1280); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; + // Adjust the offsets for 64 bits per pixel. + + uint32_t edram_coord_high_temp = PushSystemTemp(); + + // Get which render targets are 64bpp, as log2 of dword count per pixel. 
+ system_constants_used_ |= 1ull << kSysConst_EDRAMRTPackWidthHigh_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(edram_coord_high_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMRTPackWidthHigh_Vec); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // Multiply the offsets by 1 or 2 depending on the number of bits per pixel. + // It's okay to do this here because everything in the equation (at least for + // Xenia's representation of the EDRAM - may not be true on the real console) + // needs to be multiplied by 2 - Y tile index (the same as multiplying the + // pitch by 2), X tile index (it addresses pairs of tiles in this case), and + // the offset within a pair of tiles. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(edram_coord_low_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(edram_coord_low_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(edram_coord_high_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + // Add the EDRAM bases for each render target. // TODO(Triang3l): Do this for depth to a separate register. system_constants_used_ |= 1ull << kSysConst_EDRAMBaseDwords_Index; @@ -2629,10 +2877,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(edram_coord_temp); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(edram_coord_temp); + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(edram_coord_low_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); shader_code_.push_back(cbuffer_index_system_constants_); @@ -2641,6 +2889,21 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.int_instruction_count; + // Get the offsets of the upper 32 bits. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(edram_coord_high_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(edram_coord_low_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(edram_coord_high_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + // *************************************************************************** // Write to color render targets. // *************************************************************************** @@ -2739,8 +3002,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; uint32_t dest_color_temp = PushSystemTemp(); - CompletePixelShader_WriteToROV_LoadColor(edram_coord_temp, rt_index, - dest_color_temp); + CompletePixelShader_WriteToROV_LoadColor( + edram_coord_low_temp, edram_coord_high_temp, rt_index, dest_color_temp); // Blend if needed. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | @@ -2769,7 +3032,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // TODO(Triang3l): Convert to sRGB for k_8_8_8_8_GAMMA. // Write the new color, which may have been modified by blending. - CompletePixelShader_WriteToROV_StoreColor(edram_coord_temp, rt_index, + CompletePixelShader_WriteToROV_StoreColor(edram_coord_low_temp, + edram_coord_high_temp, rt_index, system_temp_color_[rt_index]); // Close the check whether the RT is used. @@ -2778,8 +3042,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; } - // Release edram_coord_temp, rt_used_temp, rt_load_temp and rt_blend_temp. 
- PopSystemTemp(4); + // Release edram_coord_low_temp, edram_coord_high_temp, rt_used_temp, + // rt_load_temp and rt_blend_temp. + PopSystemTemp(5); } void DxbcShaderTranslator::CompletePixelShader() { @@ -9301,47 +9566,39 @@ const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator:: // vec4 9 {"xe_edram_rt_flags", RdefTypeIndex::kUint4, 144, 16}, // vec4 10 - {"xe_edram_rt_pack_width_low_rt0", RdefTypeIndex::kUint4, 160, 16}, + {"xe_edram_rt_pack_width_low", RdefTypeIndex::kUint4, 160, 16}, // vec4 11 - {"xe_edram_rt_pack_width_low_rt1", RdefTypeIndex::kUint4, 176, 16}, + {"xe_edram_rt_pack_offset_low", RdefTypeIndex::kUint4, 176, 16}, // vec4 12 - {"xe_edram_rt_pack_width_low_rt2", RdefTypeIndex::kUint4, 192, 16}, + {"xe_edram_rt_pack_width_high", RdefTypeIndex::kUint4, 192, 16}, // vec4 13 - {"xe_edram_rt_pack_width_low_rt3", RdefTypeIndex::kUint4, 208, 16}, + {"xe_edram_rt_pack_offset_high", RdefTypeIndex::kUint4, 208, 16}, // vec4 14 - {"xe_edram_rt_pack_offset_low_rt0", RdefTypeIndex::kUint4, 224, 16}, + {"xe_edram_load_mask_low_rt01", RdefTypeIndex::kUint4, 224, 16}, // vec4 15 - {"xe_edram_rt_pack_offset_low_rt1", RdefTypeIndex::kUint4, 240, 16}, + {"xe_edram_load_mask_low_rt23", RdefTypeIndex::kUint4, 240, 16}, // vec4 16 - {"xe_edram_rt_pack_offset_low_rt2", RdefTypeIndex::kUint4, 256, 16}, + {"xe_edram_load_scale_rt01", RdefTypeIndex::kFloat4, 256, 16}, // vec4 17 - {"xe_edram_rt_pack_offset_low_rt3", RdefTypeIndex::kUint4, 272, 16}, + {"xe_edram_load_scale_rt23", RdefTypeIndex::kFloat4, 272, 16}, // vec4 18 - {"xe_edram_load_mask_low_rt01", RdefTypeIndex::kUint4, 288, 16}, + {"xe_edram_blend_rt01", RdefTypeIndex::kUint4, 288, 16}, // vec4 19 - {"xe_edram_load_mask_low_rt23", RdefTypeIndex::kUint4, 304, 16}, + {"xe_edram_blend_rt23", RdefTypeIndex::kUint4, 304, 16}, // vec4 20 - {"xe_edram_load_scale_rt01", RdefTypeIndex::kFloat4, 320, 16}, + {"xe_edram_blend_constant", RdefTypeIndex::kFloat4, 320, 16}, // vec4 21 - 
{"xe_edram_load_scale_rt23", RdefTypeIndex::kFloat4, 336, 16}, + {"xe_edram_store_min_rt01", RdefTypeIndex::kFloat4, 336, 16}, // vec4 22 - {"xe_edram_blend_rt01", RdefTypeIndex::kUint4, 352, 16}, + {"xe_edram_store_min_rt23", RdefTypeIndex::kFloat4, 352, 16}, // vec4 23 - {"xe_edram_blend_rt23", RdefTypeIndex::kUint4, 368, 16}, + {"xe_edram_store_max_rt01", RdefTypeIndex::kFloat4, 368, 16}, // vec4 24 - {"xe_edram_blend_constant", RdefTypeIndex::kFloat4, 384, 16}, + {"xe_edram_store_max_rt23", RdefTypeIndex::kFloat4, 384, 16}, // vec4 25 - {"xe_edram_store_min_rt01", RdefTypeIndex::kFloat4, 400, 16}, + {"xe_edram_store_scale_rt01", RdefTypeIndex::kFloat4, 400, 16}, // vec4 26 - {"xe_edram_store_min_rt23", RdefTypeIndex::kFloat4, 416, 16}, - // vec4 27 - {"xe_edram_store_max_rt01", RdefTypeIndex::kFloat4, 432, 16}, - // vec4 28 - {"xe_edram_store_max_rt23", RdefTypeIndex::kFloat4, 448, 16}, - // vec4 29 - {"xe_edram_store_scale_rt01", RdefTypeIndex::kFloat4, 464, 16}, - // vec4 30 - {"xe_edram_store_scale_rt23", RdefTypeIndex::kFloat4, 480, 16}, + {"xe_edram_store_scale_rt23", RdefTypeIndex::kFloat4, 416, 16}, }; void DxbcShaderTranslator::WriteResourceDefinitions() { diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 8037237c8..7859cc5a8 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -47,22 +47,27 @@ class DxbcShaderTranslator : public ShaderTranslator { enum : uint32_t { // Whether the write mask is non-zero. - kRTFlag_Used = 1, + kRTFlag_Used_Shift = 0, + kRTFlag_Used = 1u << kRTFlag_Used_Shift, // Whether the render target needs to be merged with another (if the write // mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is // enabled and it's not no-op). - kRTFlag_Load = kRTFlag_Used << 1, - kRTFlag_Blend = kRTFlag_Load << 1, - // Whether the format is represented by 2 dwords. 
- kRTFlag_Format64bpp = kRTFlag_Blend << 1, + kRTFlag_Load_Shift = kRTFlag_Used_Shift + 1, + kRTFlag_Load = 1u << kRTFlag_Load_Shift, + kRTFlag_Blend_Shift = kRTFlag_Load_Shift + 1, + kRTFlag_Blend = 1u << kRTFlag_Blend_Shift, + // Whether the format is fixed-point and needs to be converted to integer // (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16). - kRTFlag_FormatFixed = kRTFlag_Format64bpp << 1, + kRTFlag_FormatFixed_Shift = kRTFlag_Blend_Shift + 1, + kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift, // Whether the format is k_2_10_10_10_FLOAT and 7e3 conversion is needed. - kRTFlag_FormatFloat10 = kRTFlag_FormatFixed << 1, + kRTFlag_FormatFloat10_Shift = kRTFlag_FormatFixed_Shift + 1, + kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift, // Whether the format is k_16_16_FLOAT or k_16_16_16_16_FLOAT and // f16tof32/f32tof16 is needed. - kRTFlag_FormatFloat16 = kRTFlag_FormatFloat10 << 1, + kRTFlag_FormatFloat16_Shift = kRTFlag_FormatFloat10_Shift + 1, + kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift, }; enum : uint32_t { @@ -230,48 +235,59 @@ class DxbcShaderTranslator : public ShaderTranslator { // Binding and format info flags. uint32_t edram_rt_flags[4]; - // vec4 10:13 - // Format info - widths of components in the lower 32 bits (for ibfe/bfi). - uint32_t edram_rt_pack_width_low[4][4]; + // vec4 10 + // Format info - widths of components in the lower 32 bits (for ibfe/bfi), + // packed as 8:8:8:8 for each render target. + uint32_t edram_rt_pack_width_low[4]; - // vec4 14:17 + // vec4 11 // Format info - offsets of components in the lower 32 bits (for ibfe/bfi), - // each in 8 bits. - uint32_t edram_rt_pack_offset_low[4][4]; + // packed as 8:8:8:8 for each render target. + uint32_t edram_rt_pack_offset_low[4]; - // vec4 18:19 + // vec4 12 + // Format info - widths of components in the upper 32 bits (for ibfe/bfi), + // packed as 8:8:8:8 for each render target. 
+ uint32_t edram_rt_pack_width_high[4]; + + // vec4 13 + // Format info - offsets of components in the upper 32 bits (for ibfe/bfi), + // packed as 8:8:8:8 for each render target. + uint32_t edram_rt_pack_offset_high[4]; + + // vec4 14:15 // Format info - mask of color and alpha after unpacking, but before float // conversion. Primarily to differentiate between signed and unsigned // formats because ibfe is used for both since k_16_16 and k_16_16_16_16 are // signed. uint32_t edram_load_mask_rt01_rt23[2][4]; - // vec4 20:21 + // vec4 16:17 // Format info - scale to apply to the color and the alpha of each render // target after unpacking and converting. float edram_load_scale_rt01_rt23[2][4]; - // vec4 22:23 + // vec4 18:19 // Render target blending options. uint32_t edram_blend_rt01_rt23[2][4]; - // vec4 24 + // vec4 20 // The constant blend factor for the respective modes. float edram_blend_constant[4]; - // vec4 25:26 + // vec4 21:22 // Format info - minimum color and alpha values (as float, before // conversion) writable to the each render target. Integer so it's easier to // write infinity. uint32_t edram_store_min_rt01_rt23[2][4]; - // vec4 27:28 + // vec4 23:24 // Format info - maximum color and alpha values (as float, before // conversion) writable to the each render target. Integer so it's easier to // write infinity. uint32_t edram_store_max_rt01_rt23[2][4]; - // vec4 29:30 + // vec4 25:26 // Format info - scale to apply to the color and the alpha of each render // target before converting and packing. float edram_store_scale_rt01_rt23[2][4]; @@ -321,6 +337,13 @@ class DxbcShaderTranslator : public ShaderTranslator { return sampler_bindings_.data(); } + // Returns the bits that need to be added to the RT flags constant - needs to + // be done externally, not in SetColorFormatSystemConstants, because the flags + // contain other state. 
+ static uint32_t GetColorFormatRTFlags(ColorRenderTargetFormat format); + static void SetColorFormatSystemConstants(SystemConstants& constants, + uint32_t rt_index, + ColorRenderTargetFormat format); // Returns whether blending should be done at all (not 1 * src + 0 * dest). static bool GetBlendConstants(uint32_t blend_control, uint32_t& blend_x_out, uint32_t& blend_y_out); @@ -410,47 +433,25 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysConst_EDRAMRTFlags_Index = kSysConst_EDRAMBaseDwords_Index + 1, kSysConst_EDRAMRTFlags_Vec = kSysConst_EDRAMBaseDwords_Vec + 1, - kSysConst_EDRAMRTPackWidthLowRT0_Index = kSysConst_EDRAMRTFlags_Index + 1, - kSysConst_EDRAMRTPackWidthLowRT0_Vec = kSysConst_EDRAMRTFlags_Vec + 1, + kSysConst_EDRAMRTPackWidthLow_Index = kSysConst_EDRAMRTFlags_Index + 1, + kSysConst_EDRAMRTPackWidthLow_Vec = kSysConst_EDRAMRTFlags_Vec + 1, - kSysConst_EDRAMRTPackWidthLowRT1_Index = - kSysConst_EDRAMRTPackWidthLowRT0_Index + 1, - kSysConst_EDRAMRTPackWidthLowRT1_Vec = - kSysConst_EDRAMRTPackWidthLowRT0_Vec + 1, + kSysConst_EDRAMRTPackOffsetLow_Index = + kSysConst_EDRAMRTPackWidthLow_Index + 1, + kSysConst_EDRAMRTPackOffsetLow_Vec = kSysConst_EDRAMRTPackWidthLow_Vec + 1, - kSysConst_EDRAMRTPackWidthLowRT2_Index = - kSysConst_EDRAMRTPackWidthLowRT1_Index + 1, - kSysConst_EDRAMRTPackWidthLowRT2_Vec = - kSysConst_EDRAMRTPackWidthLowRT1_Vec + 1, + kSysConst_EDRAMRTPackWidthHigh_Index = + kSysConst_EDRAMRTPackOffsetLow_Index + 1, + kSysConst_EDRAMRTPackWidthHigh_Vec = kSysConst_EDRAMRTPackOffsetLow_Vec + 1, - kSysConst_EDRAMRTPackWidthLowRT3_Index = - kSysConst_EDRAMRTPackWidthLowRT2_Index + 1, - kSysConst_EDRAMRTPackWidthLowRT3_Vec = - kSysConst_EDRAMRTPackWidthLowRT2_Vec + 1, - - kSysConst_EDRAMRTPackOffsetLowRT0_Index = - kSysConst_EDRAMRTPackWidthLowRT3_Index + 1, - kSysConst_EDRAMRTPackOffsetLowRT0_Vec = - kSysConst_EDRAMRTPackWidthLowRT3_Vec + 1, - - kSysConst_EDRAMRTPackOffsetLowRT1_Index = - kSysConst_EDRAMRTPackOffsetLowRT0_Index + 1, 
- kSysConst_EDRAMRTPackOffsetLowRT1_Vec = - kSysConst_EDRAMRTPackOffsetLowRT0_Vec + 1, - - kSysConst_EDRAMRTPackOffsetLowRT2_Index = - kSysConst_EDRAMRTPackOffsetLowRT1_Index + 1, - kSysConst_EDRAMRTPackOffsetLowRT2_Vec = - kSysConst_EDRAMRTPackOffsetLowRT1_Vec + 1, - - kSysConst_EDRAMRTPackOffsetLowRT3_Index = - kSysConst_EDRAMRTPackOffsetLowRT2_Index + 1, - kSysConst_EDRAMRTPackOffsetLowRT3_Vec = - kSysConst_EDRAMRTPackOffsetLowRT2_Vec + 1, + kSysConst_EDRAMRTPackOffsetHigh_Index = + kSysConst_EDRAMRTPackWidthHigh_Index + 1, + kSysConst_EDRAMRTPackOffsetHigh_Vec = + kSysConst_EDRAMRTPackWidthHigh_Vec + 1, kSysConst_EDRAMLoadMaskRT01_Index = - kSysConst_EDRAMRTPackOffsetLowRT3_Index + 1, - kSysConst_EDRAMLoadMaskRT01_Vec = kSysConst_EDRAMRTPackOffsetLowRT3_Vec + 1, + kSysConst_EDRAMRTPackOffsetHigh_Index + 1, + kSysConst_EDRAMLoadMaskRT01_Vec = kSysConst_EDRAMRTPackOffsetHigh_Vec + 1, kSysConst_EDRAMLoadMaskRT23_Index = kSysConst_EDRAMLoadMaskRT01_Index + 1, kSysConst_EDRAMLoadMaskRT23_Vec = kSysConst_EDRAMLoadMaskRT01_Vec + 1, @@ -588,8 +589,16 @@ class DxbcShaderTranslator : public ShaderTranslator { // Writing the epilogue. void CompleteVertexShader(); void CompletePixelShader_WriteToRTVs(); + // Extracts widths and offsets of the components in the lower or the upper + // dword of a pixel from the format constants, for use as ibfe and bfi + // operands later. 
+ void CompletePixelShader_WriteToROV_ExtractPackLayout(uint32_t rt_index, + bool high, + uint32_t width_temp, + uint32_t offset_temp); void CompletePixelShader_WriteToROV_LoadColor( - uint32_t edram_dword_offset_temp, uint32_t rt_index, + uint32_t edram_dword_offset_low_temp, + uint32_t edram_dword_offset_high_temp, uint32_t rt_index, uint32_t target_temp); void CompletePixelShader_WriteToROV_Blend(uint32_t rt_index, uint32_t src_color_and_output_temp, @@ -606,7 +615,8 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t shift_x, uint32_t shift_y, uint32_t shift_z, uint32_t shift_w, uint32_t target_temp, uint32_t write_mask = 0b1111); void CompletePixelShader_WriteToROV_StoreColor( - uint32_t edram_dword_offset_temp, uint32_t rt_index, + uint32_t edram_dword_offset_low_temp, + uint32_t edram_dword_offset_high_temp, uint32_t rt_index, uint32_t source_and_scratch_temp); void CompletePixelShader_WriteToROV(); void CompletePixelShader();