Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2022-06-26 16:58:27 +03:00
commit e191430091
8 changed files with 54 additions and 39 deletions

View File

@ -112,7 +112,8 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32;
uint32_t index_buffer_read_count =
std::min(vgt_draw_initiator.num_indices, vgt_dma_size.num_words);
std::min(uint32_t(vgt_draw_initiator.num_indices),
uint32_t(vgt_dma_size.num_words));
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16) {
// Handle the index endianness the same way as the PrimitiveProcessor.
if (index_endian == xenos::Endian::k8in32) {
@ -281,7 +282,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
scissor_bottom += window_y_offset;
}
auto pa_sc_screen_scissor_br = regs.Get<reg::PA_SC_SCREEN_SCISSOR_BR>();
scissor_bottom = std::min(scissor_bottom, pa_sc_screen_scissor_br.br_y);
scissor_bottom =
std::min(scissor_bottom, int32_t(pa_sc_screen_scissor_br.br_y));
uint32_t max_y = uint32_t(std::max(scissor_bottom, int32_t(0)));
if (regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable) {
@ -302,7 +304,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
if (scissor_window_offset) {
scissor_right += pa_sc_window_offset.window_x_offset;
}
scissor_right = std::min(scissor_right, pa_sc_screen_scissor_br.br_x);
scissor_right =
std::min(scissor_right, int32_t(pa_sc_screen_scissor_br.br_x));
if (scissor_right >= xenos::kTexture2DCubeMaxWidthHeight) {
estimate_vertex_max_y = true;
}

View File

@ -576,11 +576,11 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
// Screen scissor is not used by Direct3D 9 (always 0, 0 to 8192, 8192), but
// still handled here for completeness.
auto pa_sc_screen_scissor_tl = regs.Get<reg::PA_SC_SCREEN_SCISSOR_TL>();
tl_x = std::max(tl_x, pa_sc_screen_scissor_tl.tl_x);
tl_y = std::max(tl_y, pa_sc_screen_scissor_tl.tl_y);
tl_x = std::max(tl_x, int32_t(pa_sc_screen_scissor_tl.tl_x));
tl_y = std::max(tl_y, int32_t(pa_sc_screen_scissor_tl.tl_y));
auto pa_sc_screen_scissor_br = regs.Get<reg::PA_SC_SCREEN_SCISSOR_BR>();
br_x = std::min(br_x, pa_sc_screen_scissor_br.br_x);
br_y = std::min(br_y, pa_sc_screen_scissor_br.br_y);
br_x = std::min(br_x, int32_t(pa_sc_screen_scissor_br.br_x));
br_y = std::min(br_y, int32_t(pa_sc_screen_scissor_br.br_y));
if (clamp_to_surface_pitch) {
// Clamp the horizontal scissor to surface_pitch for safety, in case that's
// not done by the guest for some reason (it's not when doing draws without

View File

@ -3236,7 +3236,8 @@ void DxbcShaderTranslator::WriteOutputSignature() {
// Coverage output for alpha to mask (SV_Coverage).
size_t coverage_position = SIZE_MAX;
if (color_targets_written & 0b1) {
if ((color_targets_written & 0b1) &&
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
coverage_position = shader_object_.size();
shader_object_.resize(shader_object_.size() + kParameterDwords);
++parameter_count;
@ -3364,14 +3365,11 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Don't allow refactoring when converting to native code to maintain position
// invariance (needed even in pixel shaders for oDepth invariance).
uint32_t global_flags = 0;
if (is_pixel_shader() &&
GetDxbcShaderModification().pixel.depth_stencil_mode ==
Modification::DepthStencilMode::kEarlyHint &&
!edram_rov_used_ && current_shader().implicit_early_z_write_allowed()) {
global_flags |= dxbc::kGlobalFlagForceEarlyDepthStencil;
}
ao_.OpDclGlobalFlags(global_flags);
bool global_flag_force_early_depth_stencil =
IsForceEarlyDepthStencilGlobalFlagEnabled();
ao_.OpDclGlobalFlags(global_flag_force_early_depth_stencil
? dxbc::kGlobalFlagForceEarlyDepthStencil
: 0);
// Constant buffers, from most frequently accessed to least frequently accessed
// (the order is a hint to the driver according to the DXBC header).
@ -3655,7 +3653,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
}
// Coverage output for alpha to mask.
if (color_targets_written & 0b1) {
if ((color_targets_written & 0b1) &&
!global_flag_force_early_depth_stencil) {
ao_.OpDclOutput(dxbc::Dest::OMask());
}
// Depth output.

View File

@ -656,6 +656,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
GetDxbcShaderModification().vertex.host_vertex_shader_type);
}
// Returns true only when all of the following hold, tested in this order
// (each later check is skipped once an earlier one fails, exactly like a
// short-circuiting && chain):
//  1. a pixel shader is being translated,
//  2. its modification's depth / stencil mode is kEarlyHint,
//  3. edram_rov_used_ is false,
//  4. the current shader reports implicit_early_z_write_allowed().
bool IsForceEarlyDepthStencilGlobalFlagEnabled() const {
  if (!is_pixel_shader()) {
    return false;
  }
  if (GetDxbcShaderModification().pixel.depth_stencil_mode !=
      Modification::DepthStencilMode::kEarlyHint) {
    return false;
  }
  if (edram_rov_used_) {
    return false;
  }
  return current_shader().implicit_early_z_write_allowed();
}
// Whether to use switch-case rather than if (pc >= label) for control flow.
bool UseSwitchForControlFlow() const;

View File

@ -1979,7 +1979,8 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
void DxbcShaderTranslator::CompletePixelShader_AlphaToMask() {
// Check if alpha to coverage can be done at all in this shader.
if (!current_shader().writes_color_target(0)) {
if (!current_shader().writes_color_target(0) ||
IsForceEarlyDepthStencilGlobalFlagEnabled()) {
return;
}
@ -2987,7 +2988,8 @@ void DxbcShaderTranslator::CompletePixelShader() {
return;
}
if (current_shader().writes_color_target(0)) {
if (current_shader().writes_color_target(0) &&
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
// Alpha test.
// X - mask, then masked result (SGPR for loading, VGPR for masking).
// Y - operation result (SGPR for mask operations, VGPR for alpha

View File

@ -498,7 +498,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
// std::min for safety, to avoid negative numbers in case it's completely
// wrong.
edram_bases[0] =
std::min(rb_depth_info.depth_base, xenos::kEdramTileCount);
std::min(uint32_t(rb_depth_info.depth_base), xenos::kEdramTileCount);
// With pixel shader interlock, always the same addressing disregarding
// the format.
resource_formats[0] =
@ -513,7 +513,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t rt_bit_index = 1 + i;
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
edram_bases[rt_bit_index] =
std::min(color_info.color_base, xenos::kEdramTileCount);
std::min(uint32_t(color_info.color_base), xenos::kEdramTileCount);
xenos::ColorRenderTargetFormat color_format =
regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[i])
@ -1054,22 +1054,22 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
uint32_t base_offset_rows_at_32bpp =
base_offset_tiles_at_32bpp / pitch_tiles_at_32bpp;
Transfer::Rectangle clear_rectangle;
clear_rectangle.x_pixels =
std::min((base_offset_tiles_at_32bpp -
base_offset_rows_at_32bpp * pitch_tiles_at_32bpp) *
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2) +
(resolve_info.coordinate_info.edram_offset_x_div_8 << 3),
pitch_pixels);
clear_rectangle.y_pixels =
std::min(base_offset_rows_at_32bpp *
(xenos::kEdramTileHeightSamples >> msaa_samples_y_log2) +
(resolve_info.coordinate_info.edram_offset_y_div_8 << 3),
render_target_height_pixels);
clear_rectangle.x_pixels = std::min(
(base_offset_tiles_at_32bpp -
base_offset_rows_at_32bpp * pitch_tiles_at_32bpp) *
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2) +
(uint32_t(resolve_info.coordinate_info.edram_offset_x_div_8) << 3),
pitch_pixels);
clear_rectangle.y_pixels = std::min(
base_offset_rows_at_32bpp *
(xenos::kEdramTileHeightSamples >> msaa_samples_y_log2) +
(uint32_t(resolve_info.coordinate_info.edram_offset_y_div_8) << 3),
render_target_height_pixels);
clear_rectangle.width_pixels =
std::min(resolve_info.coordinate_info.width_div_8 << 3,
std::min(uint32_t(resolve_info.coordinate_info.width_div_8) << 3,
pitch_pixels - clear_rectangle.x_pixels);
clear_rectangle.height_pixels =
std::min(resolve_info.coordinate_info.height_div_8 << 3,
std::min(uint32_t(resolve_info.coordinate_info.height_div_8) << 3,
render_target_height_pixels - clear_rectangle.y_pixels);
if (!clear_rectangle.width_pixels || !clear_rectangle.height_pixels) {
// Outside the pitch / height (or initially specified as 0).

View File

@ -73,7 +73,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.pitch = fetch.pitch << 5;
info.mip_min_level = fetch.mip_min_level;
info.mip_max_level = std::max(fetch.mip_min_level, fetch.mip_max_level);
info.mip_max_level =
std::max(uint32_t(fetch.mip_min_level), uint32_t(fetch.mip_max_level));
info.is_tiled = fetch.tiled;
info.has_packed_mips = fetch.packed_mips;

View File

@ -85,9 +85,10 @@ void GetSubresourcesFromFetchConstant(
mip_min_level = 0;
mip_max_level = 0;
} else {
mip_min_level = std::min(fetch.mip_min_level, size_mip_max_level);
mip_max_level = std::max(std::min(fetch.mip_max_level, size_mip_max_level),
mip_min_level);
mip_min_level = std::min(uint32_t(fetch.mip_min_level), size_mip_max_level);
mip_max_level =
std::max(std::min(uint32_t(fetch.mip_max_level), size_mip_max_level),
mip_min_level);
}
if (mip_max_level != 0) {
if (base_page == 0) {
@ -260,7 +261,8 @@ TextureGuestLayout GetGuestTextureLayout(
if (layout.packed_level != 0) {
std::memset(&layout.mips[0], 0, sizeof(layout.mips[0]));
}
uint32_t max_stored_level = std::min(max_level, layout.packed_level);
uint32_t max_stored_level =
std::min(max_level, uint32_t(layout.packed_level));
{
uint32_t mips_end = max_stored_level + 1;
assert_true(mips_end <= xe::countof(layout.mips));