Merge branch 'master' into vulkan
This commit is contained in:
commit
e191430091
|
@ -112,7 +112,8 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
|
|||
xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
|
||||
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32;
|
||||
uint32_t index_buffer_read_count =
|
||||
std::min(vgt_draw_initiator.num_indices, vgt_dma_size.num_words);
|
||||
std::min(uint32_t(vgt_draw_initiator.num_indices),
|
||||
uint32_t(vgt_dma_size.num_words));
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16) {
|
||||
// Handle the index endianness the same way as the PrimitiveProcessor.
|
||||
if (index_endian == xenos::Endian::k8in32) {
|
||||
|
@ -281,7 +282,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
|
|||
scissor_bottom += window_y_offset;
|
||||
}
|
||||
auto pa_sc_screen_scissor_br = regs.Get<reg::PA_SC_SCREEN_SCISSOR_BR>();
|
||||
scissor_bottom = std::min(scissor_bottom, pa_sc_screen_scissor_br.br_y);
|
||||
scissor_bottom =
|
||||
std::min(scissor_bottom, int32_t(pa_sc_screen_scissor_br.br_y));
|
||||
uint32_t max_y = uint32_t(std::max(scissor_bottom, int32_t(0)));
|
||||
|
||||
if (regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable) {
|
||||
|
@ -302,7 +304,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
|
|||
if (scissor_window_offset) {
|
||||
scissor_right += pa_sc_window_offset.window_x_offset;
|
||||
}
|
||||
scissor_right = std::min(scissor_right, pa_sc_screen_scissor_br.br_x);
|
||||
scissor_right =
|
||||
std::min(scissor_right, int32_t(pa_sc_screen_scissor_br.br_x));
|
||||
if (scissor_right >= xenos::kTexture2DCubeMaxWidthHeight) {
|
||||
estimate_vertex_max_y = true;
|
||||
}
|
||||
|
|
|
@ -576,11 +576,11 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
|
|||
// Screen scissor is not used by Direct3D 9 (always 0, 0 to 8192, 8192), but
|
||||
// still handled here for completeness.
|
||||
auto pa_sc_screen_scissor_tl = regs.Get<reg::PA_SC_SCREEN_SCISSOR_TL>();
|
||||
tl_x = std::max(tl_x, pa_sc_screen_scissor_tl.tl_x);
|
||||
tl_y = std::max(tl_y, pa_sc_screen_scissor_tl.tl_y);
|
||||
tl_x = std::max(tl_x, int32_t(pa_sc_screen_scissor_tl.tl_x));
|
||||
tl_y = std::max(tl_y, int32_t(pa_sc_screen_scissor_tl.tl_y));
|
||||
auto pa_sc_screen_scissor_br = regs.Get<reg::PA_SC_SCREEN_SCISSOR_BR>();
|
||||
br_x = std::min(br_x, pa_sc_screen_scissor_br.br_x);
|
||||
br_y = std::min(br_y, pa_sc_screen_scissor_br.br_y);
|
||||
br_x = std::min(br_x, int32_t(pa_sc_screen_scissor_br.br_x));
|
||||
br_y = std::min(br_y, int32_t(pa_sc_screen_scissor_br.br_y));
|
||||
if (clamp_to_surface_pitch) {
|
||||
// Clamp the horizontal scissor to surface_pitch for safety, in case that's
|
||||
// not done by the guest for some reason (it's not when doing draws without
|
||||
|
|
|
@ -3236,7 +3236,8 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
|
||||
// Coverage output for alpha to mask (SV_Coverage).
|
||||
size_t coverage_position = SIZE_MAX;
|
||||
if (color_targets_written & 0b1) {
|
||||
if ((color_targets_written & 0b1) &&
|
||||
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
|
||||
coverage_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() + kParameterDwords);
|
||||
++parameter_count;
|
||||
|
@ -3364,14 +3365,11 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
|
||||
// Don't allow refactoring when converting to native code to maintain position
|
||||
// invariance (needed even in pixel shaders for oDepth invariance).
|
||||
uint32_t global_flags = 0;
|
||||
if (is_pixel_shader() &&
|
||||
GetDxbcShaderModification().pixel.depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kEarlyHint &&
|
||||
!edram_rov_used_ && current_shader().implicit_early_z_write_allowed()) {
|
||||
global_flags |= dxbc::kGlobalFlagForceEarlyDepthStencil;
|
||||
}
|
||||
ao_.OpDclGlobalFlags(global_flags);
|
||||
bool global_flag_force_early_depth_stencil =
|
||||
IsForceEarlyDepthStencilGlobalFlagEnabled();
|
||||
ao_.OpDclGlobalFlags(global_flag_force_early_depth_stencil
|
||||
? dxbc::kGlobalFlagForceEarlyDepthStencil
|
||||
: 0);
|
||||
|
||||
// Constant buffers, from most frequently accessed to least frequently accessed
|
||||
// (the order is a hint to the driver according to the DXBC header).
|
||||
|
@ -3655,7 +3653,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
}
|
||||
// Coverage output for alpha to mask.
|
||||
if (color_targets_written & 0b1) {
|
||||
if ((color_targets_written & 0b1) &&
|
||||
!global_flag_force_early_depth_stencil) {
|
||||
ao_.OpDclOutput(dxbc::Dest::OMask());
|
||||
}
|
||||
// Depth output.
|
||||
|
|
|
@ -656,6 +656,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
GetDxbcShaderModification().vertex.host_vertex_shader_type);
|
||||
}
|
||||
|
||||
// Returns true only when all of these hold: the shader being translated is a
// pixel shader, its modification's depth_stencil_mode is
// DepthStencilMode::kEarlyHint, edram_rov_used_ is false, and
// current_shader().implicit_early_z_write_allowed() returns true.
// Elsewhere in this change the result selects whether
// dxbc::kGlobalFlagForceEarlyDepthStencil is declared, and gates the coverage
// (oMask) output and the alpha test / alpha-to-mask code on it being false.
bool IsForceEarlyDepthStencilGlobalFlagEnabled() const {
|
||||
return is_pixel_shader() &&
|
||||
GetDxbcShaderModification().pixel.depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kEarlyHint &&
|
||||
!edram_rov_used_ &&
|
||||
current_shader().implicit_early_z_write_allowed();
|
||||
}
|
||||
|
||||
// Whether to use switch-case rather than if (pc >= label) for control flow.
|
||||
bool UseSwitchForControlFlow() const;
|
||||
|
||||
|
|
|
@ -1979,7 +1979,8 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
|
|||
|
||||
void DxbcShaderTranslator::CompletePixelShader_AlphaToMask() {
|
||||
// Check if alpha to coverage can be done at all in this shader.
|
||||
if (!current_shader().writes_color_target(0)) {
|
||||
if (!current_shader().writes_color_target(0) ||
|
||||
IsForceEarlyDepthStencilGlobalFlagEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2987,7 +2988,8 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
|||
return;
|
||||
}
|
||||
|
||||
if (current_shader().writes_color_target(0)) {
|
||||
if (current_shader().writes_color_target(0) &&
|
||||
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
|
||||
// Alpha test.
|
||||
// X - mask, then masked result (SGPR for loading, VGPR for masking).
|
||||
// Y - operation result (SGPR for mask operations, VGPR for alpha
|
||||
|
|
|
@ -498,7 +498,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
// std::min for safety, to avoid negative numbers in case it's completely
|
||||
// wrong.
|
||||
edram_bases[0] =
|
||||
std::min(rb_depth_info.depth_base, xenos::kEdramTileCount);
|
||||
std::min(uint32_t(rb_depth_info.depth_base), xenos::kEdramTileCount);
|
||||
// With pixel shader interlock, always the same addressing disregarding
|
||||
// the format.
|
||||
resource_formats[0] =
|
||||
|
@ -513,7 +513,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
uint32_t rt_bit_index = 1 + i;
|
||||
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
|
||||
edram_bases[rt_bit_index] =
|
||||
std::min(color_info.color_base, xenos::kEdramTileCount);
|
||||
std::min(uint32_t(color_info.color_base), xenos::kEdramTileCount);
|
||||
xenos::ColorRenderTargetFormat color_format =
|
||||
regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i])
|
||||
|
@ -1054,22 +1054,22 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
|
|||
uint32_t base_offset_rows_at_32bpp =
|
||||
base_offset_tiles_at_32bpp / pitch_tiles_at_32bpp;
|
||||
Transfer::Rectangle clear_rectangle;
|
||||
clear_rectangle.x_pixels =
|
||||
std::min((base_offset_tiles_at_32bpp -
|
||||
base_offset_rows_at_32bpp * pitch_tiles_at_32bpp) *
|
||||
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2) +
|
||||
(resolve_info.coordinate_info.edram_offset_x_div_8 << 3),
|
||||
pitch_pixels);
|
||||
clear_rectangle.y_pixels =
|
||||
std::min(base_offset_rows_at_32bpp *
|
||||
(xenos::kEdramTileHeightSamples >> msaa_samples_y_log2) +
|
||||
(resolve_info.coordinate_info.edram_offset_y_div_8 << 3),
|
||||
render_target_height_pixels);
|
||||
clear_rectangle.x_pixels = std::min(
|
||||
(base_offset_tiles_at_32bpp -
|
||||
base_offset_rows_at_32bpp * pitch_tiles_at_32bpp) *
|
||||
(xenos::kEdramTileWidthSamples >> msaa_samples_x_log2) +
|
||||
(uint32_t(resolve_info.coordinate_info.edram_offset_x_div_8) << 3),
|
||||
pitch_pixels);
|
||||
clear_rectangle.y_pixels = std::min(
|
||||
base_offset_rows_at_32bpp *
|
||||
(xenos::kEdramTileHeightSamples >> msaa_samples_y_log2) +
|
||||
(uint32_t(resolve_info.coordinate_info.edram_offset_y_div_8) << 3),
|
||||
render_target_height_pixels);
|
||||
clear_rectangle.width_pixels =
|
||||
std::min(resolve_info.coordinate_info.width_div_8 << 3,
|
||||
std::min(uint32_t(resolve_info.coordinate_info.width_div_8) << 3,
|
||||
pitch_pixels - clear_rectangle.x_pixels);
|
||||
clear_rectangle.height_pixels =
|
||||
std::min(resolve_info.coordinate_info.height_div_8 << 3,
|
||||
std::min(uint32_t(resolve_info.coordinate_info.height_div_8) << 3,
|
||||
render_target_height_pixels - clear_rectangle.y_pixels);
|
||||
if (!clear_rectangle.width_pixels || !clear_rectangle.height_pixels) {
|
||||
// Outside the pitch / height (or initially specified as 0).
|
||||
|
|
|
@ -73,7 +73,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
|||
info.pitch = fetch.pitch << 5;
|
||||
|
||||
info.mip_min_level = fetch.mip_min_level;
|
||||
info.mip_max_level = std::max(fetch.mip_min_level, fetch.mip_max_level);
|
||||
info.mip_max_level =
|
||||
std::max(uint32_t(fetch.mip_min_level), uint32_t(fetch.mip_max_level));
|
||||
|
||||
info.is_tiled = fetch.tiled;
|
||||
info.has_packed_mips = fetch.packed_mips;
|
||||
|
|
|
@ -85,9 +85,10 @@ void GetSubresourcesFromFetchConstant(
|
|||
mip_min_level = 0;
|
||||
mip_max_level = 0;
|
||||
} else {
|
||||
mip_min_level = std::min(fetch.mip_min_level, size_mip_max_level);
|
||||
mip_max_level = std::max(std::min(fetch.mip_max_level, size_mip_max_level),
|
||||
mip_min_level);
|
||||
mip_min_level = std::min(uint32_t(fetch.mip_min_level), size_mip_max_level);
|
||||
mip_max_level =
|
||||
std::max(std::min(uint32_t(fetch.mip_max_level), size_mip_max_level),
|
||||
mip_min_level);
|
||||
}
|
||||
if (mip_max_level != 0) {
|
||||
if (base_page == 0) {
|
||||
|
@ -260,7 +261,8 @@ TextureGuestLayout GetGuestTextureLayout(
|
|||
if (layout.packed_level != 0) {
|
||||
std::memset(&layout.mips[0], 0, sizeof(layout.mips[0]));
|
||||
}
|
||||
uint32_t max_stored_level = std::min(max_level, layout.packed_level);
|
||||
uint32_t max_stored_level =
|
||||
std::min(max_level, uint32_t(layout.packed_level));
|
||||
{
|
||||
uint32_t mips_end = max_stored_level + 1;
|
||||
assert_true(mips_end <= xe::countof(layout.mips));
|
||||
|
|
Loading…
Reference in New Issue