diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 87eaf092c..8b70f182b 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -564,48 +564,54 @@ void GetHostViewportInfo(const RegisterFile& regs, uint32_t resolution_scale, } } -void GetScissor(const RegisterFile& regs, Scissor& scissor_out) { - // FIXME(Triang3l): Screen scissor isn't applied here, but it seems to be - // unused on Xbox 360 Direct3D 9. - // TODO(Triang3l): Clamp X to RB_SURFACE_INFO::surface_pitch to prevent - // overflow with target-indepdent rasterization (with ROV). +void GetScissor(const RegisterFile& regs, Scissor& scissor_out, + bool clamp_to_surface_pitch) { auto pa_sc_window_scissor_tl = regs.Get(); + int32_t tl_x = int32_t(pa_sc_window_scissor_tl.tl_x); + int32_t tl_y = int32_t(pa_sc_window_scissor_tl.tl_y); auto pa_sc_window_scissor_br = regs.Get(); - uint32_t tl_x = pa_sc_window_scissor_tl.tl_x; - uint32_t tl_y = pa_sc_window_scissor_tl.tl_y; - uint32_t br_x = pa_sc_window_scissor_br.br_x; - uint32_t br_y = pa_sc_window_scissor_br.br_y; + int32_t br_x = int32_t(pa_sc_window_scissor_br.br_x); + int32_t br_y = int32_t(pa_sc_window_scissor_br.br_y); if (!pa_sc_window_scissor_tl.window_offset_disable) { auto pa_sc_window_offset = regs.Get(); - tl_x = uint32_t(std::max( - int32_t(tl_x) + pa_sc_window_offset.window_x_offset, int32_t(0))); - tl_y = uint32_t(std::max( - int32_t(tl_y) + pa_sc_window_offset.window_y_offset, int32_t(0))); - br_x = uint32_t(std::max( - int32_t(br_x) + pa_sc_window_offset.window_x_offset, int32_t(0))); - br_y = uint32_t(std::max( - int32_t(br_y) + pa_sc_window_offset.window_y_offset, int32_t(0))); + tl_x += pa_sc_window_offset.window_x_offset; + tl_y += pa_sc_window_offset.window_y_offset; + br_x += pa_sc_window_offset.window_x_offset; + br_y += pa_sc_window_offset.window_y_offset; + } + // Screen scissor is not used by Direct3D 9 (always 0, 0 to 8192, 8192), but + // still handled here for completeness. 
+ auto pa_sc_screen_scissor_tl = regs.Get(); + tl_x = std::max(tl_x, pa_sc_screen_scissor_tl.tl_x); + tl_y = std::max(tl_y, pa_sc_screen_scissor_tl.tl_y); + auto pa_sc_screen_scissor_br = regs.Get(); + br_x = std::min(br_x, pa_sc_screen_scissor_br.br_x); + br_y = std::min(br_y, pa_sc_screen_scissor_br.br_y); + if (clamp_to_surface_pitch) { + // Clamp the horizontal scissor to surface_pitch for safety, in case that's + // not done by the guest for some reason (it's not when doing draws without + // clipping in Direct3D 9, for instance), to prevent overflow - this is + // important for host implementations, both based on target-independent + // rasterization without render target width at all (pixel shader + // interlock-based custom RB implementations) and using conventional render + // targets, but padded to EDRAM tiles. + uint32_t surface_pitch = regs.Get().surface_pitch; + tl_x = std::min(tl_x, int32_t(surface_pitch)); + br_x = std::min(br_x, int32_t(surface_pitch)); } - // Clamp the horizontal scissor to surface_pitch for safety, in case that's - // not done by the guest for some reason (it's not when doing draws completely - // without a viewport, for instance), to prevent overflow - this is important - // for host implementations, both based on target-indepedent rasterization - // without render target width at all (pixel shader interlocks-based custom RB - // implementations) and using conventional render targets, but padded to EDRAM - // tiles. - uint32_t surface_pitch = regs.Get().surface_pitch; - tl_x = std::min(tl_x, surface_pitch); - br_x = std::min(br_x, surface_pitch); // Ensure the rectangle is non-negative, by collapsing it into a 0-sized one // (not by reordering the bounds preserving the width / height, which would // reveal samples not meant to be covered, unless TL > BR does that on a real - // console, but no evidence of such has ever been seen). + // console, but no evidence of such has ever been seen), and also drop + // negative offsets. 
+ tl_x = std::max(tl_x, int32_t(0)); + tl_y = std::max(tl_y, int32_t(0)); br_x = std::max(br_x, tl_x); br_y = std::max(br_y, tl_y); - scissor_out.offset[0] = tl_x; - scissor_out.offset[1] = tl_y; - scissor_out.extent[0] = br_x - tl_x; - scissor_out.extent[1] = br_y - tl_y; + scissor_out.offset[0] = uint32_t(tl_x); + scissor_out.offset[1] = uint32_t(tl_y); + scissor_out.extent[0] = uint32_t(br_x - tl_x); + scissor_out.extent[1] = uint32_t(br_y - tl_y); } xenos::CopySampleSelect SanitizeCopySampleSelect( @@ -780,28 +786,17 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // Apply the scissor and prevent negative origin (behind the EDRAM base). - auto pa_sc_window_scissor_tl = regs.Get(); - auto pa_sc_window_scissor_br = regs.Get(); - int32_t scissor_x0 = int32_t(pa_sc_window_scissor_tl.tl_x); - int32_t scissor_y0 = int32_t(pa_sc_window_scissor_tl.tl_y); - int32_t scissor_x1 = - std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor_x0); - int32_t scissor_y1 = - std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor_y0); - if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor_x0 = - std::max(scissor_x0 + pa_sc_window_offset.window_x_offset, int32_t(0)); - scissor_y0 = - std::max(scissor_y0 + pa_sc_window_offset.window_y_offset, int32_t(0)); - scissor_x1 = - std::max(scissor_x1 + pa_sc_window_offset.window_x_offset, int32_t(0)); - scissor_y1 = - std::max(scissor_y1 + pa_sc_window_offset.window_y_offset, int32_t(0)); - } - x0 = xe::clamp(x0, scissor_x0, scissor_x1); - y0 = xe::clamp(y0, scissor_y0, scissor_y1); - x1 = xe::clamp(x1, scissor_x0, scissor_x1); - y1 = xe::clamp(y1, scissor_y0, scissor_y1); + Scissor scissor; + // False because clamping to the surface pitch will be done later (it will be + // aligned to the resolve alignment here, for resolving from render targets + // with a pitch that is not a multiple of 8). 
+ GetScissor(regs, scissor, false); + int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]); + int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]); + x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right); + y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); + x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right); + y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); assert_true(x0 <= x1 && y0 <= y1); diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 193aabca5..5608181aa 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -118,7 +118,8 @@ struct Scissor { // Extent can be zero. uint32_t extent[2]; }; -void GetScissor(const RegisterFile& regs, Scissor& scissor_out); +void GetScissor(const RegisterFile& regs, Scissor& scissor_out, + bool clamp_to_surface_pitch = true); // Scales, and shift amounts of the upper 32 bits of the 32x32=64-bit // multiplication result, for fast division and multiplication by diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index deaecaf39..a193800b7 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -349,6 +349,26 @@ union PA_CL_VTE_CNTL { static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; +union PA_SC_SCREEN_SCISSOR_TL { + struct { + int32_t tl_x : 15; // +0 + uint32_t : 1; // +15 + int32_t tl_y : 15; // +16 + }; + uint32_t value; + static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL; +}; + +union PA_SC_SCREEN_SCISSOR_BR { + struct { + int32_t br_x : 15; // +0 + uint32_t : 1; // +15 + int32_t br_y : 15; // +16 + }; + uint32_t value; + static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR; +}; + union PA_SC_WINDOW_OFFSET { struct { int32_t window_x_offset : 15; // +0 diff --git a/src/xenia/gpu/render_target_cache.cc b/src/xenia/gpu/render_target_cache.cc index 70935c3c9..64fda40fd 100644 --- 
a/src/xenia/gpu/render_target_cache.cc +++ b/src/xenia/gpu/render_target_cache.cc @@ -590,12 +590,15 @@ bool RenderTargetCache::Update(bool is_rasterization_done, height_used = uint32_t( std::min(std::fmax(viewport_bottom, 0.0f), float(height_used))); } - uint32_t scissor_bottom = regs.Get().br_y; + int32_t scissor_bottom = + int32_t(regs.Get().br_y); if (!regs.Get().window_offset_disable) { - scissor_bottom = uint32_t( - std::max(int32_t(scissor_bottom) + window_y_offset, int32_t(0))); + scissor_bottom += window_y_offset; } - height_used = std::min(height_used, scissor_bottom); + scissor_bottom = + std::min(scissor_bottom, regs.Get().br_y); + height_used = + std::min(height_used, uint32_t(std::max(scissor_bottom, int32_t(0)))); // Sorted by EDRAM base and then by index in the pipeline - for simplicity, // treat render targets placed closer to the end of the EDRAM as truncating