Playing more with viewport stuff.

This commit is contained in:
Ben Vanik 2015-03-14 16:11:55 -07:00
parent d57306b473
commit 7192728fe0
7 changed files with 186 additions and 129 deletions

View File

@ -50,7 +50,7 @@ struct VertexData { \n\
}; \n\ }; \n\
"; ";
const std::string vs_source = header + const std::string vs_source = header +
"\n\ "\n\
layout(location = 0) uniform vec4 src_uv_params; \n\ layout(location = 0) uniform vec4 src_uv_params; \n\
out gl_PerVertex { \n\ out gl_PerVertex { \n\
vec4 gl_Position; \n\ vec4 gl_Position; \n\
@ -240,6 +240,9 @@ void Blitter::Draw(GLuint src_texture, uint32_t src_x, uint32_t src_y,
src_width / float(src_texture_width), src_width / float(src_texture_width),
src_height / float(src_texture_height)); src_height / float(src_texture_height));
// Useful for seeing the entire framebuffer/etc:
// glProgramUniform4f(vertex_program_, 0, 0.0f, 0.0f, 1.0f, 1.0f);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
} }

View File

@ -405,6 +405,10 @@ bool CommandProcessor::SetupGL() {
return false; return false;
} }
glEnable(GL_SCISSOR_TEST);
glClipControl(GL_UPPER_LEFT, GL_NEGATIVE_ONE_TO_ONE);
glPointParameteri(GL_POINT_SPRITE_COORD_ORIGIN, GL_UPPER_LEFT);
return true; return true;
} }
@ -568,6 +572,9 @@ void CommandProcessor::IssueSwap() {
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// HACK: just use whatever our current framebuffer is. // HACK: just use whatever our current framebuffer is.
swap_params.framebuffer_texture = last_framebuffer_texture_; swap_params.framebuffer_texture = last_framebuffer_texture_;
/*swap_params.framebuffer_texture = active_framebuffer_
? active_framebuffer_->color_targets[0]
: last_framebuffer_texture_;*/
// Guess frontbuffer dimensions. // Guess frontbuffer dimensions.
// Command buffer seems to set these right before the XE_SWAP. // Command buffer seems to set these right before the XE_SWAP.
@ -578,10 +585,6 @@ void CommandProcessor::IssueSwap() {
swap_params.width = window_scissor_br & 0x7FFF - swap_params.x; swap_params.width = window_scissor_br & 0x7FFF - swap_params.x;
swap_params.height = (window_scissor_br >> 16) & 0x7FFF - swap_params.y; swap_params.height = (window_scissor_br >> 16) & 0x7FFF - swap_params.y;
// This is just so that we draw reasonable garbage when drawing garbage.
swap_params.width = std::min(swap_params.width, 2560u);
swap_params.height = std::min(swap_params.height, 2560u);
PrepareForWait(); PrepareForWait();
swap_handler_(swap_params); swap_handler_(swap_params);
ReturnFromWait(); ReturnFromWait();
@ -1712,8 +1715,6 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRenderTargets() {
// TODO(benvanik): can we do this all named? // TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too? // TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
glViewport(0, 0, 2560, 2560);
} }
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
@ -1771,62 +1772,16 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() {
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow: // See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
uint32_t window_offset_x = 0;
uint32_t window_offset_y = 0;
if ((pa_su_sc_mode_cntl >> 17) & 1) { if ((pa_su_sc_mode_cntl >> 17) & 1) {
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
draw_batcher_.set_window_offset(window_offset & 0x7FFF, window_offset_x = window_offset & 0x7FFF;
(window_offset >> 16) & 0x7FFF); window_offset_y = (window_offset >> 16) & 0x7FFF;
draw_batcher_.set_window_offset(window_offset_x, window_offset_y);
} else { } else {
draw_batcher_.set_window_offset(0, 0); draw_batcher_.set_window_offset(0, 0);
} }
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
draw_batcher_.set_window_scissor(
window_scissor_tl & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF,
window_scissor_br & 0x7FFF, (window_scissor_br >> 16) & 0x7FFF);
// HACK: no clue where to get these values.
// RB_SURFACE_INFO
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// TODO(benvanik): ??
float window_width_scalar = 1;
float window_height_scalar = 1;
switch (surface_msaa) {
case MsaaSamples::k1X:
break;
case MsaaSamples::k2X:
window_width_scalar = 2;
break;
case MsaaSamples::k4X:
window_width_scalar = 2;
window_height_scalar = 2;
break;
}
draw_batcher_.set_window_scalar(window_width_scalar, window_height_scalar);
// Whether each of the viewport settings is enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
bool vport_yoffset_enable = (vte_control & (1 << 3)) > 0;
bool vport_zscale_enable = (vte_control & (1 << 4)) > 0;
bool vport_zoffset_enable = (vte_control & (1 << 5)) > 0;
assert_true(vport_xscale_enable == vport_yscale_enable ==
vport_zscale_enable == vport_xoffset_enable ==
vport_yoffset_enable == vport_zoffset_enable);
// Viewport scaling. Only enabled if the flags are all set.
draw_batcher_.set_viewport_offset(
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0,
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0,
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0);
draw_batcher_.set_viewport_scale(
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1,
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1);
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
@ -1835,6 +1790,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() {
// = false: multiply the Z coordinate by 1/W0. // = false: multiply the Z coordinate by 1/W0.
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
// get 1/W0. // get 1/W0.
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
draw_batcher_.set_vtx_fmt((vte_control >> 8) & 0x1 ? 1.0f : 0.0f, draw_batcher_.set_vtx_fmt((vte_control >> 8) & 0x1 ? 1.0f : 0.0f,
(vte_control >> 9) & 0x1 ? 1.0f : 0.0f, (vte_control >> 9) & 0x1 ? 1.0f : 0.0f,
(vte_control >> 10) & 0x1 ? 1.0f : 0.0f); (vte_control >> 10) & 0x1 ? 1.0f : 0.0f);
@ -1844,7 +1800,32 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() {
bool dirty = false; bool dirty = false;
// dirty |= SetShadowRegister(state_regs.pa_cl_clip_cntl, // dirty |= SetShadowRegister(state_regs.pa_cl_clip_cntl,
// XE_GPU_REG_PA_CL_CLIP_CNTL); // XE_GPU_REG_PA_CL_CLIP_CNTL);
dirty |=
SetShadowRegister(state_regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |=
SetShadowRegister(state_regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
dirty |= SetShadowRegister(state_regs.pa_sc_window_scissor_tl,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
dirty |= SetShadowRegister(state_regs.pa_sc_window_scissor_br,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_xoffset,
XE_GPU_REG_PA_CL_VPORT_XOFFSET);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_yoffset,
XE_GPU_REG_PA_CL_VPORT_YOFFSET);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_zoffset,
XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_xscale,
XE_GPU_REG_PA_CL_VPORT_XSCALE);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_yscale,
XE_GPU_REG_PA_CL_VPORT_YSCALE);
dirty |= SetShadowRegister(state_regs.pa_cl_vport_zscale,
XE_GPU_REG_PA_CL_VPORT_ZSCALE);
if (!dirty) { if (!dirty) {
if ((state_regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
draw_batcher_.set_window_scalar(1.0f, 1.0f);
} else {
draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
}
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
@ -1861,6 +1842,69 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() {
// glClipControl(GL_LOWER_LEFT, GL_NEGATIVE_ONE_TO_ONE); // glClipControl(GL_LOWER_LEFT, GL_NEGATIVE_ONE_TO_ONE);
//} //}
GLint ws_x = state_regs.pa_sc_window_scissor_tl & 0x7FFF;
GLint ws_y = (state_regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
GLsizei ws_w = (state_regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x;
GLsizei ws_h = ((state_regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
glScissorIndexed(0, ws_x, ws_y, ws_w, ws_h);
// HACK: no clue where to get these values.
// RB_SURFACE_INFO
auto surface_msaa =
static_cast<MsaaSamples>((state_regs.rb_surface_info >> 16) & 0x3);
// TODO(benvanik): ??
float window_width_scalar = 1;
float window_height_scalar = 1;
switch (surface_msaa) {
case MsaaSamples::k1X:
break;
case MsaaSamples::k2X:
// window_width_scalar = 2;
break;
case MsaaSamples::k4X:
window_width_scalar = 2;
window_height_scalar = 2;
break;
}
// Whether each of the viewport settings are enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
bool vport_xscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 0)) > 0;
bool vport_xoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 1)) > 0;
bool vport_yscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 2)) > 0;
bool vport_yoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 3)) > 0;
bool vport_zscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 4)) > 0;
bool vport_zoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 5)) > 0;
assert_true(vport_xscale_enable == vport_yscale_enable ==
vport_zscale_enable == vport_xoffset_enable ==
vport_yoffset_enable == vport_zoffset_enable);
if (vport_xscale_enable) {
float texel_offset_x = 0.0f;
float texel_offset_y = 0.0f;
float vox = vport_xoffset_enable ? state_regs.pa_cl_vport_xoffset : 0;
float voy = vport_yoffset_enable ? state_regs.pa_cl_vport_yoffset : 0;
float voz = vport_zoffset_enable ? state_regs.pa_cl_vport_zoffset : 0;
float vsx = vport_xscale_enable ? state_regs.pa_cl_vport_xscale : 1;
float vsy = vport_yscale_enable ? state_regs.pa_cl_vport_yscale : 1;
float vsz = vport_zscale_enable ? state_regs.pa_cl_vport_zscale : 1;
float vpw = 2 * window_width_scalar * vsx;
float vph = -2 * window_height_scalar * vsy;
float vpx = window_width_scalar * vox - vpw / 2;
float vpy = window_height_scalar * voy - vph / 2;
glViewportIndexedf(0, vpx + texel_offset_x, vpy + texel_offset_y, vpw, vph);
draw_batcher_.set_window_scalar(1.0f, 1.0f);
} else {
float texel_offset_x = 0.0f;
float texel_offset_y = 0.0f;
float vpw = 2 * 2560.0f * window_width_scalar;
float vph = 2 * 2560.0f * window_height_scalar;
float vpx = -2560.0f * window_width_scalar;
float vpy = -2560.0f * window_height_scalar;
glViewportIndexedf(0, vpx + texel_offset_x, vpy + texel_offset_y, vpw, vph);
draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
}
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }
@ -1883,9 +1927,11 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() {
draw_batcher_.Flush(DrawBatcher::FlushMode::kStateChange); draw_batcher_.Flush(DrawBatcher::FlushMode::kStateChange);
// Scissoring. // Scissoring.
// TODO(benvanik): is this used? we are using scissoring for window scissor.
if (regs.pa_sc_screen_scissor_tl != 0 && if (regs.pa_sc_screen_scissor_tl != 0 &&
regs.pa_sc_screen_scissor_br != 0x20002000) { regs.pa_sc_screen_scissor_br != 0x20002000) {
glEnable(GL_SCISSOR_TEST); assert_always();
// glEnable(GL_SCISSOR_TEST);
// TODO(benvanik): signed? // TODO(benvanik): signed?
int32_t screen_scissor_x = regs.pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_x = regs.pa_sc_screen_scissor_tl & 0x7FFF;
int32_t screen_scissor_y = (regs.pa_sc_screen_scissor_tl >> 16) & 0x7FFF; int32_t screen_scissor_y = (regs.pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
@ -1896,7 +1942,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() {
glScissor(screen_scissor_x, screen_scissor_y, screen_scissor_w, glScissor(screen_scissor_x, screen_scissor_y, screen_scissor_w,
screen_scissor_h); screen_scissor_h);
} else { } else {
glDisable(GL_SCISSOR_TEST); // glDisable(GL_SCISSOR_TEST);
} }
switch (regs.pa_su_sc_mode_cntl & 0x3) { switch (regs.pa_su_sc_mode_cntl & 0x3) {
@ -1912,7 +1958,6 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() {
glCullFace(GL_BACK); glCullFace(GL_BACK);
break; break;
} }
if (regs.pa_su_sc_mode_cntl & 0x4) { if (regs.pa_su_sc_mode_cntl & 0x4) {
glFrontFace(GL_CW); glFrontFace(GL_CW);
} else { } else {
@ -2486,7 +2531,7 @@ bool CommandProcessor::IssueCopy() {
// but I can't seem to find something similar. // but I can't seem to find something similar.
// Maybe scissor rect/window offset? // Maybe scissor rect/window offset?
uint32_t x = 0; uint32_t x = 0;
uint32_t y = 2560 - copy_dest_height; uint32_t y = 0;
uint32_t w = copy_dest_pitch; uint32_t w = copy_dest_pitch;
uint32_t h = copy_dest_height; uint32_t h = copy_dest_height;

View File

@ -298,7 +298,17 @@ class CommandProcessor {
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_render_targets_regs_; } update_render_targets_regs_;
struct UpdateViewportStateRegisters { struct UpdateViewportStateRegisters {
uint32_t pa_cl_clip_cntl; // uint32_t pa_cl_clip_cntl;
uint32_t rb_surface_info;
uint32_t pa_cl_vte_cntl;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
UpdateViewportStateRegisters() { Reset(); } UpdateViewportStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }

View File

@ -81,27 +81,10 @@ class DrawBatcher {
active_draw_.header->window_offset.x = float(x); active_draw_.header->window_offset.x = float(x);
active_draw_.header->window_offset.y = float(y); active_draw_.header->window_offset.y = float(y);
} }
// Stores the window scissor rectangle in the active draw's header.
// The four edges are converted to float and packed as
// (x, y, z, w) = (left, top, right, bottom).
void set_window_scissor(uint32_t left, uint32_t top, uint32_t right,
                        uint32_t bottom) {
  auto& scissor_rect = active_draw_.header->window_scissor;
  scissor_rect.x = static_cast<float>(left);
  scissor_rect.y = static_cast<float>(top);
  scissor_rect.z = static_cast<float>(right);
  scissor_rect.w = static_cast<float>(bottom);
}
void set_window_scalar(float width_scalar, float height_scalar) { void set_window_scalar(float width_scalar, float height_scalar) {
active_draw_.header->window_offset.z = width_scalar; active_draw_.header->window_offset.z = width_scalar;
active_draw_.header->window_offset.w = height_scalar; active_draw_.header->window_offset.w = height_scalar;
} }
// Writes the viewport translation into the active draw's header as
// (x, y, z) = (offset_x, offset_y, offset_z).
void set_viewport_offset(float offset_x, float offset_y, float offset_z) {
  auto& translation = active_draw_.header->viewport_offset;
  translation.x = offset_x;
  translation.y = offset_y;
  translation.z = offset_z;
}
// Writes the viewport scale factors into the active draw's header as
// (x, y, z) = (scale_x, scale_y, scale_z).
void set_viewport_scale(float scale_x, float scale_y, float scale_z) {
  auto& scaling = active_draw_.header->viewport_scale;
  scaling.x = scale_x;
  scaling.y = scale_y;
  scaling.z = scale_z;
}
void set_vtx_fmt(float xy, float z, float w) { void set_vtx_fmt(float xy, float z, float w) {
active_draw_.header->vtx_fmt.x = xy; active_draw_.header->vtx_fmt.x = xy;
active_draw_.header->vtx_fmt.y = xy; active_draw_.header->vtx_fmt.y = xy;
@ -193,9 +176,6 @@ class DrawBatcher {
// This must match GL4Shader's header. // This must match GL4Shader's header.
struct CommonHeader { struct CommonHeader {
float4 window_offset; // tx,ty,sx,sy float4 window_offset; // tx,ty,sx,sy
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
float4 vtx_fmt; // float4 vtx_fmt; //
float4 alpha_test; // alpha test enable, func, ref, ? float4 alpha_test; // alpha test enable, func, ref, ?

View File

@ -54,9 +54,6 @@ std::string GL4Shader::GetHeader() {
// This must match DrawBatcher::CommonHeader. // This must match DrawBatcher::CommonHeader.
"struct StateData {\n" "struct StateData {\n"
" vec4 window_offset;\n" " vec4 window_offset;\n"
" vec4 window_scissor;\n"
" vec4 viewport_offset;\n"
" vec4 viewport_scale;\n"
" vec4 vtx_fmt;\n" " vec4 vtx_fmt;\n"
" vec4 alpha_test;\n" " vec4 alpha_test;\n"
// TODO(benvanik): variable length. // TODO(benvanik): variable length.
@ -185,40 +182,21 @@ bool GL4Shader::PrepareVertexShader(
} }
std::string apply_transform = std::string apply_transform =
"vec4 applyTransform(const in StateData state, vec4 Pclip) {\n" "vec4 applyTransform(const in StateData state, vec4 pos) {\n"
" // Clip->NDC with perspective divide.\n" " if (state.vtx_fmt.w == 0.0) {\n"
" // We do this here because it's programmable on the 360.\n" " // w is 1/W0, so fix it.\n"
" if (state.vtx_fmt.w != 0.0) {\n" " pos.w = 1.0 / pos.w;\n"
" // w is not 1/W0. Common case.\n"
" Pclip.w = 1.0 / Pclip.w;\n"
" }\n" " }\n"
" vec3 Pndc = Pclip.xyz;\n" " if (state.vtx_fmt.x != 0.0) {\n"
" if (state.vtx_fmt.x == 0.0) {\n" " // Already multiplied by 1/W0, so pull it out.\n"
" // Need to multiply by 1/W0.\n" " pos.xy /= pos.w;\n"
" Pndc.xy *= Pclip.w;\n"
" }\n" " }\n"
" if (state.vtx_fmt.z == 0.0) {\n" " if (state.vtx_fmt.z != 0.0) {\n"
" // Need to multiply by 1/W0.\n" " // Already multiplied by 1/W0, so pull it out.\n"
" Pndc.z *= Pclip.w;\n" " pos.z /= pos.w;\n"
" }\n" " }\n"
" // Perform clipping, lest we get weird geometry.\n" " pos.xy *= state.window_offset.zw;\n"
// TODO(benvanik): is this right? dxclip mode may change this? " return pos;\n"
" Pclip.w = 1.0;\n"
" if (Pndc.z < gl_DepthRange.near || Pndc.z > gl_DepthRange.far) {\n"
" // Clipped! w=0 will kill it in the hardware persp divide.\n"
" Pclip.w = 0.0;\n"
" }\n"
" vec3 Pwnd = Pndc.xyz * state.viewport_scale.xyz + \n"
" state.viewport_offset.xyz;\n"
" // 1px padding required for pixel offset issue.\n"
" Pwnd.xy += 1.0;\n"
" vec3 Pwnd2 = vec3(Pwnd.xy * state.window_offset.zw + \n"
" state.window_offset.xy, Pwnd.z);\n"
" Pwnd2.y = 2560.0 - Pwnd2.y;\n"
" vec3 fb_offset = vec3(2560.0 / 2.0, 2560.0 / 2.0, 0.0);\n"
" vec3 fb_scale = vec3(2560.0 / 2.0, 2560.0 / 2.0, 1.0);\n"
" vec3 Pndc2 = (Pwnd2.xyz - fb_offset.xyz) / fb_scale.xyz;\n"
" return vec4(Pndc2.xy, Pndc2.z, Pclip.w);\n"
"}\n"; "}\n";
std::string source = std::string source =
GetHeader() + apply_transform + GetHeader() + apply_transform +
@ -275,13 +253,6 @@ bool GL4Shader::PreparePixelShader(
"void processFragment(const in StateData state);\n" "void processFragment(const in StateData state);\n"
"void main() {\n" + "void main() {\n" +
" const StateData state = states[draw_id];\n" " const StateData state = states[draw_id];\n"
" // Custom scissoring. Doing it here avoids the need for glScissor.\n"
" if (gl_FragCoord.x < state.window_scissor.x ||\n"
" gl_FragCoord.x > state.window_scissor.z ||\n"
" gl_FragCoord.y < state.window_scissor.y ||\n"
" gl_FragCoord.y > state.window_scissor.w) {\n"
" discard;\n"
" }\n"
" processFragment(state);\n" " processFragment(state);\n"
"}\n"; "}\n";

View File

@ -855,6 +855,26 @@ bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) {
return true; return true;
} }
// Translates the scalar RECIP_CLAMP ALU op by appending "1.0 / " followed by
// the op's scalar source operand (index 3) between the scalar-op begin/end
// wrappers. Always returns true.
// NOTE(review): the text appended here is a plain reciprocal; the clamping
// described below is not emitted by this function. TODO confirm whether
// Begin/EndAppendScalarOp cover it, otherwise it is still unimplemented.
bool GL4ShaderTranslator::TranslateALU_RECIP_CLAMP(const instr_alu_t& alu) {
// Intended semantics: if result == -inf, result = -FLT_MAX.
// Intended semantics: if result == +inf, result = +FLT_MAX.
BeginAppendScalarOp(alu);
Append("1.0 / ");
AppendScalarOpSrcReg(alu, 3);
EndAppendScalarOp(alu);
return true;
}
// Translates the scalar RECIP_FF ALU op by appending "1.0 / " followed by the
// op's scalar source operand (index 3) between the scalar-op begin/end
// wrappers. Always returns true.
// NOTE(review): the text appended here is a plain reciprocal; the
// infinity-to-zero flushing described below is not emitted by this function.
// TODO confirm whether Begin/EndAppendScalarOp cover it, otherwise it is
// still unimplemented.
bool GL4ShaderTranslator::TranslateALU_RECIP_FF(const instr_alu_t& alu) {
// Intended semantics: if result == -inf, result = -0.0.
// Intended semantics: if result == +inf, result = +0.0.
BeginAppendScalarOp(alu);
Append("1.0 / ");
AppendScalarOpSrcReg(alu, 3);
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
Append("1.0 / "); Append("1.0 / ");
@ -863,10 +883,34 @@ bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
return true; return true;
} }
// Translates the scalar RECIPSQ_CLAMP ALU op by appending
// "inversesqrt(<src>.x)", where <src> is the op's scalar source operand
// (index 3), between the scalar-op begin/end wrappers. Always returns true.
// NOTE(review): the text appended here is a plain inverse square root; the
// clamping described below is not emitted by this function. TODO confirm
// whether Begin/EndAppendScalarOp cover it, otherwise it is still
// unimplemented.
bool GL4ShaderTranslator::TranslateALU_RECIPSQ_CLAMP(
const ucode::instr_alu_t& alu) {
// Intended semantics: if result == -inf, result = -FLT_MAX.
// Intended semantics: if result == +inf, result = +FLT_MAX.
BeginAppendScalarOp(alu);
Append("inversesqrt(");
AppendScalarOpSrcReg(alu, 3);
// Select the first component of the operand and close the call.
Append(".x)");
EndAppendScalarOp(alu);
return true;
}
// Translates the scalar RECIPSQ_FF ALU op by appending
// "inversesqrt(<src>.x)", where <src> is the op's scalar source operand
// (index 3), between the scalar-op begin/end wrappers. Always returns true.
// NOTE(review): the text appended here is a plain inverse square root; the
// infinity-to-zero flushing described below is not emitted by this function.
// TODO confirm whether Begin/EndAppendScalarOp cover it, otherwise it is
// still unimplemented.
bool GL4ShaderTranslator::TranslateALU_RECIPSQ_FF(
const ucode::instr_alu_t& alu) {
// Intended semantics: if result == -inf, result = -0.0.
// Intended semantics: if result == +inf, result = +0.0.
BeginAppendScalarOp(alu);
Append("inversesqrt(");
AppendScalarOpSrcReg(alu, 3);
// Select the first component of the operand and close the call.
Append(".x)");
EndAppendScalarOp(alu);
return true;
}
bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE( bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE(
const ucode::instr_alu_t& alu) { const ucode::instr_alu_t& alu) {
BeginAppendScalarOp(alu); BeginAppendScalarOp(alu);
Append("1.0 / sqrt("); Append("inversesqrt(");
AppendScalarOpSrcReg(alu, 3); AppendScalarOpSrcReg(alu, 3);
Append(".x)"); Append(".x)");
EndAppendScalarOp(alu); EndAppendScalarOp(alu);
@ -1097,11 +1141,11 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(EXP_IEEE, 1), // 14 ALU_INSTR_IMPL(EXP_IEEE, 1), // 14
ALU_INSTR(LOG_CLAMP, 1), // 15 ALU_INSTR(LOG_CLAMP, 1), // 15
ALU_INSTR_IMPL(LOG_IEEE, 1), // 16 ALU_INSTR_IMPL(LOG_IEEE, 1), // 16
ALU_INSTR(RECIP_CLAMP, 1), // 17 ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17
ALU_INSTR(RECIP_FF, 1), // 18 ALU_INSTR_IMPL(RECIP_FF, 1), // 18
ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19
ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20
ALU_INSTR(RECIPSQ_FF, 1), // 21 ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21
ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22 ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22
ALU_INSTR(MOVAs, 1), // 23 ALU_INSTR(MOVAs, 1), // 23
ALU_INSTR(MOVA_FLOORs, 1), // 24 ALU_INSTR(MOVA_FLOORs, 1), // 24

View File

@ -110,7 +110,11 @@ class GL4ShaderTranslator {
bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu); bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu);
bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu);
bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu);
// ... // ...
bool TranslateALU_SUBs(const ucode::instr_alu_t& alu); bool TranslateALU_SUBs(const ucode::instr_alu_t& alu);