Shadow state to eliminate most redundant GL calls.

This commit is contained in:
Ben Vanik 2015-01-03 02:57:58 -08:00
parent 858f70549f
commit 07a82200f9
4 changed files with 320 additions and 160 deletions

View File

@ -128,7 +128,7 @@ void CommandProcessor::WorkerMain() {
// We've run out of commands to execute.
// We spin here waiting for new ones, as the overhead of waiting on our
// event is too high.
//PrepareForWait();
// PrepareForWait();
do {
// TODO(benvanik): if we go longer than Nms, switch to waiting?
// It'll keep us from burning power.
@ -139,7 +139,7 @@ void CommandProcessor::WorkerMain() {
write_ptr_index = write_ptr_index_.load();
} while (write_ptr_index == 0xBAADF00D ||
read_ptr_index_ == write_ptr_index);
//ReturnFromWait();
// ReturnFromWait();
}
assert_true(read_ptr_index_ != write_ptr_index);
@ -163,6 +163,8 @@ void CommandProcessor::WorkerMain() {
}
bool CommandProcessor::SetupGL() {
glViewport(0, 0, 1280, 720);
// Circular buffer holding scratch vertex/index data.
if (!scratch_buffer_.Initialize()) {
PLOGE("Unable to initialize scratch buffer");
@ -236,7 +238,8 @@ bool CommandProcessor::SetupGL() {
"layout(triangle_strip, max_vertices = 4) out;\n"
"void main() {\n"
// Most games use the left-aligned form.
" bool left_aligned = gl_in[0].gl_Position.x == gl_in[2].gl_Position.x;\n"
" bool left_aligned = gl_in[0].gl_Position.x == \n"
" gl_in[2].gl_Position.x;\n"
" if (left_aligned) {\n"
// 0 ------ 1
// | - |
@ -1396,11 +1399,9 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
}
GLenum prim_type = 0;
GLuint pipeline = active_pipeline_->handles.default_pipeline;
switch (cmd.prim_type) {
case PrimitiveType::kPointList:
prim_type = GL_POINTS;
pipeline = active_pipeline_->handles.point_list_pipeline;
break;
case PrimitiveType::kLineList:
prim_type = GL_LINES;
@ -1422,11 +1423,9 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
break;
case PrimitiveType::kRectangleList:
prim_type = GL_TRIANGLE_STRIP;
pipeline = active_pipeline_->handles.rect_list_pipeline;
break;
case PrimitiveType::kQuadList:
prim_type = GL_LINES_ADJACENCY;
pipeline = active_pipeline_->handles.quad_list_pipeline;
break;
default:
case PrimitiveType::kUnknown0x07:
@ -1436,8 +1435,6 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
return false;
}
glBindProgramPipeline(pipeline);
// Commit the state buffer - nothing can change after this.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(),
allocation.offset, allocation.length);
@ -1462,33 +1459,56 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
}
// Hacky draw counter.
if (false) {
static int draw_count = 0;
glEnable(GL_SCISSOR_TEST);
glScissor(20, 0, 20, 20);
float red[] = {0, draw_count / 100.0f, 0, 1.0f};
draw_count = (draw_count + 1) % 100;
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
red);
glDisable(GL_SCISSOR_TEST);
}
return true;
}
// Synchronizes a cached (shadow) 32-bit register value with the register file.
//
// dest:          shadow copy to refresh; overwritten only when it differs.
// register_name: index into register_file_->values to read from.
//
// Returns true when the shadow copy was changed, false when it already
// matched the value currently in the register file.
bool CommandProcessor::SetShadowRegister(uint32_t& dest,
                                         uint32_t register_name) {
  const uint32_t current = register_file_->values[register_name].u32;
  const bool changed = (dest != current);
  if (changed) {
    dest = current;
  }
  return changed;
}
// Float overload: synchronizes a cached (shadow) register value with the
// f32 view of the register file entry.
//
// dest:          shadow copy to refresh; overwritten only when it differs.
// register_name: index into register_file_->values to read from.
//
// Returns true when the shadow copy was changed. The comparison is a plain
// floating-point inequality, so a NaN register value always reports a change.
bool CommandProcessor::SetShadowRegister(float& dest, uint32_t register_name) {
  const float current = register_file_->values[register_name].f32;
  const bool changed = (dest != current);
  if (changed) {
    dest = current;
  }
  return changed;
}
bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
SCOPE_profile_cpu_f("gpu");
auto& regs = *register_file_;
auto& regs = update_render_targets_regs_;
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
bool dirty = false;
dirty |= SetShadowRegister(regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
dirty |= SetShadowRegister(regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
dirty |= SetShadowRegister(regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
dirty |= SetShadowRegister(regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
dirty |= SetShadowRegister(regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
dirty |= SetShadowRegister(regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= SetShadowRegister(regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
dirty |= SetShadowRegister(regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
if (!dirty) {
return true;
}
SCOPE_profile_cpu_f("gpu");
auto enable_mode = static_cast<ModeControl>(regs.rb_modecontrol & 0x7);
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
uint32_t surface_pitch = regs.rb_surface_info & 0x3FFF;
auto surface_msaa =
static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
// Get/create all color render targets, if we are using them.
// In depth-only mode we don't need them.
@ -1500,14 +1520,12 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
if (enable_mode == ModeControl::kColorDepth) {
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
regs.rb_color_info, regs.rb_color1_info, regs.rb_color2_info,
regs.rb_color3_info,
};
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (int n = 0; n < poly::countof(color_info); n++) {
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
uint32_t write_mask = (regs.rb_color_mask >> (n * 4)) & 0xF;
if (!write_mask || !shader_targets[n]) {
// Unused, so keep disabled and set to wildcard so we'll take any
// framebuffer that has it.
@ -1525,18 +1543,16 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
}
// Get/create depth buffer, but only if we are going to use it.
uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
bool uses_depth =
(depth_control & 0x00000002) || (depth_control & 0x00000004);
uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
bool uses_depth = (regs.rb_depthcontrol & 0x00000002) ||
(regs.rb_depthcontrol & 0x00000004);
uint32_t stencil_write_mask = (regs.rb_stencilrefmask & 0x00FF0000) >> 16;
bool uses_stencil =
(regs.rb_depthcontrol & 0x00000001) || (stencil_write_mask != 0);
GLuint depth_target = kAnyTarget;
if (uses_depth && uses_stencil) {
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
uint32_t depth_base = regs.rb_depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
// TODO(benvanik): when a game switches does it expect to keep the same
@ -1547,20 +1563,17 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
// Note that none may be returned if we really don't need one.
auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
active_framebuffer_ = cached_framebuffer;
if (!active_framebuffer_) {
// Nothing to do.
return true;
if (active_framebuffer_) {
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
}
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
return true;
}
@ -1569,6 +1582,29 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
auto& regs = *register_file_;
auto state_data = draw_command->state_data;
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// Deprecated in GL, implemented in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
state_data->alpha_test.x =
(color_control & 0x4) ? 1.0f : 0.0f; // ALPAHTESTENABLE
state_data->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC
state_data->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
UpdateViewportState(draw_command);
UpdateRasterizerState(draw_command);
UpdateBlendState(draw_command);
UpdateDepthStencilState(draw_command);
return true;
}
bool CommandProcessor::UpdateViewportState(DrawCommand* draw_command) {
auto& regs = *register_file_;
auto state_data = draw_command->state_data;
SCOPE_profile_cpu_f("gpu");
// Much of this state machine is extracted from:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
@ -1614,7 +1650,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
}
state_data->window_offset.z = window_width_scalar;
state_data->window_offset.w = window_height_scalar;
glViewport(0, 0, 1280, 720);
// Whether each of the viewport settings is enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
@ -1662,63 +1697,98 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/14/yamato_genenum.h#L1587
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
//assert_true(clip_enabled);
// assert_true(clip_enabled);
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
//assert_true(dx_clip);
// assert_true(dx_clip);
return true;
}
// Applies scissor, face culling, winding order and polygon mode to GL from
// the rasterizer registers.
//
// The three registers consumed here are shadowed in
// update_rasterizer_state_regs_; when none of them changed since the previous
// call the function returns early without issuing any GL calls.
//
// draw_command: the draw being prepared; only its prim_type is read (for the
//               rect-list culling assertion).
//
// Always returns true.
bool CommandProcessor::UpdateRasterizerState(DrawCommand* draw_command) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |=
      SetShadowRegister(regs.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  if (!dirty) {
    return true;
  }

  SCOPE_profile_cpu_f("gpu");

  // Scissoring.
  if (regs.pa_sc_screen_scissor_tl != 0 &&
      regs.pa_sc_screen_scissor_br != 0x20002000) {
    glEnable(GL_SCISSOR_TEST);
    // TODO(benvanik): signed?
    int32_t screen_scissor_x = regs.pa_sc_screen_scissor_tl & 0x7FFF;
    int32_t screen_scissor_y = (regs.pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
    // glScissor takes a width/height, so subtract the origin from the
    // masked second coordinate. The mask must be parenthesized: binary '-'
    // binds tighter than '&', so `br & 0x7FFF - x` would evaluate as
    // `br & (0x7FFF - x)`.
    int32_t screen_scissor_w =
        (regs.pa_sc_screen_scissor_br & 0x7FFF) - screen_scissor_x;
    int32_t screen_scissor_h =
        ((regs.pa_sc_screen_scissor_br >> 16) & 0x7FFF) - screen_scissor_y;
    glScissor(screen_scissor_x, screen_scissor_y, screen_scissor_w,
              screen_scissor_h);
  } else {
    glDisable(GL_SCISSOR_TEST);
  }

  // Rasterizer state.
  // Rect lists aren't culled. There may be other things they skip too.
  assert_true((regs.pa_su_sc_mode_cntl & 0x3) == 0 ||
              draw_command->prim_type != PrimitiveType::kRectangleList);
  // Low two bits select the cull mode; a value of 3 leaves GL culling state
  // untouched.
  switch (regs.pa_su_sc_mode_cntl & 0x3) {
    case 0:
      glDisable(GL_CULL_FACE);
      break;
    case 1:
      glEnable(GL_CULL_FACE);
      glCullFace(GL_FRONT);
      break;
    case 2:
      glEnable(GL_CULL_FACE);
      glCullFace(GL_BACK);
      break;
  }
  // Bit 2 selects the front-face winding order.
  if (regs.pa_su_sc_mode_cntl & 0x4) {
    glFrontFace(GL_CW);
  } else {
    glFrontFace(GL_CCW);
  }
  // TODO(benvanik): wireframe mode.
  // glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
  glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);

  return true;
}
bool CommandProcessor::UpdateBlendState(DrawCommand* draw_command) {
auto& regs = update_blend_state_regs_;
bool dirty = false;
dirty |=
SetShadowRegister(regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
dirty |=
SetShadowRegister(regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
dirty |=
SetShadowRegister(regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
dirty |=
SetShadowRegister(regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
dirty |= SetShadowRegister(regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
dirty |= SetShadowRegister(regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
dirty |= SetShadowRegister(regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
dirty |= SetShadowRegister(regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
if (!dirty) {
return true;
}
SCOPE_profile_cpu_f("gpu");
static const GLenum blend_map[] = {
/* 0 */ GL_ZERO,
@ -1746,25 +1816,20 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
/* 3 */ GL_MAX,
/* 4 */ GL_FUNC_REVERSE_SUBTRACT,
};
uint32_t blend_control[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32,
};
for (int n = 0; n < poly::countof(blend_control); n++) {
for (int i = 0; i < poly::countof(regs.rb_blendcontrol); ++i) {
uint32_t blend_control = regs.rb_blendcontrol[i];
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
auto src_blend = blend_map[(blend_control[n] & 0x0000001F) >> 0];
auto src_blend = blend_map[(blend_control & 0x0000001F) >> 0];
// A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
auto dest_blend = blend_map[(blend_control[n] & 0x00001F00) >> 8];
auto dest_blend = blend_map[(blend_control & 0x00001F00) >> 8];
// A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN
auto blend_op = blend_op_map[(blend_control[n] & 0x000000E0) >> 5];
auto blend_op = blend_op_map[(blend_control & 0x000000E0) >> 5];
// A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND
auto src_blend_alpha = blend_map[(blend_control[n] & 0x001F0000) >> 16];
auto src_blend_alpha = blend_map[(blend_control & 0x001F0000) >> 16];
// A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND
auto dest_blend_alpha = blend_map[(blend_control[n] & 0x1F000000) >> 24];
auto dest_blend_alpha = blend_map[(blend_control & 0x1F000000) >> 24];
// A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN
auto blend_op_alpha = blend_op_map[(blend_control[n] & 0x00E00000) >> 21];
auto blend_op_alpha = blend_op_map[(blend_control & 0x00E00000) >> 21];
// A2XX_RB_COLORCONTROL_BLEND_DISABLE ?? Can't find this!
// Just guess based on actions.
bool blend_enable =
@ -1772,19 +1837,33 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
(blend_op == GL_FUNC_ADD) && (src_blend_alpha == GL_ONE) &&
(dest_blend_alpha == GL_ZERO) && (blend_op_alpha == GL_FUNC_ADD));
if (blend_enable) {
glEnablei(GL_BLEND, n);
glBlendEquationSeparatei(n, blend_op, blend_op_alpha);
glBlendFuncSeparatei(n, src_blend, dest_blend, src_blend_alpha,
glEnablei(GL_BLEND, i);
glBlendEquationSeparatei(i, blend_op, blend_op_alpha);
glBlendFuncSeparatei(i, src_blend, dest_blend, src_blend_alpha,
dest_blend_alpha);
} else {
glDisablei(GL_BLEND, n);
glDisablei(GL_BLEND, i);
}
}
float blend_color[4] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32, regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
};
glBlendColor(blend_color[0], blend_color[1], blend_color[2], blend_color[3]);
glBlendColor(regs.rb_blend_rgba[0], regs.rb_blend_rgba[1],
regs.rb_blend_rgba[2], regs.rb_blend_rgba[3]);
return true;
}
bool CommandProcessor::UpdateDepthStencilState(DrawCommand* draw_command) {
auto& regs = update_depth_stencil_state_regs_;
bool dirty = false;
dirty |= SetShadowRegister(regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
if (!dirty) {
return true;
}
SCOPE_profile_cpu_f("gpu");
static const GLenum compare_func_map[] = {
/* 0 */ GL_NEVER,
@ -1806,64 +1885,62 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
/* 6 */ GL_INCR,
/* 7 */ GL_DECR,
};
uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
// A2XX_RB_DEPTHCONTROL_Z_ENABLE
if (depth_control & 0x00000002) {
if (regs.rb_depthcontrol & 0x00000002) {
glEnable(GL_DEPTH_TEST);
} else {
glDisable(GL_DEPTH_TEST);
}
// A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE
glDepthMask((depth_control & 0x00000004) ? GL_TRUE : GL_FALSE);
glDepthMask((regs.rb_depthcontrol & 0x00000004) ? GL_TRUE : GL_FALSE);
// A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE
// ?
// A2XX_RB_DEPTHCONTROL_ZFUNC
glDepthFunc(compare_func_map[(depth_control & 0x00000070) >> 4]);
glDepthFunc(compare_func_map[(regs.rb_depthcontrol & 0x00000070) >> 4]);
// A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE
if (depth_control & 0x00000001) {
if (regs.rb_depthcontrol & 0x00000001) {
glEnable(GL_STENCIL_TEST);
} else {
glDisable(GL_STENCIL_TEST);
}
uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
// RB_STENCILREFMASK_STENCILREF
uint32_t stencil_ref = (stencil_ref_mask & 0x000000FF);
uint32_t stencil_ref = (regs.rb_stencilrefmask & 0x000000FF);
// RB_STENCILREFMASK_STENCILMASK
uint32_t stencil_read_mask = (stencil_ref_mask & 0x0000FF00) >> 8;
uint32_t stencil_read_mask = (regs.rb_stencilrefmask & 0x0000FF00) >> 8;
// RB_STENCILREFMASK_STENCILWRITEMASK
glStencilMask((stencil_ref_mask & 0x00FF0000) >> 16);
glStencilMask((regs.rb_stencilrefmask & 0x00FF0000) >> 16);
// A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE
bool backface_enabled = (depth_control & 0x00000080) != 0;
bool backface_enabled = (regs.rb_depthcontrol & 0x00000080) != 0;
if (backface_enabled) {
// A2XX_RB_DEPTHCONTROL_STENCILFUNC
glStencilFuncSeparate(GL_FRONT,
compare_func_map[(depth_control & 0x00000700) >> 8],
stencil_ref, stencil_read_mask);
glStencilFuncSeparate(
GL_FRONT, compare_func_map[(regs.rb_depthcontrol & 0x00000700) >> 8],
stencil_ref, stencil_read_mask);
// A2XX_RB_DEPTHCONTROL_STENCILFAIL
// A2XX_RB_DEPTHCONTROL_STENCILZFAIL
// A2XX_RB_DEPTHCONTROL_STENCILZPASS
glStencilOpSeparate(GL_FRONT,
stencil_op_map[(depth_control & 0x00003800) >> 11],
stencil_op_map[(depth_control & 0x000E0000) >> 17],
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
glStencilOpSeparate(
GL_FRONT, stencil_op_map[(regs.rb_depthcontrol & 0x00003800) >> 11],
stencil_op_map[(regs.rb_depthcontrol & 0x000E0000) >> 17],
stencil_op_map[(regs.rb_depthcontrol & 0x0001C000) >> 14]);
// A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF
glStencilFuncSeparate(GL_BACK,
compare_func_map[(depth_control & 0x00700000) >> 20],
stencil_ref, stencil_read_mask);
glStencilFuncSeparate(
GL_BACK, compare_func_map[(regs.rb_depthcontrol & 0x00700000) >> 20],
stencil_ref, stencil_read_mask);
// A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF
// A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF
// A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF
glStencilOpSeparate(GL_BACK,
stencil_op_map[(depth_control & 0x03800000) >> 23],
stencil_op_map[(depth_control & 0xE0000000) >> 29],
stencil_op_map[(depth_control & 0x1C000000) >> 26]);
glStencilOpSeparate(
GL_BACK, stencil_op_map[(regs.rb_depthcontrol & 0x03800000) >> 23],
stencil_op_map[(regs.rb_depthcontrol & 0xE0000000) >> 29],
stencil_op_map[(regs.rb_depthcontrol & 0x1C000000) >> 26]);
} else {
// Backfaces disabled - treat backfaces as frontfaces.
glStencilFunc(compare_func_map[(depth_control & 0x00000700) >> 8],
glStencilFunc(compare_func_map[(regs.rb_depthcontrol & 0x00000700) >> 8],
stencil_ref, stencil_read_mask);
glStencilOp(stencil_op_map[(depth_control & 0x00003800) >> 11],
stencil_op_map[(depth_control & 0x000E0000) >> 17],
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
glStencilOp(stencil_op_map[(regs.rb_depthcontrol & 0x00003800) >> 11],
stencil_op_map[(regs.rb_depthcontrol & 0x000E0000) >> 17],
stencil_op_map[(regs.rb_depthcontrol & 0x0001C000) >> 14]);
}
return true;
@ -1888,12 +1965,25 @@ bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) {
}
bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
SCOPE_profile_cpu_f("gpu");
auto& regs = *register_file_;
auto& regs = update_shaders_regs_;
auto& cmd = *draw_command;
bool dirty = false;
dirty |= SetShadowRegister(regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= regs.vertex_shader != active_vertex_shader_;
dirty |= regs.pixel_shader != active_pixel_shader_;
dirty |= regs.prim_type != cmd.prim_type;
if (!dirty) {
return true;
}
regs.vertex_shader = active_vertex_shader_;
regs.pixel_shader = active_pixel_shader_;
regs.prim_type = cmd.prim_type;
SCOPE_profile_cpu_f("gpu");
xe_gpu_program_cntl_t program_cntl;
program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
program_cntl.dword_0 = regs.sq_program_cntl;
if (!active_vertex_shader_->has_prepared()) {
if (!active_vertex_shader_->PrepareVertexShader(program_cntl)) {
XELOGE("Unable to prepare vertex shader");
@ -1961,15 +2051,24 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
cached_pipeline->handles.quad_list_pipeline = pipelines[3];
}
// NOTE: we don't yet have our state data pointer - that comes at the end.
// We also don't know which configuration we want (based on prim type).
active_pipeline_ = cached_pipeline;
GLuint pipeline = cached_pipeline->handles.default_pipeline;
switch (regs.prim_type) {
case PrimitiveType::kPointList:
pipeline = cached_pipeline->handles.point_list_pipeline;
break;
case PrimitiveType::kRectangleList:
pipeline = cached_pipeline->handles.rect_list_pipeline;
break;
case PrimitiveType::kQuadList:
pipeline = cached_pipeline->handles.quad_list_pipeline;
break;
}
glBindProgramPipeline(pipeline);
return true;
}
bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
SCOPE_profile_cpu_f("gpu");
auto& cmd = *draw_command;
auto& info = cmd.index_buffer;
@ -1978,6 +2077,8 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
return true;
}
SCOPE_profile_cpu_f("gpu");
assert_true(info.endianness == Endian::k8in16 ||
info.endianness == Endian::k8in32);
@ -2406,10 +2507,13 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
GLint stencil = copy_depth_clear & 0xFF;
// HACK: this should work, but throws INVALID_ENUM on nvidia drivers.
// glClearNamedFramebufferfi(source_framebuffer->framebuffer,
// GL_DEPTH_STENCIL,
// depth, stencil);
// GL_DEPTH_STENCIL,
// depth, stencil);
GLint old_draw_framebuffer;
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_framebuffer);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, source_framebuffer->framebuffer);
glClearBufferfi(GL_DEPTH_STENCIL, 0, depth, stencil);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_framebuffer);
}
return true;

View File

@ -237,6 +237,10 @@ class CommandProcessor {
bool IssueDraw(DrawCommand* draw_command);
bool UpdateRenderTargets(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateViewportState(DrawCommand* draw_command);
bool UpdateRasterizerState(DrawCommand* draw_command);
bool UpdateBlendState(DrawCommand* draw_command);
bool UpdateDepthStencilState(DrawCommand* draw_command);
bool UpdateConstants(DrawCommand* draw_command);
bool UpdateShaders(DrawCommand* draw_command);
bool PopulateIndexBuffer(DrawCommand* draw_command);
@ -287,7 +291,6 @@ class CommandProcessor {
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
GL4Shader* active_vertex_shader_;
GL4Shader* active_pixel_shader_;
CachedPipeline* active_pipeline_;
CachedFramebuffer* active_framebuffer_;
std::vector<CachedFramebuffer> cached_framebuffers_;
@ -303,6 +306,68 @@ class CommandProcessor {
CircularBuffer scratch_buffer_;
DrawCommand draw_command_;
private:
bool SetShadowRegister(uint32_t& dest, uint32_t register_name);
bool SetShadowRegister(float& dest, uint32_t register_name);
// Shadow copies of the registers consumed by UpdateRenderTargets. Each field
// holds the value last seen in the register file so that unchanged state can
// be detected and skipped.
struct UpdateRenderTargetsRegisters {
  uint32_t rb_modecontrol;
  uint32_t rb_surface_info;
  uint32_t rb_color_info;
  uint32_t rb_color1_info;
  uint32_t rb_color2_info;
  uint32_t rb_color3_info;
  uint32_t rb_color_mask;
  uint32_t rb_depthcontrol;
  uint32_t rb_stencilrefmask;
  uint32_t rb_depth_info;

  // Zero-fills every shadowed value.
  void Reset() { std::memset(this, 0, sizeof(UpdateRenderTargetsRegisters)); }
  // Starts out fully zeroed.
  UpdateRenderTargetsRegisters() { Reset(); }
} update_render_targets_regs_;
// Placeholder for registers shadowed by UpdateViewportState; it currently
// declares no fields.
struct UpdateViewportStateRegisters {
  //

  // Zero-fills the object's storage.
  void Reset() { std::memset(this, 0, sizeof(UpdateViewportStateRegisters)); }
  UpdateViewportStateRegisters() { Reset(); }
} update_viewport_state_regs_;
// Shadow copies of the registers consumed by UpdateRasterizerState, used to
// detect when rasterizer-related GL state needs to be re-issued.
struct UpdateRasterizerStateRegisters {
  uint32_t pa_su_sc_mode_cntl;
  uint32_t pa_sc_screen_scissor_tl;
  uint32_t pa_sc_screen_scissor_br;

  // Zero-fills every shadowed value.
  void Reset() { std::memset(this, 0, sizeof(UpdateRasterizerStateRegisters)); }
  // Starts out fully zeroed.
  UpdateRasterizerStateRegisters() { Reset(); }
} update_rasterizer_state_regs_;
// Shadow copies of the registers consumed by UpdateBlendState: one blend
// control word per index 0-3 plus the four blend colour components
// (red, green, blue, alpha order).
struct UpdateBlendStateRegisters {
  uint32_t rb_blendcontrol[4];
  float rb_blend_rgba[4];

  // Zero-fills every shadowed value.
  void Reset() { std::memset(this, 0, sizeof(UpdateBlendStateRegisters)); }
  // Starts out fully zeroed.
  UpdateBlendStateRegisters() { Reset(); }
} update_blend_state_regs_;
// Shadow copies of the registers consumed by UpdateDepthStencilState, used to
// detect when depth/stencil GL state needs to be re-issued.
struct UpdateDepthStencilStateRegisters {
  uint32_t rb_depthcontrol;
  uint32_t rb_stencilrefmask;

  // Zero-fills every shadowed value.
  void Reset() {
    std::memset(this, 0, sizeof(UpdateDepthStencilStateRegisters));
  }
  // Starts out fully zeroed.
  UpdateDepthStencilStateRegisters() { Reset(); }
} update_depth_stencil_state_regs_;
// TODO(benvanik): constant bitmask?
struct UpdateShadersRegisters {
PrimitiveType prim_type;
uint32_t sq_program_cntl;
GL4Shader* vertex_shader;
GL4Shader* pixel_shader;
UpdateShadersRegisters() { Reset(); }
void Reset() {
sq_program_cntl = 0;
vertex_shader = pixel_shader = nullptr;
}
} update_shaders_regs_;
// ib
// vb
// samplers
};
} // namespace gl4

View File

@ -93,15 +93,6 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
// TODO(benvanik): profiler present.
Profiler::Present();
// Hacky swap timer.
static int swap_count = 0;
glEnable(GL_SCISSOR_TEST);
glScissor(0, 0, 20, 20);
float red[] = {swap_count / 60.0f, 0, 0, 1.0f};
swap_count = (swap_count + 1) % 60;
glClearNamedFramebufferfv(0, GL_COLOR, 0, red);
glDisable(GL_SCISSOR_TEST);
}
{
SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::WGLControl::SwapBuffers");

View File

@ -15,7 +15,7 @@
#define MICROPROFILEUI_IMPL 1
#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (1024 * 1024 * 10)
#define MICROPROFILE_USE_THREAD_NAME_CALLBACK 1
#define MICROPROFILE_WEBSERVER_MAXFRAMES 10
#define MICROPROFILE_WEBSERVER_MAXFRAMES 3
#define MICROPROFILE_PRINTF PLOGI
#define MICROPROFILE_WEBSERVER 1
#define MICROPROFILE_DEBUG 0