Adding a bunch of profiling tracers.

This commit is contained in:
Ben Vanik 2014-05-28 19:19:39 -07:00
parent beb9bd11f0
commit c1812406f5
34 changed files with 156 additions and 13 deletions

View File

@ -66,6 +66,8 @@ int X64Assembler::Assemble(
FunctionInfo* symbol_info, HIRBuilder* builder, FunctionInfo* symbol_info, HIRBuilder* builder,
uint32_t debug_info_flags, DebugInfo* debug_info, uint32_t debug_info_flags, DebugInfo* debug_info,
Function** out_function) { Function** out_function) {
SCOPE_profile_cpu_f("alloy");
int result = 0; int result = 0;
// Lower HIR -> x64. // Lower HIR -> x64.

View File

@ -75,6 +75,8 @@ int X64CodeCache::Initialize() {
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size, void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
size_t stack_size) { size_t stack_size) {
SCOPE_profile_cpu_f("alloy");
// Add unwind info into the allocation size. Keep things 16b aligned. // Add unwind info into the allocation size. Keep things 16b aligned.
code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16); code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);

View File

@ -77,6 +77,8 @@ int X64Emitter::Emit(
HIRBuilder* builder, HIRBuilder* builder,
uint32_t debug_info_flags, runtime::DebugInfo* debug_info, uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
void*& out_code_address, size_t& out_code_size) { void*& out_code_address, size_t& out_code_size) {
SCOPE_profile_cpu_f("alloy");
// Reset. // Reset.
if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) { if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
source_map_count_ = 0; source_map_count_ = 0;

View File

@ -49,6 +49,8 @@ void Compiler::Reset() {
} }
int Compiler::Compile(HIRBuilder* builder) { int Compiler::Compile(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they // TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they
// stop changing things, etc. // stop changing things, etc.
for (auto it = passes_.begin(); it != passes_.end(); ++it) { for (auto it = passes_.begin(); it != passes_.end(); ++it) {

View File

@ -23,6 +23,8 @@ ConstantPropagationPass::~ConstantPropagationPass() {
} }
int ConstantPropagationPass::Run(HIRBuilder* builder) { int ConstantPropagationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Once ContextPromotion has run there will likely be a whole slew of // Once ContextPromotion has run there will likely be a whole slew of
// constants that can be pushed through the function. // constants that can be pushed through the function.
// Example: // Example:

View File

@ -51,6 +51,8 @@ int ContextPromotionPass::Initialize(Compiler* compiler) {
} }
int ContextPromotionPass::Run(HIRBuilder* builder) { int ContextPromotionPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Like mem2reg, but because context memory is unaliasable it's easier to // Like mem2reg, but because context memory is unaliasable it's easier to
// check and convert LoadContext/StoreContext into value operations. // check and convert LoadContext/StoreContext into value operations.
// Example of load->value promotion: // Example of load->value promotion:

View File

@ -30,6 +30,8 @@ ControlFlowAnalysisPass::~ControlFlowAnalysisPass() {
} }
int ControlFlowAnalysisPass::Run(HIRBuilder* builder) { int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// TODO(benvanik): reset edges for all blocks? Needed to be re-runnable. // TODO(benvanik): reset edges for all blocks? Needed to be re-runnable.
// Add edges. // Add edges.

View File

@ -36,6 +36,8 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() {
} }
int DataFlowAnalysisPass::Run(HIRBuilder* builder) { int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Linearize blocks so that we can detect cycles and propagate dependencies. // Linearize blocks so that we can detect cycles and propagate dependencies.
uint32_t block_count = LinearizeBlocks(builder); uint32_t block_count = LinearizeBlocks(builder);

View File

@ -23,6 +23,8 @@ DeadCodeEliminationPass::~DeadCodeEliminationPass() {
} }
int DeadCodeEliminationPass::Run(HIRBuilder* builder) { int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// ContextPromotion/DSE will likely leave around a lot of dead statements. // ContextPromotion/DSE will likely leave around a lot of dead statements.
// Code generated for comparison/testing produces many unused statements and // Code generated for comparison/testing produces many unused statements and
// with proper use analysis it should be possible to remove most of them: // with proper use analysis it should be possible to remove most of them:

View File

@ -30,6 +30,8 @@ FinalizationPass::~FinalizationPass() {
} }
int FinalizationPass::Run(HIRBuilder* builder) { int FinalizationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Process the HIR and prepare it for lowering. // Process the HIR and prepare it for lowering.
// After this is done the HIR should be ready for emitting. // After this is done the HIR should be ready for emitting.

View File

@ -59,6 +59,8 @@ RegisterAllocationPass::~RegisterAllocationPass() {
} }
int RegisterAllocationPass::Run(HIRBuilder* builder) { int RegisterAllocationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Simple per-block allocator that operates on SSA form. // Simple per-block allocator that operates on SSA form.
// Registers do not move across blocks, though this could be // Registers do not move across blocks, though this could be
// optimized with some intra-block analysis (dominators/etc). // optimized with some intra-block analysis (dominators/etc).

View File

@ -23,6 +23,8 @@ SimplificationPass::~SimplificationPass() {
} }
int SimplificationPass::Run(HIRBuilder* builder) { int SimplificationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
EliminateConversions(builder); EliminateConversions(builder);
SimplifyAssignments(builder); SimplifyAssignments(builder);
return 0; return 0;

View File

@ -30,6 +30,8 @@ ValidationPass::~ValidationPass() {
} }
int ValidationPass::Run(HIRBuilder* builder) { int ValidationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
StringBuffer str; StringBuffer str;
builder->Dump(&str); builder->Dump(&str);
printf(str.GetString()); printf(str.GetString());

View File

@ -53,6 +53,8 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
} }
int ValueReductionPass::Run(HIRBuilder* builder) { int ValueReductionPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Walk each block and reuse variable ordinals as much as possible. // Walk each block and reuse variable ordinals as much as possible.
llvm::BitVector ordinals(builder->max_value_ordinal()); llvm::BitVector ordinals(builder->max_value_ordinal());

View File

@ -44,6 +44,8 @@ void PPCHIRBuilder::Reset() {
} }
int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) { int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
SCOPE_profile_cpu_f("alloy");
Memory* memory = frontend_->memory(); Memory* memory = frontend_->memory();
const uint8_t* p = memory->membase(); const uint8_t* p = memory->membase();

View File

@ -38,6 +38,8 @@ bool PPCScanner::IsRestGprLr(uint64_t address) {
} }
int PPCScanner::FindExtents(FunctionInfo* symbol_info) { int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
SCOPE_profile_cpu_f("alloy");
// This is a simple basic block analyzer. It walks the start address to the // This is a simple basic block analyzer. It walks the start address to the
// end address looking for branches. Each span of instructions between // end address looking for branches. Each span of instructions between
// branches is considered a basic block. When the last blr (that has no // branches is considered a basic block. When the last blr (that has no
@ -286,6 +288,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
} }
std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) { std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) {
SCOPE_profile_cpu_f("alloy");
Memory* memory = frontend_->memory(); Memory* memory = frontend_->memory();
const uint8_t* p = memory->membase(); const uint8_t* p = memory->membase();

View File

@ -86,6 +86,8 @@ int PPCTranslator::Translate(
FunctionInfo* symbol_info, FunctionInfo* symbol_info,
uint32_t debug_info_flags, uint32_t debug_info_flags,
Function** out_function) { Function** out_function) {
SCOPE_profile_cpu_f("alloy");
// Scan the function to find its extents. We only need to do this if we // Scan the function to find its extents. We only need to do this if we
// haven't already been provided with them from some other source. // haven't already been provided with them from some other source.
if (!symbol_info->has_end_address()) { if (!symbol_info->has_end_address()) {

View File

@ -51,6 +51,8 @@ void HIRBuilder::Reset() {
} }
int HIRBuilder::Finalize() { int HIRBuilder::Finalize() {
SCOPE_profile_cpu_f("alloy");
// Scan blocks in order and add fallthrough branches. These are needed for // Scan blocks in order and add fallthrough branches. These are needed for
// analysis passes to work. We may have also added blocks out of order and // analysis passes to work. We may have also added blocks out of order and
// need to ensure they fall through in the right order. // need to ensure they fall through in the right order.
@ -141,6 +143,8 @@ void HIRBuilder::DumpOp(
} }
void HIRBuilder::Dump(StringBuffer* str) { void HIRBuilder::Dump(StringBuffer* str) {
SCOPE_profile_cpu_f("alloy");
if (attributes_) { if (attributes_) {
str->Append("; attributes = %.8X\n", attributes_); str->Append("; attributes = %.8X\n", attributes_);
} }

View File

@ -75,6 +75,8 @@ Entry::Status EntryTable::GetOrCreate(uint64_t address, Entry** out_entry) {
} }
std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) { std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) {
SCOPE_profile_cpu_f("alloy");
std::vector<Function*> fns; std::vector<Function*> fns;
LockMutex(lock_); LockMutex(lock_);
for (auto it = map_.begin(); it != map_.end(); ++it) { for (auto it = map_.begin(); it != map_.end(); ++it) {

View File

@ -74,6 +74,8 @@ Breakpoint* Function::FindBreakpoint(uint64_t address) {
} }
int Function::Call(ThreadState* thread_state, uint64_t return_address) { int Function::Call(ThreadState* thread_state, uint64_t return_address) {
SCOPE_profile_cpu_f("alloy");
ThreadState* original_thread_state = ThreadState::Get(); ThreadState* original_thread_state = ThreadState::Get();
if (original_thread_state != thread_state) { if (original_thread_state != thread_state) {
ThreadState::Bind(thread_state); ThreadState::Bind(thread_state);

View File

@ -161,6 +161,8 @@ SymbolInfo::Status Module::DefineVariable(VariableInfo* symbol_info) {
} }
void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) { void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
SCOPE_profile_cpu_f("alloy");
LockMutex(lock_); LockMutex(lock_);
for (auto it = list_.begin(); it != list_.end(); ++it) { for (auto it = list_.begin(); it != list_.end(); ++it) {
SymbolInfo* symbol_info = *it; SymbolInfo* symbol_info = *it;
@ -174,6 +176,8 @@ void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
void Module::ForEachFunction(size_t since, size_t& version, void Module::ForEachFunction(size_t since, size_t& version,
std::function<void (FunctionInfo*)> callback) { std::function<void (FunctionInfo*)> callback) {
SCOPE_profile_cpu_f("alloy");
LockMutex(lock_); LockMutex(lock_);
size_t count = list_.size(); size_t count = list_.size();
version = count; version = count;

View File

@ -159,6 +159,8 @@ std::vector<Function*> Runtime::FindFunctionsWithAddress(uint64_t address) {
} }
int Runtime::ResolveFunction(uint64_t address, Function** out_function) { int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
SCOPE_profile_cpu_f("alloy");
*out_function = NULL; *out_function = NULL;
Entry* entry; Entry* entry;
Entry::Status status = entry_table_.GetOrCreate(address, &entry); Entry::Status status = entry_table_.GetOrCreate(address, &entry);
@ -192,6 +194,8 @@ int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
int Runtime::LookupFunctionInfo( int Runtime::LookupFunctionInfo(
uint64_t address, FunctionInfo** out_symbol_info) { uint64_t address, FunctionInfo** out_symbol_info) {
SCOPE_profile_cpu_f("alloy");
*out_symbol_info = NULL; *out_symbol_info = NULL;
// TODO(benvanik): fast reject invalid addresses/log errors. // TODO(benvanik): fast reject invalid addresses/log errors.
@ -220,6 +224,8 @@ int Runtime::LookupFunctionInfo(
int Runtime::LookupFunctionInfo(Module* module, uint64_t address, int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
FunctionInfo** out_symbol_info) { FunctionInfo** out_symbol_info) {
SCOPE_profile_cpu_f("alloy");
// Atomic create/lookup symbol in module. // Atomic create/lookup symbol in module.
// If we get back the NEW flag we must declare it now. // If we get back the NEW flag we must declare it now.
FunctionInfo* symbol_info = NULL; FunctionInfo* symbol_info = NULL;
@ -241,6 +247,8 @@ int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
int Runtime::DemandFunction( int Runtime::DemandFunction(
FunctionInfo* symbol_info, Function** out_function) { FunctionInfo* symbol_info, Function** out_function) {
SCOPE_profile_cpu_f("alloy");
*out_function = NULL; *out_function = NULL;
// Lock function for generation. If it's already being generated // Lock function for generation. If it's already being generated

View File

@ -82,21 +82,26 @@ void AudioSystem::ThreadStart() {
if (result == WAIT_FAILED) { if (result == WAIT_FAILED) {
DWORD err = GetLastError(); DWORD err = GetLastError();
XEASSERTALWAYS(); XEASSERTALWAYS();
break;
} }
size_t pumped = 0; size_t pumped = 0;
if (result >= WAIT_OBJECT_0 && result <= WAIT_OBJECT_0 + (maximum_client_count_ - 1)) { {
size_t index = result - WAIT_OBJECT_0; SCOPE_profile_cpu_i("apu", "Pump");
do { if (result >= WAIT_OBJECT_0 && result <= WAIT_OBJECT_0 + (maximum_client_count_ - 1)) {
xe_mutex_lock(lock_); size_t index = result - WAIT_OBJECT_0;
uint32_t client_callback = clients_[index].callback; do {
uint32_t client_callback_arg = clients_[index].wrapped_callback_arg; xe_mutex_lock(lock_);
xe_mutex_unlock(lock_); uint32_t client_callback = clients_[index].callback;
if (client_callback) { uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
processor->Execute(thread_state_, client_callback, client_callback_arg, 0); xe_mutex_unlock(lock_);
} if (client_callback) {
pumped++; processor->Execute(thread_state_, client_callback, client_callback_arg, 0);
index++; }
} while (index < maximum_client_count_ && WaitForSingleObject(client_wait_handles_[index], 0) == WAIT_OBJECT_0); pumped++;
index++;
} while (index < maximum_client_count_ && WaitForSingleObject(client_wait_handles_[index], 0) == WAIT_OBJECT_0);
}
} }
if (!running_) { if (!running_) {
@ -104,6 +109,7 @@ void AudioSystem::ThreadStart() {
} }
if (!pumped) { if (!pumped) {
SCOPE_profile_cpu_i("apu", "Sleep");
Sleep(500); Sleep(500);
} }
} }
@ -126,6 +132,8 @@ void AudioSystem::Shutdown() {
X_STATUS AudioSystem::RegisterClient( X_STATUS AudioSystem::RegisterClient(
uint32_t callback, uint32_t callback_arg, size_t* out_index) { uint32_t callback, uint32_t callback_arg, size_t* out_index) {
SCOPE_profile_cpu_f("apu");
XEASSERTTRUE(unused_clients_.size()); XEASSERTTRUE(unused_clients_.size());
xe_mutex_lock(lock_); xe_mutex_lock(lock_);
@ -157,6 +165,8 @@ X_STATUS AudioSystem::RegisterClient(
} }
void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) { void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
SCOPE_profile_cpu_f("apu");
xe_mutex_lock(lock_); xe_mutex_lock(lock_);
XEASSERTTRUE(index < maximum_client_count_); XEASSERTTRUE(index < maximum_client_count_);
XEASSERTTRUE(clients_[index].driver != NULL); XEASSERTTRUE(clients_[index].driver != NULL);
@ -166,6 +176,8 @@ void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
} }
void AudioSystem::UnregisterClient(size_t index) { void AudioSystem::UnregisterClient(size_t index) {
SCOPE_profile_cpu_f("apu");
xe_mutex_lock(lock_); xe_mutex_lock(lock_);
XEASSERTTRUE(index < maximum_client_count_); XEASSERTTRUE(index < maximum_client_count_);
DestroyDriver(clients_[index].driver); DestroyDriver(clients_[index].driver);

View File

@ -121,6 +121,8 @@ void XAudio2AudioDriver::Initialize() {
} }
void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) { void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) {
SCOPE_profile_cpu_f("apu");
// Process samples! They are big-endian floats. // Process samples! They are big-endian floats.
HRESULT hr; HRESULT hr;

View File

@ -147,6 +147,8 @@ void Processor::AddRegisterAccessCallbacks(
} }
int Processor::Execute(XenonThreadState* thread_state, uint64_t address) { int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
SCOPE_profile_cpu_f("cpu");
// Attempt to get the function. // Attempt to get the function.
Function* fn; Function* fn;
if (runtime_->ResolveFunction(address, &fn)) { if (runtime_->ResolveFunction(address, &fn)) {
@ -171,6 +173,8 @@ int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
uint64_t Processor::Execute( uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0) { XenonThreadState* thread_state, uint64_t address, uint64_t arg0) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context(); PPCContext* context = thread_state->context();
context->r[3] = arg0; context->r[3] = arg0;
if (Execute(thread_state, address)) { if (Execute(thread_state, address)) {
@ -182,6 +186,8 @@ uint64_t Processor::Execute(
uint64_t Processor::Execute( uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0, XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1) { uint64_t arg1) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context(); PPCContext* context = thread_state->context();
context->r[3] = arg0; context->r[3] = arg0;
context->r[4] = arg1; context->r[4] = arg1;

View File

@ -34,6 +34,8 @@ D3D11GeometryShader::~D3D11GeometryShader() {
} }
int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) { int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
SCOPE_profile_cpu_f("gpu");
if (handle_) { if (handle_) {
return 0; return 0;
} }
@ -74,6 +76,8 @@ int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
} }
ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) { ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): pick shared runtime mode defines. // TODO(benvanik): pick shared runtime mode defines.
D3D10_SHADER_MACRO defines[] = { D3D10_SHADER_MACRO defines[] = {
"TEST_DEFINE", "1", "TEST_DEFINE", "1",
@ -161,6 +165,7 @@ D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) { alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) { if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1; return 1;
} }
@ -215,6 +220,7 @@ D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) { alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) { if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1; return 1;
} }
@ -259,6 +265,7 @@ D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader, int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) { alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) { if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1; return 1;
} }

View File

@ -190,6 +190,8 @@ void D3D11GraphicsDriver::SetShader(
} }
int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
// Ignore copies. // Ignore copies.
@ -296,6 +298,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
XE_GPU_PRIMITIVE_TYPE prim_type, XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count, bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) { uint32_t index_base, uint32_t index_size, uint32_t endianness) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X", XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X",
@ -321,6 +325,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
void D3D11GraphicsDriver::DrawIndexAuto( void D3D11GraphicsDriver::DrawIndexAuto(
XE_GPU_PRIMITIVE_TYPE prim_type, XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) { uint32_t index_count) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
XETRACED3D("D3D11: draw indexed %d (%d indicies)", XETRACED3D("D3D11: draw indexed %d (%d indicies)",
@ -346,6 +352,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
return 0; return 0;
} }
SCOPE_profile_cpu_f("gpu");
// Remove old versions. // Remove old versions.
for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) { for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
auto& cb = render_targets_.color_buffers[n]; auto& cb = render_targets_.color_buffers[n];
@ -426,6 +434,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
} }
int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
SCOPE_profile_cpu_f("gpu");
// Most information comes from here: // Most information comes from here:
// https://chromium.googlesource.com/chromiumos/third_party/mesa/+/6173cc19c45d92ef0b7bc6aa008aa89bb29abbda/src/gallium/drivers/freedreno/freedreno_zsa.c // https://chromium.googlesource.com/chromiumos/third_party/mesa/+/6173cc19c45d92ef0b7bc6aa008aa89bb29abbda/src/gallium/drivers/freedreno/freedreno_zsa.c
// http://cgit.freedesktop.org/mesa/mesa/diff/?id=aac7f06ad843eaa696363e8e9c7781ca30cb4914 // http://cgit.freedesktop.org/mesa/mesa/diff/?id=aac7f06ad843eaa696363e8e9c7781ca30cb4914
@ -768,6 +778,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
} }
int D3D11GraphicsDriver::UpdateConstantBuffers() { int D3D11GraphicsDriver::UpdateConstantBuffers() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
D3D11_MAPPED_SUBRESOURCE res; D3D11_MAPPED_SUBRESOURCE res;
@ -799,6 +811,8 @@ int D3D11GraphicsDriver::UpdateConstantBuffers() {
} }
int D3D11GraphicsDriver::BindShaders() { int D3D11GraphicsDriver::BindShaders() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
xe_gpu_program_cntl_t program_cntl; xe_gpu_program_cntl_t program_cntl;
program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
@ -892,6 +906,8 @@ int D3D11GraphicsDriver::BindShaders() {
} }
int D3D11GraphicsDriver::PrepareFetchers() { int D3D11GraphicsDriver::PrepareFetchers() {
SCOPE_profile_cpu_f("gpu");
// Input assembly. // Input assembly.
XEASSERTNOTNULL(state_.vertex_shader); XEASSERTNOTNULL(state_.vertex_shader);
auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs(); auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs();
@ -934,6 +950,8 @@ int D3D11GraphicsDriver::PrepareFetchers() {
} }
int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) { int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
@ -1009,6 +1027,8 @@ int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
} }
int D3D11GraphicsDriver::PrepareTextureFetchers() { int D3D11GraphicsDriver::PrepareTextureFetchers() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) { for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) {
@ -1275,6 +1295,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
xe_gpu_texture_fetch_t& fetch, xe_gpu_texture_fetch_t& fetch,
TextureInfo& info, TextureInfo& info,
ID3D11Resource** out_texture) { ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
uint32_t address = (fetch.address << 12) + address_translation_; uint32_t address = (fetch.address << 12) + address_translation_;
uint32_t width = 1 + fetch.size_1d.width; uint32_t width = 1 + fetch.size_1d.width;
@ -1299,6 +1321,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
} }
XEFORCEINLINE void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, XE_GPU_ENDIAN endianness) { XEFORCEINLINE void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, XE_GPU_ENDIAN endianness) {
SCOPE_profile_cpu_f("gpu");
switch (endianness) { switch (endianness) {
case XE_GPU_ENDIAN_8IN16: case XE_GPU_ENDIAN_8IN16:
for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) { for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
@ -1344,6 +1368,8 @@ int D3D11GraphicsDriver::FetchTexture2D(
xe_gpu_texture_fetch_t& fetch, xe_gpu_texture_fetch_t& fetch,
TextureInfo& info, TextureInfo& info,
ID3D11Resource** out_texture) { ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XEASSERTTRUE(fetch.dimension == 1); XEASSERTTRUE(fetch.dimension == 1);
uint32_t address = (fetch.address << 12) + address_translation_; uint32_t address = (fetch.address << 12) + address_translation_;
@ -1448,6 +1474,8 @@ int D3D11GraphicsDriver::FetchTexture3D(
xe_gpu_texture_fetch_t& fetch, xe_gpu_texture_fetch_t& fetch,
TextureInfo& info, TextureInfo& info,
ID3D11Resource** out_texture) { ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XELOGE("D3D11: FetchTexture2D not yet implemented"); XELOGE("D3D11: FetchTexture2D not yet implemented");
XEASSERTALWAYS(); XEASSERTALWAYS();
return 1; return 1;
@ -1470,6 +1498,8 @@ int D3D11GraphicsDriver::FetchTextureCube(
xe_gpu_texture_fetch_t& fetch, xe_gpu_texture_fetch_t& fetch,
TextureInfo& info, TextureInfo& info,
ID3D11Resource** out_texture) { ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XELOGE("D3D11: FetchTextureCube not yet implemented"); XELOGE("D3D11: FetchTextureCube not yet implemented");
XEASSERTALWAYS(); XEASSERTALWAYS();
return 1; return 1;
@ -1477,6 +1507,7 @@ int D3D11GraphicsDriver::FetchTextureCube(
int D3D11GraphicsDriver::PrepareTextureSampler( int D3D11GraphicsDriver::PrepareTextureSampler(
xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) { xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) {
SCOPE_profile_cpu_f("gpu");
auto& fetcher = state_.texture_fetchers[desc.fetch_slot]; auto& fetcher = state_.texture_fetchers[desc.fetch_slot];
auto& info = fetcher.info; auto& info = fetcher.info;
@ -1588,6 +1619,8 @@ int D3D11GraphicsDriver::PrepareTextureSampler(
int D3D11GraphicsDriver::PrepareIndexBuffer( int D3D11GraphicsDriver::PrepareIndexBuffer(
bool index_32bit, uint32_t index_count, bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) { uint32_t index_base, uint32_t index_size, uint32_t endianness) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
uint32_t address = index_base + address_translation_; uint32_t address = index_base + address_translation_;
@ -1634,6 +1667,8 @@ int D3D11GraphicsDriver::PrepareIndexBuffer(
} }
int D3D11GraphicsDriver::Resolve() { int D3D11GraphicsDriver::Resolve() {
SCOPE_profile_cpu_f("gpu");
// No clue how this is supposed to work yet. // No clue how this is supposed to work yet.
ID3D11Texture2D* back_buffer = 0; ID3D11Texture2D* back_buffer = 0;
swap_chain_->GetBuffer(0, __uuidof(ID3D11Texture2D), swap_chain_->GetBuffer(0, __uuidof(ID3D11Texture2D),

View File

@ -29,6 +29,7 @@ void __stdcall D3D11GraphicsSystemVsyncCallback(
thread_name_set = true; thread_name_set = true;
Profiler::ThreadEnter("VsyncTimer"); Profiler::ThreadEnter("VsyncTimer");
} }
SCOPE_profile_cpu_f("gpu");
gs->MarkVblank(); gs->MarkVblank();
gs->DispatchInterruptCallback(0); gs->DispatchInterruptCallback(0);
@ -151,6 +152,8 @@ void D3D11GraphicsSystem::Initialize() {
} }
void D3D11GraphicsSystem::Pump() { void D3D11GraphicsSystem::Pump() {
SCOPE_profile_cpu_f("gpu");
if (swap_pending_) { if (swap_pending_) {
swap_pending_ = false; swap_pending_ = false;

View File

@ -145,6 +145,8 @@ void D3D11Shader::set_translated_src(char* value) {
} }
ID3D10Blob* D3D11Shader::Compile(const char* shader_source) { ID3D10Blob* D3D11Shader::Compile(const char* shader_source) {
SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): pick shared runtime mode defines. // TODO(benvanik): pick shared runtime mode defines.
D3D10_SHADER_MACRO defines[] = { D3D10_SHADER_MACRO defines[] = {
"TEST_DEFINE", "1", "TEST_DEFINE", "1",
@ -256,6 +258,7 @@ D3D11VertexShader::~D3D11VertexShader() {
} }
int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
SCOPE_profile_cpu_f("gpu");
if (handle_) { if (handle_) {
return 0; return 0;
} }
@ -411,6 +414,8 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
} }
const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
SCOPE_profile_cpu_f("gpu");
Output* output = new Output(); Output* output = new Output();
xe_gpu_translate_ctx_t ctx; xe_gpu_translate_ctx_t ctx;
ctx.output = output; ctx.output = output;
@ -599,6 +604,7 @@ D3D11PixelShader::~D3D11PixelShader() {
int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl, int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
D3D11VertexShader* input_shader) { D3D11VertexShader* input_shader) {
SCOPE_profile_cpu_f("gpu");
if (handle_) { if (handle_) {
return 0; return 0;
} }
@ -641,6 +647,7 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
const char* D3D11PixelShader::Translate( const char* D3D11PixelShader::Translate(
xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) { xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) {
SCOPE_profile_cpu_f("gpu");
Output* output = new Output(); Output* output = new Output();
xe_gpu_translate_ctx_t ctx; xe_gpu_translate_ctx_t ctx;
ctx.output = output; ctx.output = output;

View File

@ -31,6 +31,7 @@ Shader* D3D11ShaderCache::CreateCore(
xenos::XE_GPU_SHADER_TYPE type, xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length, const uint8_t* src_ptr, size_t length,
uint64_t hash) { uint64_t hash) {
SCOPE_profile_cpu_f("gpu");
switch (type) { switch (type) {
case XE_GPU_SHADER_TYPE_VERTEX: case XE_GPU_SHADER_TYPE_VERTEX:
return new D3D11VertexShader( return new D3D11VertexShader(

View File

@ -114,6 +114,8 @@ int D3D11Window::Initialize(const char* title, uint32_t width, uint32_t height)
} }
void D3D11Window::Swap() { void D3D11Window::Swap() {
SCOPE_profile_cpu_f("gpu");
// Present profiler. // Present profiler.
context_->OMSetRenderTargets(1, &render_target_view_, NULL); context_->OMSetRenderTargets(1, &render_target_view_, NULL);
Profiler::Present(); Profiler::Present();

View File

@ -125,6 +125,8 @@ void RingBufferWorker::Pump() {
void RingBufferWorker::ExecutePrimaryBuffer( void RingBufferWorker::ExecutePrimaryBuffer(
uint32_t start_index, uint32_t end_index) { uint32_t start_index, uint32_t end_index) {
SCOPE_profile_cpu_f("gpu");
// Adjust pointer base. // Adjust pointer base.
uint32_t ptr = primary_buffer_ptr_ + start_index * 4; uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF); ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);

View File

@ -55,6 +55,8 @@ Shader* ShaderCache::Find(
Shader* ShaderCache::FindOrCreate( Shader* ShaderCache::FindOrCreate(
XE_GPU_SHADER_TYPE type, XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length) { const uint8_t* src_ptr, size_t length) {
SCOPE_profile_cpu_f("gpu");
uint64_t hash = Hash(src_ptr, length); uint64_t hash = Hash(src_ptr, length);
unordered_map<uint64_t, Shader*>::iterator it = map_.find(hash); unordered_map<uint64_t, Shader*>::iterator it = map_.find(hash);
if (it != map_.end()) { if (it != map_.end()) {

View File

@ -42,6 +42,8 @@ void InputSystem::AddDriver(InputDriver* driver) {
X_RESULT InputSystem::GetCapabilities( X_RESULT InputSystem::GetCapabilities(
uint32_t user_index, uint32_t flags, X_INPUT_CAPABILITIES& out_caps) { uint32_t user_index, uint32_t flags, X_INPUT_CAPABILITIES& out_caps) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) { for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it; InputDriver* driver = *it;
if (XSUCCEEDED(driver->GetCapabilities(user_index, flags, out_caps))) { if (XSUCCEEDED(driver->GetCapabilities(user_index, flags, out_caps))) {
@ -52,6 +54,8 @@ X_RESULT InputSystem::GetCapabilities(
} }
X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) { X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) { for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it; InputDriver* driver = *it;
if (driver->GetState(user_index, out_state) == X_ERROR_SUCCESS) { if (driver->GetState(user_index, out_state) == X_ERROR_SUCCESS) {
@ -63,6 +67,8 @@ X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
X_RESULT InputSystem::SetState( X_RESULT InputSystem::SetState(
uint32_t user_index, X_INPUT_VIBRATION& vibration) { uint32_t user_index, X_INPUT_VIBRATION& vibration) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) { for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it; InputDriver* driver = *it;
if (XSUCCEEDED(driver->SetState(user_index, vibration))) { if (XSUCCEEDED(driver->SetState(user_index, vibration))) {
@ -74,6 +80,8 @@ X_RESULT InputSystem::SetState(
X_RESULT InputSystem::GetKeystroke( X_RESULT InputSystem::GetKeystroke(
uint32_t user_index, uint32_t flags, X_INPUT_KEYSTROKE& out_keystroke) { uint32_t user_index, uint32_t flags, X_INPUT_KEYSTROKE& out_keystroke) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) { for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it; InputDriver* driver = *it;
if (XSUCCEEDED(driver->GetKeystroke(user_index, flags, out_keystroke))) { if (XSUCCEEDED(driver->GetKeystroke(user_index, flags, out_keystroke))) {