Adding a bunch of profiling tracers.

This commit is contained in:
Ben Vanik 2014-05-28 19:19:39 -07:00
parent beb9bd11f0
commit c1812406f5
34 changed files with 156 additions and 13 deletions

View File

@ -66,6 +66,8 @@ int X64Assembler::Assemble(
FunctionInfo* symbol_info, HIRBuilder* builder,
uint32_t debug_info_flags, DebugInfo* debug_info,
Function** out_function) {
SCOPE_profile_cpu_f("alloy");
int result = 0;
// Lower HIR -> x64.

View File

@ -75,6 +75,8 @@ int X64CodeCache::Initialize() {
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
size_t stack_size) {
SCOPE_profile_cpu_f("alloy");
// Add unwind info into the allocation size. Keep things 16b aligned.
code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);

View File

@ -77,6 +77,8 @@ int X64Emitter::Emit(
HIRBuilder* builder,
uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
void*& out_code_address, size_t& out_code_size) {
SCOPE_profile_cpu_f("alloy");
// Reset.
if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
source_map_count_ = 0;

View File

@ -49,6 +49,8 @@ void Compiler::Reset() {
}
int Compiler::Compile(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they
// stop changing things, etc.
for (auto it = passes_.begin(); it != passes_.end(); ++it) {

View File

@ -23,6 +23,8 @@ ConstantPropagationPass::~ConstantPropagationPass() {
}
int ConstantPropagationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Once ContextPromotion has run there will likely be a whole slew of
// constants that can be pushed through the function.
// Example:

View File

@ -51,6 +51,8 @@ int ContextPromotionPass::Initialize(Compiler* compiler) {
}
int ContextPromotionPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Like mem2reg, but because context memory is unaliasable it's easier to
// check and convert LoadContext/StoreContext into value operations.
// Example of load->value promotion:

View File

@ -30,6 +30,8 @@ ControlFlowAnalysisPass::~ControlFlowAnalysisPass() {
}
int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// TODO(benvanik): reset edges for all blocks? Needed to be re-runnable.
// Add edges.

View File

@ -36,6 +36,8 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() {
}
int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Linearize blocks so that we can detect cycles and propagate dependencies.
uint32_t block_count = LinearizeBlocks(builder);

View File

@ -23,6 +23,8 @@ DeadCodeEliminationPass::~DeadCodeEliminationPass() {
}
int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// ContextPromotion/DSE will likely leave around a lot of dead statements.
// Code generated for comparison/testing produces many unused statements and
// with proper use analysis it should be possible to remove most of them:

View File

@ -30,6 +30,8 @@ FinalizationPass::~FinalizationPass() {
}
int FinalizationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Process the HIR and prepare it for lowering.
// After this is done the HIR should be ready for emitting.

View File

@ -59,6 +59,8 @@ RegisterAllocationPass::~RegisterAllocationPass() {
}
int RegisterAllocationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Simple per-block allocator that operates on SSA form.
// Registers do not move across blocks, though this could be
// optimized with some intra-block analysis (dominators/etc).

View File

@ -23,6 +23,8 @@ SimplificationPass::~SimplificationPass() {
}
int SimplificationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
EliminateConversions(builder);
SimplifyAssignments(builder);
return 0;

View File

@ -30,6 +30,8 @@ ValidationPass::~ValidationPass() {
}
int ValidationPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
StringBuffer str;
builder->Dump(&str);
printf(str.GetString());

View File

@ -53,6 +53,8 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
}
int ValueReductionPass::Run(HIRBuilder* builder) {
SCOPE_profile_cpu_f("alloy");
// Walk each block and reuse variable ordinals as much as possible.
llvm::BitVector ordinals(builder->max_value_ordinal());

View File

@ -44,6 +44,8 @@ void PPCHIRBuilder::Reset() {
}
int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
SCOPE_profile_cpu_f("alloy");
Memory* memory = frontend_->memory();
const uint8_t* p = memory->membase();

View File

@ -38,6 +38,8 @@ bool PPCScanner::IsRestGprLr(uint64_t address) {
}
int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
SCOPE_profile_cpu_f("alloy");
// This is a simple basic block analyzer. It walks the start address to the
// end address looking for branches. Each span of instructions between
// branches is considered a basic block. When the last blr (that has no
@ -286,6 +288,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
}
std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) {
SCOPE_profile_cpu_f("alloy");
Memory* memory = frontend_->memory();
const uint8_t* p = memory->membase();

View File

@ -86,6 +86,8 @@ int PPCTranslator::Translate(
FunctionInfo* symbol_info,
uint32_t debug_info_flags,
Function** out_function) {
SCOPE_profile_cpu_f("alloy");
// Scan the function to find its extents. We only need to do this if we
// haven't already been provided with them from some other source.
if (!symbol_info->has_end_address()) {

View File

@ -51,6 +51,8 @@ void HIRBuilder::Reset() {
}
int HIRBuilder::Finalize() {
SCOPE_profile_cpu_f("alloy");
// Scan blocks in order and add fallthrough branches. These are needed for
// analysis passes to work. We may have also added blocks out of order and
// need to ensure they fall through in the right order.
@ -141,6 +143,8 @@ void HIRBuilder::DumpOp(
}
void HIRBuilder::Dump(StringBuffer* str) {
SCOPE_profile_cpu_f("alloy");
if (attributes_) {
str->Append("; attributes = %.8X\n", attributes_);
}

View File

@ -75,6 +75,8 @@ Entry::Status EntryTable::GetOrCreate(uint64_t address, Entry** out_entry) {
}
std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) {
SCOPE_profile_cpu_f("alloy");
std::vector<Function*> fns;
LockMutex(lock_);
for (auto it = map_.begin(); it != map_.end(); ++it) {

View File

@ -74,6 +74,8 @@ Breakpoint* Function::FindBreakpoint(uint64_t address) {
}
int Function::Call(ThreadState* thread_state, uint64_t return_address) {
SCOPE_profile_cpu_f("alloy");
ThreadState* original_thread_state = ThreadState::Get();
if (original_thread_state != thread_state) {
ThreadState::Bind(thread_state);

View File

@ -161,6 +161,8 @@ SymbolInfo::Status Module::DefineVariable(VariableInfo* symbol_info) {
}
void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
SCOPE_profile_cpu_f("alloy");
LockMutex(lock_);
for (auto it = list_.begin(); it != list_.end(); ++it) {
SymbolInfo* symbol_info = *it;
@ -174,6 +176,8 @@ void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
void Module::ForEachFunction(size_t since, size_t& version,
std::function<void (FunctionInfo*)> callback) {
SCOPE_profile_cpu_f("alloy");
LockMutex(lock_);
size_t count = list_.size();
version = count;

View File

@ -159,6 +159,8 @@ std::vector<Function*> Runtime::FindFunctionsWithAddress(uint64_t address) {
}
int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
SCOPE_profile_cpu_f("alloy");
*out_function = NULL;
Entry* entry;
Entry::Status status = entry_table_.GetOrCreate(address, &entry);
@ -192,6 +194,8 @@ int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
int Runtime::LookupFunctionInfo(
uint64_t address, FunctionInfo** out_symbol_info) {
SCOPE_profile_cpu_f("alloy");
*out_symbol_info = NULL;
// TODO(benvanik): fast reject invalid addresses/log errors.
@ -220,6 +224,8 @@ int Runtime::LookupFunctionInfo(
int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
FunctionInfo** out_symbol_info) {
SCOPE_profile_cpu_f("alloy");
// Atomic create/lookup symbol in module.
// If we get back the NEW flag we must declare it now.
FunctionInfo* symbol_info = NULL;
@ -241,6 +247,8 @@ int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
int Runtime::DemandFunction(
FunctionInfo* symbol_info, Function** out_function) {
SCOPE_profile_cpu_f("alloy");
*out_function = NULL;
// Lock function for generation. If it's already being generated

View File

@ -82,21 +82,26 @@ void AudioSystem::ThreadStart() {
if (result == WAIT_FAILED) {
DWORD err = GetLastError();
XEASSERTALWAYS();
break;
}
size_t pumped = 0;
if (result >= WAIT_OBJECT_0 && result <= WAIT_OBJECT_0 + (maximum_client_count_ - 1)) {
size_t index = result - WAIT_OBJECT_0;
do {
xe_mutex_lock(lock_);
uint32_t client_callback = clients_[index].callback;
uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
xe_mutex_unlock(lock_);
if (client_callback) {
processor->Execute(thread_state_, client_callback, client_callback_arg, 0);
}
pumped++;
index++;
} while (index < maximum_client_count_ && WaitForSingleObject(client_wait_handles_[index], 0) == WAIT_OBJECT_0);
{
SCOPE_profile_cpu_i("apu", "Pump");
if (result >= WAIT_OBJECT_0 && result <= WAIT_OBJECT_0 + (maximum_client_count_ - 1)) {
size_t index = result - WAIT_OBJECT_0;
do {
xe_mutex_lock(lock_);
uint32_t client_callback = clients_[index].callback;
uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
xe_mutex_unlock(lock_);
if (client_callback) {
processor->Execute(thread_state_, client_callback, client_callback_arg, 0);
}
pumped++;
index++;
} while (index < maximum_client_count_ && WaitForSingleObject(client_wait_handles_[index], 0) == WAIT_OBJECT_0);
}
}
if (!running_) {
@ -104,6 +109,7 @@ void AudioSystem::ThreadStart() {
}
if (!pumped) {
SCOPE_profile_cpu_i("apu", "Sleep");
Sleep(500);
}
}
@ -126,6 +132,8 @@ void AudioSystem::Shutdown() {
X_STATUS AudioSystem::RegisterClient(
uint32_t callback, uint32_t callback_arg, size_t* out_index) {
SCOPE_profile_cpu_f("apu");
XEASSERTTRUE(unused_clients_.size());
xe_mutex_lock(lock_);
@ -157,6 +165,8 @@ X_STATUS AudioSystem::RegisterClient(
}
void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
SCOPE_profile_cpu_f("apu");
xe_mutex_lock(lock_);
XEASSERTTRUE(index < maximum_client_count_);
XEASSERTTRUE(clients_[index].driver != NULL);
@ -166,6 +176,8 @@ void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
}
void AudioSystem::UnregisterClient(size_t index) {
SCOPE_profile_cpu_f("apu");
xe_mutex_lock(lock_);
XEASSERTTRUE(index < maximum_client_count_);
DestroyDriver(clients_[index].driver);

View File

@ -121,6 +121,8 @@ void XAudio2AudioDriver::Initialize() {
}
void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) {
SCOPE_profile_cpu_f("apu");
// Process samples! They are big-endian floats.
HRESULT hr;

View File

@ -147,6 +147,8 @@ void Processor::AddRegisterAccessCallbacks(
}
int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
SCOPE_profile_cpu_f("cpu");
// Attempt to get the function.
Function* fn;
if (runtime_->ResolveFunction(address, &fn)) {
@ -171,6 +173,8 @@ int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context();
context->r[3] = arg0;
if (Execute(thread_state, address)) {
@ -182,6 +186,8 @@ uint64_t Processor::Execute(
uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context();
context->r[3] = arg0;
context->r[4] = arg1;

View File

@ -34,6 +34,8 @@ D3D11GeometryShader::~D3D11GeometryShader() {
}
int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
SCOPE_profile_cpu_f("gpu");
if (handle_) {
return 0;
}
@ -74,6 +76,8 @@ int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
}
ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): pick shared runtime mode defines.
D3D10_SHADER_MACRO defines[] = {
"TEST_DEFINE", "1",
@ -161,6 +165,7 @@ D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1;
}
@ -215,6 +220,7 @@ D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1;
}
@ -259,6 +265,7 @@ D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
return 1;
}

View File

@ -190,6 +190,8 @@ void D3D11GraphicsDriver::SetShader(
}
int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
// Ignore copies.
@ -296,6 +298,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X",
@ -321,6 +325,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
void D3D11GraphicsDriver::DrawIndexAuto(
XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
XETRACED3D("D3D11: draw indexed %d (%d indicies)",
@ -346,6 +352,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
return 0;
}
SCOPE_profile_cpu_f("gpu");
// Remove old versions.
for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
auto& cb = render_targets_.color_buffers[n];
@ -426,6 +434,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
}
int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
SCOPE_profile_cpu_f("gpu");
// Most information comes from here:
// https://chromium.googlesource.com/chromiumos/third_party/mesa/+/6173cc19c45d92ef0b7bc6aa008aa89bb29abbda/src/gallium/drivers/freedreno/freedreno_zsa.c
// http://cgit.freedesktop.org/mesa/mesa/diff/?id=aac7f06ad843eaa696363e8e9c7781ca30cb4914
@ -768,6 +778,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
}
int D3D11GraphicsDriver::UpdateConstantBuffers() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
D3D11_MAPPED_SUBRESOURCE res;
@ -799,6 +811,8 @@ int D3D11GraphicsDriver::UpdateConstantBuffers() {
}
int D3D11GraphicsDriver::BindShaders() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
xe_gpu_program_cntl_t program_cntl;
program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
@ -892,6 +906,8 @@ int D3D11GraphicsDriver::BindShaders() {
}
int D3D11GraphicsDriver::PrepareFetchers() {
SCOPE_profile_cpu_f("gpu");
// Input assembly.
XEASSERTNOTNULL(state_.vertex_shader);
auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs();
@ -934,6 +950,8 @@ int D3D11GraphicsDriver::PrepareFetchers() {
}
int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
@ -1009,6 +1027,8 @@ int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
}
int D3D11GraphicsDriver::PrepareTextureFetchers() {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) {
@ -1275,6 +1295,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
xe_gpu_texture_fetch_t& fetch,
TextureInfo& info,
ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
uint32_t address = (fetch.address << 12) + address_translation_;
uint32_t width = 1 + fetch.size_1d.width;
@ -1299,6 +1321,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
}
XEFORCEINLINE void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, XE_GPU_ENDIAN endianness) {
SCOPE_profile_cpu_f("gpu");
switch (endianness) {
case XE_GPU_ENDIAN_8IN16:
for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
@ -1344,6 +1368,8 @@ int D3D11GraphicsDriver::FetchTexture2D(
xe_gpu_texture_fetch_t& fetch,
TextureInfo& info,
ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XEASSERTTRUE(fetch.dimension == 1);
uint32_t address = (fetch.address << 12) + address_translation_;
@ -1448,6 +1474,8 @@ int D3D11GraphicsDriver::FetchTexture3D(
xe_gpu_texture_fetch_t& fetch,
TextureInfo& info,
ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XELOGE("D3D11: FetchTexture2D not yet implemented");
XEASSERTALWAYS();
return 1;
@ -1470,6 +1498,8 @@ int D3D11GraphicsDriver::FetchTextureCube(
xe_gpu_texture_fetch_t& fetch,
TextureInfo& info,
ID3D11Resource** out_texture) {
SCOPE_profile_cpu_f("gpu");
XELOGE("D3D11: FetchTextureCube not yet implemented");
XEASSERTALWAYS();
return 1;
@ -1477,6 +1507,7 @@ int D3D11GraphicsDriver::FetchTextureCube(
int D3D11GraphicsDriver::PrepareTextureSampler(
xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) {
SCOPE_profile_cpu_f("gpu");
auto& fetcher = state_.texture_fetchers[desc.fetch_slot];
auto& info = fetcher.info;
@ -1588,6 +1619,8 @@ int D3D11GraphicsDriver::PrepareTextureSampler(
int D3D11GraphicsDriver::PrepareIndexBuffer(
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
SCOPE_profile_cpu_f("gpu");
RegisterFile& rf = register_file_;
uint32_t address = index_base + address_translation_;
@ -1634,6 +1667,8 @@ int D3D11GraphicsDriver::PrepareIndexBuffer(
}
int D3D11GraphicsDriver::Resolve() {
SCOPE_profile_cpu_f("gpu");
// No clue how this is supposed to work yet.
ID3D11Texture2D* back_buffer = 0;
swap_chain_->GetBuffer(0, __uuidof(ID3D11Texture2D),

View File

@ -29,6 +29,7 @@ void __stdcall D3D11GraphicsSystemVsyncCallback(
thread_name_set = true;
Profiler::ThreadEnter("VsyncTimer");
}
SCOPE_profile_cpu_f("gpu");
gs->MarkVblank();
gs->DispatchInterruptCallback(0);
@ -151,6 +152,8 @@ void D3D11GraphicsSystem::Initialize() {
}
void D3D11GraphicsSystem::Pump() {
SCOPE_profile_cpu_f("gpu");
if (swap_pending_) {
swap_pending_ = false;

View File

@ -145,6 +145,8 @@ void D3D11Shader::set_translated_src(char* value) {
}
ID3D10Blob* D3D11Shader::Compile(const char* shader_source) {
SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): pick shared runtime mode defines.
D3D10_SHADER_MACRO defines[] = {
"TEST_DEFINE", "1",
@ -256,6 +258,7 @@ D3D11VertexShader::~D3D11VertexShader() {
}
int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
SCOPE_profile_cpu_f("gpu");
if (handle_) {
return 0;
}
@ -411,6 +414,8 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
}
const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
SCOPE_profile_cpu_f("gpu");
Output* output = new Output();
xe_gpu_translate_ctx_t ctx;
ctx.output = output;
@ -599,6 +604,7 @@ D3D11PixelShader::~D3D11PixelShader() {
int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
D3D11VertexShader* input_shader) {
SCOPE_profile_cpu_f("gpu");
if (handle_) {
return 0;
}
@ -641,6 +647,7 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
const char* D3D11PixelShader::Translate(
xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) {
SCOPE_profile_cpu_f("gpu");
Output* output = new Output();
xe_gpu_translate_ctx_t ctx;
ctx.output = output;

View File

@ -31,6 +31,7 @@ Shader* D3D11ShaderCache::CreateCore(
xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length,
uint64_t hash) {
SCOPE_profile_cpu_f("gpu");
switch (type) {
case XE_GPU_SHADER_TYPE_VERTEX:
return new D3D11VertexShader(

View File

@ -114,6 +114,8 @@ int D3D11Window::Initialize(const char* title, uint32_t width, uint32_t height)
}
void D3D11Window::Swap() {
SCOPE_profile_cpu_f("gpu");
// Present profiler.
context_->OMSetRenderTargets(1, &render_target_view_, NULL);
Profiler::Present();

View File

@ -125,6 +125,8 @@ void RingBufferWorker::Pump() {
void RingBufferWorker::ExecutePrimaryBuffer(
uint32_t start_index, uint32_t end_index) {
SCOPE_profile_cpu_f("gpu");
// Adjust pointer base.
uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);

View File

@ -55,6 +55,8 @@ Shader* ShaderCache::Find(
Shader* ShaderCache::FindOrCreate(
XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length) {
SCOPE_profile_cpu_f("gpu");
uint64_t hash = Hash(src_ptr, length);
unordered_map<uint64_t, Shader*>::iterator it = map_.find(hash);
if (it != map_.end()) {

View File

@ -42,6 +42,8 @@ void InputSystem::AddDriver(InputDriver* driver) {
X_RESULT InputSystem::GetCapabilities(
uint32_t user_index, uint32_t flags, X_INPUT_CAPABILITIES& out_caps) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it;
if (XSUCCEEDED(driver->GetCapabilities(user_index, flags, out_caps))) {
@ -52,6 +54,8 @@ X_RESULT InputSystem::GetCapabilities(
}
X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it;
if (driver->GetState(user_index, out_state) == X_ERROR_SUCCESS) {
@ -63,6 +67,8 @@ X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
X_RESULT InputSystem::SetState(
uint32_t user_index, X_INPUT_VIBRATION& vibration) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it;
if (XSUCCEEDED(driver->SetState(user_index, vibration))) {
@ -74,6 +80,8 @@ X_RESULT InputSystem::SetState(
X_RESULT InputSystem::GetKeystroke(
uint32_t user_index, uint32_t flags, X_INPUT_KEYSTROKE& out_keystroke) {
SCOPE_profile_cpu_f("hid");
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
InputDriver* driver = *it;
if (XSUCCEEDED(driver->GetKeystroke(user_index, flags, out_keystroke))) {