Adding a bunch of profiling tracers.
This commit is contained in:
parent
beb9bd11f0
commit
c1812406f5
|
@ -66,6 +66,8 @@ int X64Assembler::Assemble(
|
|||
FunctionInfo* symbol_info, HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, DebugInfo* debug_info,
|
||||
Function** out_function) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
int result = 0;
|
||||
|
||||
// Lower HIR -> x64.
|
||||
|
|
|
@ -75,6 +75,8 @@ int X64CodeCache::Initialize() {
|
|||
|
||||
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
|
||||
size_t stack_size) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Add unwind info into the allocation size. Keep things 16b aligned.
|
||||
code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);
|
||||
|
||||
|
|
|
@ -77,6 +77,8 @@ int X64Emitter::Emit(
|
|||
HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, runtime::DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Reset.
|
||||
if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
|
||||
source_map_count_ = 0;
|
||||
|
|
|
@ -49,6 +49,8 @@ void Compiler::Reset() {
|
|||
}
|
||||
|
||||
int Compiler::Compile(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// TODO(benvanik): sophisticated stuff. Run passes in parallel, run until they
|
||||
// stop changing things, etc.
|
||||
for (auto it = passes_.begin(); it != passes_.end(); ++it) {
|
||||
|
|
|
@ -23,6 +23,8 @@ ConstantPropagationPass::~ConstantPropagationPass() {
|
|||
}
|
||||
|
||||
int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Once ContextPromotion has run there will likely be a whole slew of
|
||||
// constants that can be pushed through the function.
|
||||
// Example:
|
||||
|
|
|
@ -51,6 +51,8 @@ int ContextPromotionPass::Initialize(Compiler* compiler) {
|
|||
}
|
||||
|
||||
int ContextPromotionPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Like mem2reg, but because context memory is unaliasable it's easier to
|
||||
// check and convert LoadContext/StoreContext into value operations.
|
||||
// Example of load->value promotion:
|
||||
|
|
|
@ -30,6 +30,8 @@ ControlFlowAnalysisPass::~ControlFlowAnalysisPass() {
|
|||
}
|
||||
|
||||
int ControlFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// TODO(benvanik): reset edges for all blocks? Needed to be re-runnable.
|
||||
|
||||
// Add edges.
|
||||
|
|
|
@ -36,6 +36,8 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() {
|
|||
}
|
||||
|
||||
int DataFlowAnalysisPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Linearize blocks so that we can detect cycles and propagate dependencies.
|
||||
uint32_t block_count = LinearizeBlocks(builder);
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ DeadCodeEliminationPass::~DeadCodeEliminationPass() {
|
|||
}
|
||||
|
||||
int DeadCodeEliminationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// ContextPromotion/DSE will likely leave around a lot of dead statements.
|
||||
// Code generated for comparison/testing produces many unused statements and
|
||||
// with proper use analysis it should be possible to remove most of them:
|
||||
|
|
|
@ -30,6 +30,8 @@ FinalizationPass::~FinalizationPass() {
|
|||
}
|
||||
|
||||
int FinalizationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Process the HIR and prepare it for lowering.
|
||||
// After this is done the HIR should be ready for emitting.
|
||||
|
||||
|
|
|
@ -59,6 +59,8 @@ RegisterAllocationPass::~RegisterAllocationPass() {
|
|||
}
|
||||
|
||||
int RegisterAllocationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Simple per-block allocator that operates on SSA form.
|
||||
// Registers do not move across blocks, though this could be
|
||||
// optimized with some intra-block analysis (dominators/etc).
|
||||
|
|
|
@ -23,6 +23,8 @@ SimplificationPass::~SimplificationPass() {
|
|||
}
|
||||
|
||||
int SimplificationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
EliminateConversions(builder);
|
||||
SimplifyAssignments(builder);
|
||||
return 0;
|
||||
|
|
|
@ -30,6 +30,8 @@ ValidationPass::~ValidationPass() {
|
|||
}
|
||||
|
||||
int ValidationPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
StringBuffer str;
|
||||
builder->Dump(&str);
|
||||
printf(str.GetString());
|
||||
|
|
|
@ -53,6 +53,8 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
|
|||
}
|
||||
|
||||
int ValueReductionPass::Run(HIRBuilder* builder) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Walk each block and reuse variable ordinals as much as possible.
|
||||
|
||||
llvm::BitVector ordinals(builder->max_value_ordinal());
|
||||
|
|
|
@ -44,6 +44,8 @@ void PPCHIRBuilder::Reset() {
|
|||
}
|
||||
|
||||
int PPCHIRBuilder::Emit(FunctionInfo* symbol_info, bool with_debug_info) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
Memory* memory = frontend_->memory();
|
||||
const uint8_t* p = memory->membase();
|
||||
|
||||
|
|
|
@ -38,6 +38,8 @@ bool PPCScanner::IsRestGprLr(uint64_t address) {
|
|||
}
|
||||
|
||||
int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// This is a simple basic block analyzer. It walks the start address to the
|
||||
// end address looking for branches. Each span of instructions between
|
||||
// branches is considered a basic block. When the last blr (that has no
|
||||
|
@ -286,6 +288,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {
|
|||
}
|
||||
|
||||
std::vector<BlockInfo> PPCScanner::FindBlocks(FunctionInfo* symbol_info) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
Memory* memory = frontend_->memory();
|
||||
const uint8_t* p = memory->membase();
|
||||
|
||||
|
|
|
@ -86,6 +86,8 @@ int PPCTranslator::Translate(
|
|||
FunctionInfo* symbol_info,
|
||||
uint32_t debug_info_flags,
|
||||
Function** out_function) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Scan the function to find its extents. We only need to do this if we
|
||||
// haven't already been provided with them from some other source.
|
||||
if (!symbol_info->has_end_address()) {
|
||||
|
|
|
@ -51,6 +51,8 @@ void HIRBuilder::Reset() {
|
|||
}
|
||||
|
||||
int HIRBuilder::Finalize() {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Scan blocks in order and add fallthrough branches. These are needed for
|
||||
// analysis passes to work. We may have also added blocks out of order and
|
||||
// need to ensure they fall through in the right order.
|
||||
|
@ -141,6 +143,8 @@ void HIRBuilder::DumpOp(
|
|||
}
|
||||
|
||||
void HIRBuilder::Dump(StringBuffer* str) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
if (attributes_) {
|
||||
str->Append("; attributes = %.8X\n", attributes_);
|
||||
}
|
||||
|
|
|
@ -75,6 +75,8 @@ Entry::Status EntryTable::GetOrCreate(uint64_t address, Entry** out_entry) {
|
|||
}
|
||||
|
||||
std::vector<Function*> EntryTable::FindWithAddress(uint64_t address) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
std::vector<Function*> fns;
|
||||
LockMutex(lock_);
|
||||
for (auto it = map_.begin(); it != map_.end(); ++it) {
|
||||
|
|
|
@ -74,6 +74,8 @@ Breakpoint* Function::FindBreakpoint(uint64_t address) {
|
|||
}
|
||||
|
||||
int Function::Call(ThreadState* thread_state, uint64_t return_address) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
ThreadState* original_thread_state = ThreadState::Get();
|
||||
if (original_thread_state != thread_state) {
|
||||
ThreadState::Bind(thread_state);
|
||||
|
|
|
@ -161,6 +161,8 @@ SymbolInfo::Status Module::DefineVariable(VariableInfo* symbol_info) {
|
|||
}
|
||||
|
||||
void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
LockMutex(lock_);
|
||||
for (auto it = list_.begin(); it != list_.end(); ++it) {
|
||||
SymbolInfo* symbol_info = *it;
|
||||
|
@ -174,6 +176,8 @@ void Module::ForEachFunction(std::function<void (FunctionInfo*)> callback) {
|
|||
|
||||
void Module::ForEachFunction(size_t since, size_t& version,
|
||||
std::function<void (FunctionInfo*)> callback) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
LockMutex(lock_);
|
||||
size_t count = list_.size();
|
||||
version = count;
|
||||
|
|
|
@ -159,6 +159,8 @@ std::vector<Function*> Runtime::FindFunctionsWithAddress(uint64_t address) {
|
|||
}
|
||||
|
||||
int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
*out_function = NULL;
|
||||
Entry* entry;
|
||||
Entry::Status status = entry_table_.GetOrCreate(address, &entry);
|
||||
|
@ -192,6 +194,8 @@ int Runtime::ResolveFunction(uint64_t address, Function** out_function) {
|
|||
|
||||
int Runtime::LookupFunctionInfo(
|
||||
uint64_t address, FunctionInfo** out_symbol_info) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
*out_symbol_info = NULL;
|
||||
|
||||
// TODO(benvanik): fast reject invalid addresses/log errors.
|
||||
|
@ -220,6 +224,8 @@ int Runtime::LookupFunctionInfo(
|
|||
|
||||
int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
|
||||
FunctionInfo** out_symbol_info) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
// Atomic create/lookup symbol in module.
|
||||
// If we get back the NEW flag we must declare it now.
|
||||
FunctionInfo* symbol_info = NULL;
|
||||
|
@ -241,6 +247,8 @@ int Runtime::LookupFunctionInfo(Module* module, uint64_t address,
|
|||
|
||||
int Runtime::DemandFunction(
|
||||
FunctionInfo* symbol_info, Function** out_function) {
|
||||
SCOPE_profile_cpu_f("alloy");
|
||||
|
||||
*out_function = NULL;
|
||||
|
||||
// Lock function for generation. If it's already being generated
|
||||
|
|
|
@ -82,8 +82,12 @@ void AudioSystem::ThreadStart() {
|
|||
if (result == WAIT_FAILED) {
|
||||
DWORD err = GetLastError();
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
|
||||
size_t pumped = 0;
|
||||
{
|
||||
SCOPE_profile_cpu_i("apu", "Pump");
|
||||
if (result >= WAIT_OBJECT_0 && result <= WAIT_OBJECT_0 + (maximum_client_count_ - 1)) {
|
||||
size_t index = result - WAIT_OBJECT_0;
|
||||
do {
|
||||
|
@ -98,12 +102,14 @@ void AudioSystem::ThreadStart() {
|
|||
index++;
|
||||
} while (index < maximum_client_count_ && WaitForSingleObject(client_wait_handles_[index], 0) == WAIT_OBJECT_0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!running_) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pumped) {
|
||||
SCOPE_profile_cpu_i("apu", "Sleep");
|
||||
Sleep(500);
|
||||
}
|
||||
}
|
||||
|
@ -126,6 +132,8 @@ void AudioSystem::Shutdown() {
|
|||
|
||||
X_STATUS AudioSystem::RegisterClient(
|
||||
uint32_t callback, uint32_t callback_arg, size_t* out_index) {
|
||||
SCOPE_profile_cpu_f("apu");
|
||||
|
||||
XEASSERTTRUE(unused_clients_.size());
|
||||
xe_mutex_lock(lock_);
|
||||
|
||||
|
@ -157,6 +165,8 @@ X_STATUS AudioSystem::RegisterClient(
|
|||
}
|
||||
|
||||
void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
|
||||
SCOPE_profile_cpu_f("apu");
|
||||
|
||||
xe_mutex_lock(lock_);
|
||||
XEASSERTTRUE(index < maximum_client_count_);
|
||||
XEASSERTTRUE(clients_[index].driver != NULL);
|
||||
|
@ -166,6 +176,8 @@ void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) {
|
|||
}
|
||||
|
||||
void AudioSystem::UnregisterClient(size_t index) {
|
||||
SCOPE_profile_cpu_f("apu");
|
||||
|
||||
xe_mutex_lock(lock_);
|
||||
XEASSERTTRUE(index < maximum_client_count_);
|
||||
DestroyDriver(clients_[index].driver);
|
||||
|
|
|
@ -121,6 +121,8 @@ void XAudio2AudioDriver::Initialize() {
|
|||
}
|
||||
|
||||
void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) {
|
||||
SCOPE_profile_cpu_f("apu");
|
||||
|
||||
// Process samples! They are big-endian floats.
|
||||
HRESULT hr;
|
||||
|
||||
|
|
|
@ -147,6 +147,8 @@ void Processor::AddRegisterAccessCallbacks(
|
|||
}
|
||||
|
||||
int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
|
||||
SCOPE_profile_cpu_f("cpu");
|
||||
|
||||
// Attempt to get the function.
|
||||
Function* fn;
|
||||
if (runtime_->ResolveFunction(address, &fn)) {
|
||||
|
@ -171,6 +173,8 @@ int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
|
|||
|
||||
uint64_t Processor::Execute(
|
||||
XenonThreadState* thread_state, uint64_t address, uint64_t arg0) {
|
||||
SCOPE_profile_cpu_f("cpu");
|
||||
|
||||
PPCContext* context = thread_state->context();
|
||||
context->r[3] = arg0;
|
||||
if (Execute(thread_state, address)) {
|
||||
|
@ -182,6 +186,8 @@ uint64_t Processor::Execute(
|
|||
uint64_t Processor::Execute(
|
||||
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
|
||||
uint64_t arg1) {
|
||||
SCOPE_profile_cpu_f("cpu");
|
||||
|
||||
PPCContext* context = thread_state->context();
|
||||
context->r[3] = arg0;
|
||||
context->r[4] = arg1;
|
||||
|
|
|
@ -34,6 +34,8 @@ D3D11GeometryShader::~D3D11GeometryShader() {
|
|||
}
|
||||
|
||||
int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
if (handle_) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -74,6 +76,8 @@ int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
|
|||
}
|
||||
|
||||
ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// TODO(benvanik): pick shared runtime mode defines.
|
||||
D3D10_SHADER_MACRO defines[] = {
|
||||
"TEST_DEFINE", "1",
|
||||
|
@ -161,6 +165,7 @@ D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
|
|||
|
||||
int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
}
|
||||
|
@ -215,6 +220,7 @@ D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
|
|||
|
||||
int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
}
|
||||
|
@ -259,6 +265,7 @@ D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
|
|||
|
||||
int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -190,6 +190,8 @@ void D3D11GraphicsDriver::SetShader(
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
// Ignore copies.
|
||||
|
@ -296,6 +298,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
|
|||
XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X",
|
||||
|
@ -321,6 +325,8 @@ void D3D11GraphicsDriver::DrawIndexBuffer(
|
|||
void D3D11GraphicsDriver::DrawIndexAuto(
|
||||
XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
uint32_t index_count) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
XETRACED3D("D3D11: draw indexed %d (%d indicies)",
|
||||
|
@ -346,6 +352,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
|
|||
return 0;
|
||||
}
|
||||
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Remove old versions.
|
||||
for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
|
||||
auto& cb = render_targets_.color_buffers[n];
|
||||
|
@ -426,6 +434,8 @@ int D3D11GraphicsDriver::RebuildRenderTargets(
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Most information comes from here:
|
||||
// https://chromium.googlesource.com/chromiumos/third_party/mesa/+/6173cc19c45d92ef0b7bc6aa008aa89bb29abbda/src/gallium/drivers/freedreno/freedreno_zsa.c
|
||||
// http://cgit.freedesktop.org/mesa/mesa/diff/?id=aac7f06ad843eaa696363e8e9c7781ca30cb4914
|
||||
|
@ -768,6 +778,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::UpdateConstantBuffers() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
|
@ -799,6 +811,8 @@ int D3D11GraphicsDriver::UpdateConstantBuffers() {
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::BindShaders() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
xe_gpu_program_cntl_t program_cntl;
|
||||
program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
|
||||
|
@ -892,6 +906,8 @@ int D3D11GraphicsDriver::BindShaders() {
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::PrepareFetchers() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Input assembly.
|
||||
XEASSERTNOTNULL(state_.vertex_shader);
|
||||
auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs();
|
||||
|
@ -934,6 +950,8 @@ int D3D11GraphicsDriver::PrepareFetchers() {
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
|
||||
xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
|
||||
|
@ -1009,6 +1027,8 @@ int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::PrepareTextureFetchers() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) {
|
||||
|
@ -1275,6 +1295,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
|
|||
xe_gpu_texture_fetch_t& fetch,
|
||||
TextureInfo& info,
|
||||
ID3D11Resource** out_texture) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
uint32_t address = (fetch.address << 12) + address_translation_;
|
||||
|
||||
uint32_t width = 1 + fetch.size_1d.width;
|
||||
|
@ -1299,6 +1321,8 @@ int D3D11GraphicsDriver::FetchTexture1D(
|
|||
}
|
||||
|
||||
XEFORCEINLINE void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, XE_GPU_ENDIAN endianness) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
switch (endianness) {
|
||||
case XE_GPU_ENDIAN_8IN16:
|
||||
for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
|
||||
|
@ -1344,6 +1368,8 @@ int D3D11GraphicsDriver::FetchTexture2D(
|
|||
xe_gpu_texture_fetch_t& fetch,
|
||||
TextureInfo& info,
|
||||
ID3D11Resource** out_texture) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
XEASSERTTRUE(fetch.dimension == 1);
|
||||
|
||||
uint32_t address = (fetch.address << 12) + address_translation_;
|
||||
|
@ -1448,6 +1474,8 @@ int D3D11GraphicsDriver::FetchTexture3D(
|
|||
xe_gpu_texture_fetch_t& fetch,
|
||||
TextureInfo& info,
|
||||
ID3D11Resource** out_texture) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
XELOGE("D3D11: FetchTexture2D not yet implemented");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
|
@ -1470,6 +1498,8 @@ int D3D11GraphicsDriver::FetchTextureCube(
|
|||
xe_gpu_texture_fetch_t& fetch,
|
||||
TextureInfo& info,
|
||||
ID3D11Resource** out_texture) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
XELOGE("D3D11: FetchTextureCube not yet implemented");
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
|
@ -1477,6 +1507,7 @@ int D3D11GraphicsDriver::FetchTextureCube(
|
|||
|
||||
int D3D11GraphicsDriver::PrepareTextureSampler(
|
||||
xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
auto& fetcher = state_.texture_fetchers[desc.fetch_slot];
|
||||
auto& info = fetcher.info;
|
||||
|
@ -1588,6 +1619,8 @@ int D3D11GraphicsDriver::PrepareTextureSampler(
|
|||
int D3D11GraphicsDriver::PrepareIndexBuffer(
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
RegisterFile& rf = register_file_;
|
||||
|
||||
uint32_t address = index_base + address_translation_;
|
||||
|
@ -1634,6 +1667,8 @@ int D3D11GraphicsDriver::PrepareIndexBuffer(
|
|||
}
|
||||
|
||||
int D3D11GraphicsDriver::Resolve() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// No clue how this is supposed to work yet.
|
||||
ID3D11Texture2D* back_buffer = 0;
|
||||
swap_chain_->GetBuffer(0, __uuidof(ID3D11Texture2D),
|
||||
|
|
|
@ -29,6 +29,7 @@ void __stdcall D3D11GraphicsSystemVsyncCallback(
|
|||
thread_name_set = true;
|
||||
Profiler::ThreadEnter("VsyncTimer");
|
||||
}
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
gs->MarkVblank();
|
||||
gs->DispatchInterruptCallback(0);
|
||||
|
@ -151,6 +152,8 @@ void D3D11GraphicsSystem::Initialize() {
|
|||
}
|
||||
|
||||
void D3D11GraphicsSystem::Pump() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
if (swap_pending_) {
|
||||
swap_pending_ = false;
|
||||
|
||||
|
|
|
@ -145,6 +145,8 @@ void D3D11Shader::set_translated_src(char* value) {
|
|||
}
|
||||
|
||||
ID3D10Blob* D3D11Shader::Compile(const char* shader_source) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// TODO(benvanik): pick shared runtime mode defines.
|
||||
D3D10_SHADER_MACRO defines[] = {
|
||||
"TEST_DEFINE", "1",
|
||||
|
@ -256,6 +258,7 @@ D3D11VertexShader::~D3D11VertexShader() {
|
|||
}
|
||||
|
||||
int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (handle_) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -411,6 +414,8 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
|
|||
}
|
||||
|
||||
const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
Output* output = new Output();
|
||||
xe_gpu_translate_ctx_t ctx;
|
||||
ctx.output = output;
|
||||
|
@ -599,6 +604,7 @@ D3D11PixelShader::~D3D11PixelShader() {
|
|||
|
||||
int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
|
||||
D3D11VertexShader* input_shader) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (handle_) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -641,6 +647,7 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
|
|||
|
||||
const char* D3D11PixelShader::Translate(
|
||||
xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
Output* output = new Output();
|
||||
xe_gpu_translate_ctx_t ctx;
|
||||
ctx.output = output;
|
||||
|
|
|
@ -31,6 +31,7 @@ Shader* D3D11ShaderCache::CreateCore(
|
|||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
switch (type) {
|
||||
case XE_GPU_SHADER_TYPE_VERTEX:
|
||||
return new D3D11VertexShader(
|
||||
|
|
|
@ -114,6 +114,8 @@ int D3D11Window::Initialize(const char* title, uint32_t width, uint32_t height)
|
|||
}
|
||||
|
||||
void D3D11Window::Swap() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Present profiler.
|
||||
context_->OMSetRenderTargets(1, &render_target_view_, NULL);
|
||||
Profiler::Present();
|
||||
|
|
|
@ -125,6 +125,8 @@ void RingBufferWorker::Pump() {
|
|||
|
||||
void RingBufferWorker::ExecutePrimaryBuffer(
|
||||
uint32_t start_index, uint32_t end_index) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Adjust pointer base.
|
||||
uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
|
||||
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
|
||||
|
|
|
@ -55,6 +55,8 @@ Shader* ShaderCache::Find(
|
|||
Shader* ShaderCache::FindOrCreate(
|
||||
XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
uint64_t hash = Hash(src_ptr, length);
|
||||
unordered_map<uint64_t, Shader*>::iterator it = map_.find(hash);
|
||||
if (it != map_.end()) {
|
||||
|
|
|
@ -42,6 +42,8 @@ void InputSystem::AddDriver(InputDriver* driver) {
|
|||
|
||||
X_RESULT InputSystem::GetCapabilities(
|
||||
uint32_t user_index, uint32_t flags, X_INPUT_CAPABILITIES& out_caps) {
|
||||
SCOPE_profile_cpu_f("hid");
|
||||
|
||||
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
|
||||
InputDriver* driver = *it;
|
||||
if (XSUCCEEDED(driver->GetCapabilities(user_index, flags, out_caps))) {
|
||||
|
@ -52,6 +54,8 @@ X_RESULT InputSystem::GetCapabilities(
|
|||
}
|
||||
|
||||
X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
|
||||
SCOPE_profile_cpu_f("hid");
|
||||
|
||||
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
|
||||
InputDriver* driver = *it;
|
||||
if (driver->GetState(user_index, out_state) == X_ERROR_SUCCESS) {
|
||||
|
@ -63,6 +67,8 @@ X_RESULT InputSystem::GetState(uint32_t user_index, X_INPUT_STATE& out_state) {
|
|||
|
||||
X_RESULT InputSystem::SetState(
|
||||
uint32_t user_index, X_INPUT_VIBRATION& vibration) {
|
||||
SCOPE_profile_cpu_f("hid");
|
||||
|
||||
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
|
||||
InputDriver* driver = *it;
|
||||
if (XSUCCEEDED(driver->SetState(user_index, vibration))) {
|
||||
|
@ -74,6 +80,8 @@ X_RESULT InputSystem::SetState(
|
|||
|
||||
X_RESULT InputSystem::GetKeystroke(
|
||||
uint32_t user_index, uint32_t flags, X_INPUT_KEYSTROKE& out_keystroke) {
|
||||
SCOPE_profile_cpu_f("hid");
|
||||
|
||||
for (auto it = drivers_.begin(); it != drivers_.end(); ++it) {
|
||||
InputDriver* driver = *it;
|
||||
if (XSUCCEEDED(driver->GetKeystroke(user_index, flags, out_keystroke))) {
|
||||
|
|
Loading…
Reference in New Issue