From 541832e4f00f3f30e3aba9a17a1efdd9ceace418 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Wed, 8 Apr 2009 17:58:58 +0000 Subject: [PATCH] DSP: Fix sln file now that old LLE is dead. Add preliminary DSPAnalyzer, which looks for idle skip opportunities. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2930 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Dolphin.sln | 22 ---- .../Plugin_DSP_LLE_Test.vcproj | 8 ++ .../Src/DSPAnalyzer.cpp | 104 ++++++++++++++++++ .../Plugin_DSP_LLE-testing/Src/DSPAnalyzer.h | 45 ++++++++ .../Src/DSPInterpreter.cpp | 12 +- .../Src/disassemble.cpp | 38 +++---- .../Src/gdsp_interface.cpp | 2 + .../Src/gdsp_interpreter.cpp | 7 +- Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp | 9 -- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 29 ++--- 10 files changed, 209 insertions(+), 67 deletions(-) create mode 100644 Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.cpp create mode 100644 Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.h diff --git a/Source/Dolphin.sln b/Source/Dolphin.sln index 7275ebe67b..90f1ceea55 100644 --- a/Source/Dolphin.sln +++ b/Source/Dolphin.sln @@ -31,12 +31,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Bochs_disasm", "..\External EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlib", "..\Externals\zlib\zlib.vcproj", "{3E03C179-8251-46E4-81F4-466F114BAC63}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugin_DSP_LLE", "Plugins\Plugin_DSP_LLE\Plugin_DSP_LLE.vcproj", "{C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}" - ProjectSection(ProjectDependencies) = postProject - {FBAFB369-07EB-4460-9CAD-08BE5789DAB6} = {FBAFB369-07EB-4460-9CAD-08BE5789DAB6} - {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} - EndProjectSection -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DiscIO", "Core\DiscIO\DiscIO.vcproj", "{B7F1A9FB-BEA8-416E-9460-AE35A6A5165C}" ProjectSection(ProjectDependencies) = postProject 
{C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} @@ -253,22 +247,6 @@ Global {3E03C179-8251-46E4-81F4-466F114BAC63}.Release|Win32.Build.0 = Release|Win32 {3E03C179-8251-46E4-81F4-466F114BAC63}.Release|x64.ActiveCfg = Release|x64 {3E03C179-8251-46E4-81F4-466F114BAC63}.Release|x64.Build.0 = Release|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Debug|Win32.ActiveCfg = Debug|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Debug|Win32.Build.0 = Debug|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Debug|x64.ActiveCfg = Debug|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Debug|x64.Build.0 = Debug|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.DebugFast|Win32.ActiveCfg = DebugFast|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.DebugFast|Win32.Build.0 = DebugFast|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.DebugFast|x64.ActiveCfg = DebugFast|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.DebugFast|x64.Build.0 = DebugFast|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release_JITIL|Win32.ActiveCfg = Release|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release_JITIL|Win32.Build.0 = Release|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release_JITIL|x64.ActiveCfg = Release|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release_JITIL|x64.Build.0 = Release|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release|Win32.ActiveCfg = Release|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release|Win32.Build.0 = Release|Win32 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release|x64.ActiveCfg = Release|x64 - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8}.Release|x64.Build.0 = Release|x64 {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C}.Debug|Win32.ActiveCfg = Debug|Win32 {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C}.Debug|Win32.Build.0 = Debug|Win32 {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C}.Debug|x64.ActiveCfg = Debug|x64 diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Plugin_DSP_LLE_Test.vcproj b/Source/Plugins/Plugin_DSP_LLE-testing/Plugin_DSP_LLE_Test.vcproj index 9557f4119a..084ed36c3d 
100644 --- a/Source/Plugins/Plugin_DSP_LLE-testing/Plugin_DSP_LLE_Test.vcproj +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Plugin_DSP_LLE_Test.vcproj @@ -598,6 +598,14 @@ + + + + diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.cpp new file mode 100644 index 0000000000..f95d8e89dc --- /dev/null +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.cpp @@ -0,0 +1,104 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "DSPAnalyzer.h" +#include "DSPInterpreter.h" +#include "DSPTables.h" +#include "gdsp_memory.h" + +namespace DSPAnalyzer { + +// Holds data about all instructions in RAM. +u8 inst_flags[ISPACE]; + +// Good candidates for idle skipping are mail wait loops. If we're time slicing +// between the main CPU and the DSP, if the DSP runs into one of these, it might +// as well give up its time slice immediately. + +// Max signature length is 6. A 0 marks the end of a signature. 
+#define NUM_IDLE_SIGS 2 +#define MAX_IDLE_SIG_SIZE 6 +const u16 idle_skip_sigs[NUM_IDLE_SIGS][MAX_IDLE_SIG_SIZE + 1] = +{ + { 0x27fc, // LRS $31, @DMBH + 0x03c0, 0x8000, // ANDCF $31, #0x8000 + 0x029d, 0x027a, // JLZ 0x027a + 0x02df, 0 }, // RET + { 0x27fe, // LRS $31, @CMBH + 0x03c0, 0x8000, // ANDCF $31, #0x8000 + 0x029c, 0x0280, // JLNZ 0x0280 + 0x02df, 0 }, // RET +}; + +void Reset() +{ + memset(inst_flags, 0, sizeof(inst_flags)); +} + +void AnalyzeRange(int start_addr, int end_addr) +{ + // First we run an extremely simplified version of a disassembler to find + // where all instructions start. + + // This may not be 100% accurate in case of jump tables, but should be good + // enough as a start. + int addr = start_addr; + while (addr < end_addr) + { + UDSPInstruction inst = dsp_imem_read(addr); + const DSPOPCTemplate *opcode = GetOpTemplate(inst); + if (!opcode) + { + addr++; + continue; + } + inst_flags[addr] |= CODE_START_OF_INST; + addr += opcode->size; + } + + // Next, we'll scan for potential idle skips. + for (int s = 0; s < NUM_IDLE_SIGS; s++) + { + for (int addr = start_addr; addr < end_addr; addr++) + { + bool found = false; + for (int i = 0; i < MAX_IDLE_SIG_SIZE + 1; i++) + { + if (idle_skip_sigs[s][i] == 0) + found = true; + if (idle_skip_sigs[s][i] != dsp_imem_read(addr + i)) + break; + } + if (found) + { + NOTICE_LOG(DSPLLE, "Idle skip location found at %02x", addr); + inst_flags[addr] |= CODE_IDLE_SKIP; + // TODO: actually use this flag somewhere. 
+ } + } + } + NOTICE_LOG(DSPLLE, "Finished analysis."); +} + +void Analyze() +{ + Reset(); + AnalyzeRange(0x0000, 0x1000); // IRAM + AnalyzeRange(0x8000, 0x9000); // IROM +} + +} // namespace diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.h b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.h new file mode 100644 index 0000000000..c7e4c7dfa9 --- /dev/null +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPAnalyzer.h @@ -0,0 +1,45 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// Basic code analysis. + +#include "DSPInterpreter.h" + +namespace DSPAnalyzer { + +#define ISPACE 65536 + +enum +{ + CODE_START_OF_INST = 1, + CODE_IDLE_SKIP = 2, +}; + +// Easy to query array covering the whole of instruction memory. +// Just index by address. +// This one will be helpful for debuggers and jits. +extern u8 code_flags[ISPACE]; + +// This one should be called every time IRAM changes - which is basically +// every time that a new ucode gets uploaded, and never else. At that point, +// we can do as much static analysis as we want - but we should always throw +// all old analysis away. Luckily the entire address space is only 64K code +// words and the actual code space 8K instructions in total, so we can do +// some pretty expensive analysis if necessary. 
+void Analyze(); + +} // namespace \ No newline at end of file diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp index a00cf7a8af..f5e29bcca4 100644 --- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp @@ -554,7 +554,7 @@ void clr(const UDSPInstruction& opc) void clrl(const UDSPInstruction& opc) { u16 reg = DSP_REG_ACL0 + ((opc.hex >> 11) & 0x1); - g_dsp.r[reg] &= 0x0000; + g_dsp.r[reg] = 0; // Should this be 64bit? // nakee: it says the whole reg in duddie's doc sounds weird @@ -1359,7 +1359,15 @@ void sbset(const UDSPInstruction& opc) // FIXME inside -// No idea what most of this is supposed to do. +// This seems to be a bunch of bit setters, possibly flipping bits in SR. +// These bits may have effects on the operation of the multiplier or +// accumulators. +// Hermes' demo sets the following defaults, hence that's the most important +// mode to explore for the moment: +// SET40 +// CLR15 +// M0 +// Gonna be fun to explore all 8 possible combinations .. ugh. 
void srbith(const UDSPInstruction& opc) { switch ((opc.hex >> 8) & 0xf) diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/disassemble.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/disassemble.cpp index 89862ac138..9732d89912 100644 --- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/disassemble.cpp +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/disassemble.cpp @@ -110,30 +110,30 @@ pdlabel_t regnames[] = {0x0b, "R11", "Register 11",}, {0x0c, "ST0", "Call stack",}, {0x0d, "ST1", "Data stack",}, - {0x0e, "ST2", "Loop address stack",}, + {0x0e, "ST2", "Loop addr stack",}, {0x0f, "ST3", "Loop counter",}, - {0x00, "ACH0", "Accumulator High 0",}, - {0x11, "ACH1", "Accumulator High 1",}, + {0x00, "ACH0", "Accu High 0",}, + {0x11, "ACH1", "Accu High 1",}, {0x12, "CR", "Config Register",}, {0x13, "SR", "Special Register",}, - {0x14, "PROD.L", "PROD L",}, - {0x15, "PROD.M1", "PROD M1",}, - {0x16, "PROD.H", "PROD H",}, - {0x17, "PROD.M2", "PROD M2",}, - {0x18, "AX0.L", "Additional Accumulators Low 0",}, - {0x19, "AX1.L", "Additional Accumulators Low 1",}, - {0x1a, "AX0.H", "Additional Accumulators High 0",}, - {0x1b, "AX1.H", "Additional Accumulators High 1",}, + {0x14, "PROD.L", "Prod L",}, + {0x15, "PROD.M1", "Prod M1",}, + {0x16, "PROD.H", "Prod H",}, + {0x17, "PROD.M2", "Prod M2",}, + {0x18, "AX0.L", "Extra Accu L 0",}, + {0x19, "AX1.L", "Extra Accu L 1",}, + {0x1a, "AX0.H", "Extra Accu H 0",}, + {0x1b, "AX1.H", "Extra Accu H 1",}, {0x1c, "AC0.L", "Register 28",}, {0x1d, "AC1.L", "Register 29",}, {0x1e, "AC0.M", "Register 00",}, {0x1f, "AC1.M", "Register 00",}, -// additional to resolve special names - {0x20, "ACC0", "Accumulators 0",}, - {0x21, "ACC1", "Accumulators 1",}, - {0x22, "AX0", "Additional Accumulators 0",}, - {0x23, "AX1", "Additional Accumulators 1",}, + // To resolve special names. 
+ {0x20, "ACC0", "Accu Full 0",}, + {0x21, "ACC1", "Accu Full 1",}, + {0x22, "AX0", "Extra Accu 0",}, + {0x23, "AX1", "Extra Accu 1",}, }; const char* pdname(u16 val) @@ -143,7 +143,7 @@ const char* pdname(u16 val) for (int i = 0; i < (int)(sizeof(pdlabels) / sizeof(pdlabel_t)); i++) { if (pdlabels[i].addr == val) - return(pdlabels[i].name); + return pdlabels[i].name; } sprintf(tmpstr, "0x%04x", val); @@ -565,7 +565,7 @@ void gd_dis_open_unkop() } } -const char* gd_dis_get_reg_name(u16 reg) +const char *gd_dis_get_reg_name(u16 reg) { - return(regnames[reg].name); + return regnames[reg].name; } diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interface.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interface.cpp index d9f68ca12a..b76cecf1dd 100644 --- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interface.cpp +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interface.cpp @@ -29,6 +29,7 @@ #include "Thread.h" #include "MemoryUtil.h" +#include "DSPAnalyzer.h" #include "gdsp_aram.h" #include "gdsp_interpreter.h" #include "gdsp_interface.h" @@ -231,6 +232,7 @@ void gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size) g_dsp.iram_crc = GenerateCRC(g_dsp.cpu_ram + (addr & 0x0fffffff), size); INFO_LOG(DSPLLE, "*** Copy new UCode from 0x%08x to 0x%04x (crc: %8x)\n", addr, dsp_addr, g_dsp.iram_crc); + DSPAnalyzer::Analyze(); if (g_dsp.dump_imem) DumpDSPCode(&dst[dsp_addr], size, g_dsp.iram_crc); } diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interpreter.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interpreter.cpp index 915a449f0c..5bcc6cb937 100644 --- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interpreter.cpp +++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_interpreter.cpp @@ -27,6 +27,7 @@ #include #include "DSPTables.h" +#include "DSPAnalyzer.h" #include "gdsp_interface.h" #include "gdsp_opcodes_helper.h" @@ -80,15 +81,16 @@ void gdsp_init() } } - // Fill memories with junk. + // Fill IRAM with HALT opcodes. 
for (int i = 0; i < DSP_IRAM_SIZE; i++) { g_dsp.iram[i] = 0x0021; // HALT opcode } + // Just zero out DRAM. for (int i = 0; i < DSP_DRAM_SIZE; i++) { - g_dsp.dram[i] = 0x0021; // HALT opcode + g_dsp.dram[i] = 0x0021; } // copied from a real console after the custom UCode has been loaded @@ -105,6 +107,7 @@ void gdsp_init() // Mostly keep IRAM write protected. We unprotect only when DMA-ing // in new ucodes. WriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false); + DSPAnalyzer::Analyze(); } void gdsp_shutdown() diff --git a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp index 3275ebff5c..c6133d2a0b 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp @@ -771,12 +771,3 @@ bool OpenGL_ReportFBOError(const char *function, const char *file, int line) } return true; } - -void HandleCgError(CGcontext ctx, CGerror err, void* appdata) -{ - ERROR_LOG(VIDEO, "Cg error: %s", cgGetErrorString(err)); - const char* listing = cgGetLastListing(g_cgcontext); - if (listing != NULL) { - ERROR_LOG(VIDEO, " last listing: %s", listing); - } -} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index d1d0a086d0..d0db0871ac 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -139,7 +139,6 @@ static int s_targetwidth; // Size of render buffer FBO. 
static int s_targetheight; -extern void HandleCgError(CGcontext ctx, CGerror err, void *appdata); namespace { @@ -174,8 +173,16 @@ void SetDefaultRectTexParams() glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR); } -} // namespace +void HandleCgError(CGcontext ctx, CGerror err, void* appdata) +{ + ERROR_LOG(VIDEO, "Cg error: %s", cgGetErrorString(err)); + const char* listing = cgGetLastListing(g_cgcontext); + if (listing != NULL) { + ERROR_LOG(VIDEO, " last listing: %s", listing); + } +} +} // namespace bool Renderer::Init() { @@ -1082,21 +1089,17 @@ void Renderer::Swap(const TRectangle& rc) // Use linear filtering. glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - - glBegin(GL_QUADS); - glTexCoord2f(0, v_min); glVertex2f(-1, -1); - glTexCoord2f(0, v_max); glVertex2f(-1, 1); - glTexCoord2f(u_max, v_max); glVertex2f( 1, 1); - glTexCoord2f(u_max, v_min); glVertex2f( 1, -1); - glEnd(); - // Restore filtering. - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + static const float vtx_data[8] = {-1, -1, -1, 1, 1, 1, 1, -1}; + const float uv_data[8] = {0, v_min, 0, v_max, u_max, v_max, u_max, v_min}; + glBindBuffer(GL_ARRAY_BUFFER, 0); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + glVertexPointer(2, GL_FLOAT, 0, (void *)vtx_data); + glTexCoordPointer(2, GL_FLOAT, 0, (void *)uv_data); + glDrawArrays(GL_QUADS, 0, 4); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); TextureMngr::DisableStage(0); - // End of non-framebuffer_blit workaround. } // Wireframe