From 6e61c3249545ea67ed746f078b3f696823ec1955 Mon Sep 17 00:00:00 2001 From: Shawn Hoffman Date: Sun, 16 Aug 2009 00:37:01 +0000 Subject: [PATCH] dspspy: fix a bug with ConsoleHelper dspspy: remove some alignment for some vars. (should not have caused issues anyways) dspCodeUtil: pad ucodes converted to headers to 32byte multiples with nops (this is only mimicking what is seen in nintendo ucodes, probably because theirs are originating on disc) Common.h: fix typo for gcc version of GC_ALIGNED32 (wtf? how did this work before?!) createtest.pl: add "jmp end_of_test" to the end of generated ucodes. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3994 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/Common.h | 4 +- Source/Core/DSPCore/Src/DSPCodeUtil.cpp | 47 ++- Source/Core/DSPCore/Src/DSPMemoryMap.cpp | 2 +- Source/DSPSpy/ConsoleHelper.h | 3 +- Source/DSPSpy/dsp_interface.cpp | 4 + Source/DSPSpy/main_spy.cpp | 17 +- Source/DSPSpy/util/createtest.pl | 1 + Source/DSPSpy/util/dump_roms.ds | 326 +++++++++--------- .../Plugin_DSP_HLE/Src/UCodes/UCodes.cpp | 1 + 9 files changed, 205 insertions(+), 200 deletions(-) diff --git a/Source/Core/Common/Src/Common.h b/Source/Core/Common/Src/Common.h index 7f942476ff..82134ceab0 100644 --- a/Source/Core/Common/Src/Common.h +++ b/Source/Core/Common/Src/Common.h @@ -96,7 +96,7 @@ #endif } -// Debug definions +// Debug definitions #if defined(_DEBUG) #include #undef CHECK_HEAP_INTEGRITY @@ -120,7 +120,7 @@ #endif // Alignment #define GC_ALIGNED16(x) __attribute__((aligned(16))) x - #define GC_ALIGNED32(x) __attribute__((aligned(16))) x + #define GC_ALIGNED32(x) __attribute__((aligned(32))) x #define GC_ALIGNED64(x) __attribute__((aligned(64))) x #define GC_ALIGNED16_DECL(x) __attribute__((aligned(16))) x #define GC_ALIGNED64_DECL(x) __attribute__((aligned(64))) x diff --git a/Source/Core/DSPCore/Src/DSPCodeUtil.cpp b/Source/Core/DSPCore/Src/DSPCodeUtil.cpp index af1656a6c5..250d42df10 100644 --- 
a/Source/Core/DSPCore/Src/DSPCodeUtil.cpp +++ b/Source/Core/DSPCore/Src/DSPCodeUtil.cpp @@ -116,29 +116,26 @@ void GenRandomCode(int size, std::vector &code) void CodeToHeader(const std::vector &code, std::string _filename, const char *name, std::string &header) { - std::vector code_copy = code; - // Add some nops at the end to align the size a bit. - while (code_copy.size() & 7) - code_copy.push_back(0); + std::vector code_padded = code; + // Pad with nops to 32byte boundary + while (code_padded.size() & 0x7f) + code_padded.push_back(0); + char buffer[1024]; header.clear(); - header.reserve(code.size() * 4); + header.reserve(code_padded.size() * 4); header.append("#define NUM_UCODES 1\n\n"); std::string filename; SplitPath(_filename, NULL, &filename, NULL); header.append(StringFromFormat("const char* UCODE_NAMES[NUM_UCODES] = {\"%s\"};\n\n", filename.c_str())); - header.append("#ifndef _MSCVER\n"); header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] = {\n"); - header.append("#else\n"); - header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] __attribute__ ((aligned (64))) = {\n"); - header.append("#endif\n\n"); - + header.append("\t{\n\t\t"); - for (u32 j = 0; j < code.size(); j++) + for (u32 j = 0; j < code_padded.size(); j++) { if (j && ((j & 15) == 0)) header.append("\n\t\t"); - sprintf(buffer, "0x%04x, ", code[j]); + sprintf(buffer, "0x%04x, ", code_padded[j]); header.append(buffer); } header.append("\n\t},\n"); @@ -149,10 +146,18 @@ void CodeToHeader(const std::vector &code, std::string _filename, void CodesToHeader(const std::vector *codes, const std::vector* filenames, int numCodes, const char *name, std::string &header) { + std::vector > codes_padded; char buffer[1024]; int reserveSize = 0; for(int i = 0; i < numCodes; i++) - reserveSize += (int)codes[i].size(); + { + codes_padded.push_back(codes[i]); + // Pad with nops to 32byte boundary + while (codes_padded.at(i).size() & 0x7f) + codes_padded.at(i).push_back(0); + + reserveSize += 
(int)codes_padded.at(i).size(); + } header.clear(); @@ -169,27 +174,19 @@ void CodesToHeader(const std::vector *codes, const std::vector header.append(buffer); } header.append("};\n\n"); - header.append("#ifndef _MSCVER\n"); header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] = {\n"); - header.append("#else\n"); - header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] __attribute__ ((aligned (64))) = {\n"); - header.append("#endif\n\n"); - for(int i = 0; i < numCodes; i++) { + for(int i = 0; i < numCodes; i++) + { if(codes[i].size() == 0) continue; - std::vector code_copy = codes[i]; - // Add some nops at the end to align the size a bit. - while (code_copy.size() & 7) - code_copy.push_back(0); - header.append("\t{\n\t\t"); - for (u32 j = 0; j < codes[i].size(); j++) + for (u32 j = 0; j < codes_padded.at(i).size(); j++) { if (j && ((j & 15) == 0)) header.append("\n\t\t"); - sprintf(buffer, "0x%04x, ", codes[i][j]); + sprintf(buffer, "0x%04x, ", codes_padded.at(i).at(j)); header.append(buffer); } header.append("\n\t},\n"); diff --git a/Source/Core/DSPCore/Src/DSPMemoryMap.cpp b/Source/Core/DSPCore/Src/DSPMemoryMap.cpp index 45f26af684..659c179122 100644 --- a/Source/Core/DSPCore/Src/DSPMemoryMap.cpp +++ b/Source/Core/DSPCore/Src/DSPMemoryMap.cpp @@ -52,7 +52,7 @@ u16 dsp_dmem_read(u16 addr) return g_dsp.dram[addr & DSP_DRAM_MASK]; case 0x1: // 1xxx COEF - // DEBUG_LOG(DSPLLE, "%04x : Coef Read @ %04x", g_dsp.pc, addr); + DEBUG_LOG(DSPLLE, "%04x : Coef Read @ %04x", g_dsp.pc, addr); return g_dsp.coef[addr & DSP_COEF_MASK]; case 0xf: // Fxxx HW regs diff --git a/Source/DSPSpy/ConsoleHelper.h b/Source/DSPSpy/ConsoleHelper.h index 1e2a32e9c5..b46f42d85d 100644 --- a/Source/DSPSpy/ConsoleHelper.h +++ b/Source/DSPSpy/ConsoleHelper.h @@ -76,7 +76,8 @@ inline void CON_BlankRow(const int y) int columns = 0, rows = 0; CON_GetMetrics(&columns, &rows); char* blank = new char[columns]; - std::fill(blank, &blank[columns], ' '); + std::fill(blank, blank + 
columns, ' '); + blank[columns] = '\0'; CON_Printf(0, y, "%s", blank); delete blank; } diff --git a/Source/DSPSpy/dsp_interface.cpp b/Source/DSPSpy/dsp_interface.cpp index 77cea60102..5dbaddfbae 100644 --- a/Source/DSPSpy/dsp_interface.cpp +++ b/Source/DSPSpy/dsp_interface.cpp @@ -19,6 +19,10 @@ void IDSP::SendTask(void *addr, u16 iram_addr, u16 len, u16 start) { + // addr main ram addr 4byte aligned (1 Gekko word) + // iram_addr dsp addr 4byte aligned (2 DSP words) + // len block length in bytes multiple of 4 + // start dsp iram entry point while (CheckMailTo()); SendMailTo(0x80F3A001); while (CheckMailTo()); diff --git a/Source/DSPSpy/main_spy.cpp b/Source/DSPSpy/main_spy.cpp index 57ce76a312..3654a1ad7a 100644 --- a/Source/DSPSpy/main_spy.cpp +++ b/Source/DSPSpy/main_spy.cpp @@ -62,7 +62,7 @@ // #include "virtual_dsp.h" // Used for communications with the DSP, such as dumping registers etc. -u16 dspbuffer[16 * 1024] __attribute__ ((aligned (0x4000))); +u16 dspbuffer[16 * 1024]; static void *xfb = NULL; void (*reboot)() = (void(*)())0x80001800; @@ -327,8 +327,11 @@ void handle_dsp_mail(void) DCFlushRange(dspbufC, 0x2000); // Then send the code. DCFlushRange((void *)dsp_code[curUcode], 0x2000); - // Fill whole iram with code, entry point is just after exception vectors...0x10 - real_dsp.SendTask((void *)MEM_VIRTUAL_TO_PHYSICAL(dsp_code[curUcode]), 0, 4000, 0x10); + // DMA ucode to iram base, entry point is just after exception vectors...0x10 + // (shuffle2) 5256 is the highest I could get the dma block length to on my wii - still needs to be looked into + // for the tstaxh test, 5256 only yields up to step 325. There are 532 send_backs in the ucode, and it takes up + // almost all of the iram. 
+ real_dsp.SendTask((void *)MEM_VIRTUAL_TO_PHYSICAL(dsp_code[curUcode]), 0, 5256, 0x10); runningUcode = curUcode + 1; @@ -338,7 +341,7 @@ void handle_dsp_mail(void) else if ((mail & 0xffff0000) == 0x8bad0000) { // dsp_base.inc is reporting an exception happened - CON_PrintRow(4, 25, "%s caused exception %x", UCODE_NAMES[curUcode], mail & 0xff); + CON_PrintRow(4, 25, "%s caused exception %x at step %i", UCODE_NAMES[curUcode], mail & 0xff, dsp_steps); } else if (mail == 0x8888dead) { @@ -426,13 +429,11 @@ void dump_all_ucodes(void) // Then write all the dumps. written += fwrite(dspreg_out, 1, dsp_steps * 32 * 2, f); fclose(f); - char temp[100]; - sprintf(temp, "Dump Successful. Wrote %d bytes, steps: %d", written, dsp_steps); - UpdateLastMessage(temp); + CON_PrintRow(4, 24, "Dump %i Successful. Wrote %d bytes, steps: %d", UCodeToDump, written, dsp_steps); } else { - UpdateLastMessage("SD Write Error"); + CON_PrintRow(4, 24, "Dump %i Failed", UCodeToDump); break; } } diff --git a/Source/DSPSpy/util/createtest.pl b/Source/DSPSpy/util/createtest.pl index 8417b18689..8ea556059d 100755 --- a/Source/DSPSpy/util/createtest.pl +++ b/Source/DSPSpy/util/createtest.pl @@ -94,6 +94,7 @@ for(my $i = 0;$i < scalar(@cmdList);$i++) { open(OUTPUT, ">$name$j.tst"); print OUTPUT generateSRFull($header, $body, $j*$ucodes, ($j+1)*$ucodes-1); + print OUTPUT "jmp end_of_test"; close(OUTPUT); print NAMES "$name$j.tst"; diff --git a/Source/DSPSpy/util/dump_roms.ds b/Source/DSPSpy/util/dump_roms.ds index fb8df643cf..16b021b59a 100644 --- a/Source/DSPSpy/util/dump_roms.ds +++ b/Source/DSPSpy/util/dump_roms.ds @@ -1,186 +1,186 @@ -; This ucode can copy the dsp instruction rom and coefficient table. -; irom: -; 0x8000 in instruction space -; coef: -; 0x1000 in data space -; -; Both irom and coef are 0x1000 words in length - remember, DSP -; uses 16bit words -; -; The DSP has two address spaces, to load data from instruction -; space you need to use 'i'-prefixed instructions. 
- - +; This ucode can copy the dsp instruction rom and coefficient table. +; irom: +; 0x8000 in instruction space +; coef: +; 0x1000 in data space +; +; Both irom and coef are 0x1000 words in length - remember, DSP +; uses 16bit words +; +; The DSP has two address spaces, to load data from instruction +; space you need to use 'i'-prefixed instructions. + + /********************************/ /** HANDY THANGERS **/ -/********************************/ -; External -MEM_BASE: equ 0x0000 -MEM_HI: equ MEM_BASE -MEM_LO: equ MEM_BASE+1 -; DSP -DRAM_BASE: equ 0x0000 - -; Config reg controls dma behavior +/********************************/ +; External +MEM_BASE: equ 0x0000 +MEM_HI: equ MEM_BASE +MEM_LO: equ MEM_BASE+1 +; DSP +DRAM_BASE: equ 0x0000 + +; Config reg controls dma behavior CR_TO_DSP: equ 0 -CR_TO_CPU: equ 1 -CR_IRAM: equ 2 -CR_DRAM: equ 0 - -IROM_BASE: equ 0x8000 -COEF_BASE: equ 0x1000 -DUMP_SIZE: equ 0x2000 ; in bytes! - - +CR_TO_CPU: equ 1 +CR_IRAM: equ 2 +CR_DRAM: equ 0 + +IROM_BASE: equ 0x8000 +COEF_BASE: equ 0x1000 +DUMP_SIZE: equ 0x2000 ; in bytes! + + /**************************************************************/ /* CODE START */ -/**************************************************************/ -; iram 0x00 - Exception vectors -; 8 vectors, 2 opcodes each - jmp exception0 - jmp exception1 - jmp exception2 - jmp exception3 - jmp exception4 - jmp exception5 - jmp exception6 - jmp exception7 - -; iram 0x10 - Our entry point - sbset #0x02 - sbset #0x03 - sbclr #0x04 - sbset #0x05 - sbset #0x06 - -; ??? - s16 - lri $CR, #0x00ff - +/**************************************************************/ +; iram 0x00 - Exception vectors +; 8 vectors, 2 opcodes each + jmp exception0 + jmp exception1 + jmp exception2 + jmp exception3 + jmp exception4 + jmp exception5 + jmp exception6 + jmp exception7 + +; iram 0x10 - Our entry point + sbset #0x02 + sbset #0x03 + sbclr #0x04 + sbset #0x05 + sbset #0x06 + +; ??? 
+ s16 + lri $CR, #0x00ff + /**************************************************************/ /* MAIN */ -/**************************************************************/ -; This ucode is meant only to dump the ROMs, and as such is -; self-contained and skimpy -main: - clr $acc1 - clr $acc0 - -; This consumes ALL of dram! We must be careful until we dma it! - call copy_irom_to_dram -; Send mail saying irom dump is done - call wait_for_dsp_mbox - si @DMBH, #0x8888 - si @DMBL, #0xc0de - si @DIRQ, #0x0001 -; Get address to dma to, dma, and wait till done - call dma_dram_to_cmbl - -; Now we can start over for the coef - call copy_coef_to_dram -; Send mail saying coef dump is done - call wait_for_dsp_mbox - si @DMBH, #0x8888 - si @DMBL, #0xda7a - si @DIRQ, #0x0001 -; Get address to dma to, dma, and wait till done - call dma_dram_to_cmbl - -; Die -do_halt: - halt - +/**************************************************************/ +; This ucode is meant only to dump the ROMs, and as such is +; self-contained and skimpy +main: + clr $acc1 + clr $acc0 + +; This consumes ALL of dram! We must be careful until we dma it! 
+ call copy_irom_to_dram +; Send mail saying irom dump is done + call wait_for_dsp_mbox + si @DMBH, #0x8888 + si @DMBL, #0xc0de + si @DIRQ, #0x0001 +; Get address to dma to, dma, and wait till done + call dma_dram_to_cmbl + +; Now we can start over for the coef + call copy_coef_to_dram +; Send mail saying coef dump is done + call wait_for_dsp_mbox + si @DMBH, #0x8888 + si @DMBL, #0xda7a + si @DIRQ, #0x0001 +; Get address to dma to, dma, and wait till done + call dma_dram_to_cmbl + +; Die +do_halt: + halt + /**************************************************************/ /* HELPER FUNCTIONS */ -/**************************************************************/ +/**************************************************************/ /********************************/ /** DUMPING FUNCTIONS **/ -/********************************/ +/********************************/ ; Dump irom from 0x8000 in instruction space copy_irom_to_dram: - lri $ar0, #IROM_BASE - lri $ar1, #DRAM_BASE - lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word - bloop $ar2, copy_irom_to_dram_end - ilrri $ac0.m, @$ar0 - ; Now ac0.m is 16bits of irom! - srri @$ar1, $ac0.m -copy_irom_to_dram_end: - nop - ret - + lri $ar0, #IROM_BASE + lri $ar1, #DRAM_BASE + lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word + bloop $ar2, copy_irom_to_dram_end + ilrri $ac0.m, @$ar0 + ; Now ac0.m is 16bits of irom! + srri @$ar1, $ac0.m +copy_irom_to_dram_end: + nop + ret + ; Dump coef from 0x1000 in data space copy_coef_to_dram: lri $ar0, #COEF_BASE lri $ar1, #DRAM_BASE - lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word - bloop $ar2, copy_coef_to_dram_end - lrri $ac0.m, @$ar0 - ; Now ac0.m is 16bits of coef! - srri @$ar1, $ac0.m -copy_coef_to_dram_end: - nop - ret - + lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word + bloop $ar2, copy_coef_to_dram_end + lrri $ac0.m, @$ar0 + ; Now ac0.m is 16bits of coef! 
+ srri @$ar1, $ac0.m +copy_coef_to_dram_end: + nop + ret + /********************************/ /** DMA **/ -/********************************/ -; DMA implementation which does not write to dram -; We take advantage of the fact that we know the mail is going to -; contain the address which we should dma to -dma_dram_to_cmbl: - call wait_for_cpu_mbox - lrs $ac0.m, @CMBL - andi $ac1.m, #0x7fff - -; Directly do dma; writing the length kicks it off - sr @DSMAH, $ac1.m - sr @DSMAL, $ac0.m - si @DSPA, #DRAM_BASE - si @DSCR, #(CR_TO_CPU|CR_DRAM) - si @DSBL, #DUMP_SIZE - -; Waits for previous DMA to complete by watching a bit in DSCR. -wait_dma: - lrs $ac1.m, @DSCR - andcf $ac1.m, #0x0004 - jlz wait_dma - ret - +/********************************/ +; DMA implementation which does not write to dram +; We take advantage of the fact that we know the mail is going to +; contain the address which we should dma to +dma_dram_to_cmbl: + call wait_for_cpu_mbox + lrs $ac0.m, @CMBL + andi $ac1.m, #0x7fff + +; Directly do dma; writing the length kicks it off + sr @DSMAH, $ac1.m + sr @DSMAL, $ac0.m + si @DSPA, #DRAM_BASE + si @DSCR, #(CR_TO_CPU|CR_DRAM) + si @DSBL, #DUMP_SIZE + +; Waits for previous DMA to complete by watching a bit in DSCR. +wait_dma: + lrs $ac1.m, @DSCR + andcf $ac1.m, #0x0004 + jlz wait_dma + ret + /********************************/ /** MAILBOX **/ -/********************************/ -; Waits for a mail to arrive in the DSP in-mailbox. -wait_for_dsp_mbox: - lrs $ac1.m, @DMBH - andcf $ac1.m, #0x8000 - jlz wait_for_dsp_mbox - ret - -; Waits for the CPU to grab a mail that we just sent from the DSP. -wait_for_cpu_mbox: - lrs $ac1.m, @CMBH - andcf $ac1.m, #0x8000 - jlnz wait_for_cpu_mbox - ret - +/********************************/ +; Waits for a mail to arrive in the DSP in-mailbox. +wait_for_dsp_mbox: + lrs $ac1.m, @DMBH + andcf $ac1.m, #0x8000 + jlz wait_for_dsp_mbox + ret + +; Waits for the CPU to grab a mail that we just sent from the DSP. 
+wait_for_cpu_mbox: + lrs $ac1.m, @CMBH + andcf $ac1.m, #0x8000 + jlnz wait_for_cpu_mbox + ret + /********************************/ /** EXCEPTION HANDLERS **/ -/********************************/ -; ...zey do nutzing! -exception0: - rti -exception1: - rti -exception2: - rti -exception3: - rti -exception4: - rti -exception5: - rti -exception6: - rti -exception7: - rti +/********************************/ +; ...zey do nutzing! +exception0: + rti +exception1: + rti +exception2: + rti +exception3: + rti +exception4: + rti +exception5: + rti +exception6: + rti +exception7: + rti diff --git a/Source/Plugins/Plugin_DSP_HLE/Src/UCodes/UCodes.cpp b/Source/Plugins/Plugin_DSP_HLE/Src/UCodes/UCodes.cpp index b313f5422f..b0a2657343 100644 --- a/Source/Plugins/Plugin_DSP_HLE/Src/UCodes/UCodes.cpp +++ b/Source/Plugins/Plugin_DSP_HLE/Src/UCodes/UCodes.cpp @@ -70,6 +70,7 @@ IUCode* UCodeFactory(u32 _CRC, CMailHandler& _rMailHandler) return new CUCode_Zelda(_rMailHandler, _CRC); // WII CRCs + case 0x2ea36ce6: // Wii THP demo case 0xb7eb9a9c: // Wii Pikmin - PAL case 0xeaeb38cc: // Wii Pikmin 2 - PAL case 0x6c3f6f94: // zelda - PAL