dspspy: fix a bug with ConsoleHelper

dspspy: remove some alignment for some vars. (should not have caused issues anyways)
dspCodeUtil: pad ucodes converted to headers to 32-byte multiples with nops (this only mimics what is seen in Nintendo ucodes, probably because theirs originate on disc)
Common.h: fix typo for gcc version of GC_ALIGNED32 (wtf? how did this work before?!)
createtest.pl: add "jmp end_of_test" to the end of generated ucodes.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3994 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Shawn Hoffman 2009-08-16 00:37:01 +00:00
parent 2060cae79b
commit 6e61c32495
9 changed files with 205 additions and 200 deletions

View File

@ -96,7 +96,7 @@
#endif
}
// Debug definions
// Debug definitions
#if defined(_DEBUG)
#include <crtdbg.h>
#undef CHECK_HEAP_INTEGRITY
@ -120,7 +120,7 @@
#endif
// Alignment
#define GC_ALIGNED16(x) __attribute__((aligned(16))) x
#define GC_ALIGNED32(x) __attribute__((aligned(16))) x
#define GC_ALIGNED32(x) __attribute__((aligned(32))) x
#define GC_ALIGNED64(x) __attribute__((aligned(64))) x
#define GC_ALIGNED16_DECL(x) __attribute__((aligned(16))) x
#define GC_ALIGNED64_DECL(x) __attribute__((aligned(64))) x

View File

@ -116,29 +116,26 @@ void GenRandomCode(int size, std::vector<u16> &code)
void CodeToHeader(const std::vector<u16> &code, std::string _filename,
const char *name, std::string &header)
{
std::vector<u16> code_copy = code;
// Add some nops at the end to align the size a bit.
while (code_copy.size() & 7)
code_copy.push_back(0);
std::vector<u16> code_padded = code;
// Pad with nops to 32byte boundary
while (code_padded.size() & 0x7f)
code_padded.push_back(0);
char buffer[1024];
header.clear();
header.reserve(code.size() * 4);
header.reserve(code_padded.size() * 4);
header.append("#define NUM_UCODES 1\n\n");
std::string filename;
SplitPath(_filename, NULL, &filename, NULL);
header.append(StringFromFormat("const char* UCODE_NAMES[NUM_UCODES] = {\"%s\"};\n\n", filename.c_str()));
header.append("#ifndef _MSCVER\n");
header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] = {\n");
header.append("#else\n");
header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] __attribute__ ((aligned (64))) = {\n");
header.append("#endif\n\n");
header.append("\t{\n\t\t");
for (u32 j = 0; j < code.size(); j++)
for (u32 j = 0; j < code_padded.size(); j++)
{
if (j && ((j & 15) == 0))
header.append("\n\t\t");
sprintf(buffer, "0x%04x, ", code[j]);
sprintf(buffer, "0x%04x, ", code_padded[j]);
header.append(buffer);
}
header.append("\n\t},\n");
@ -149,10 +146,18 @@ void CodeToHeader(const std::vector<u16> &code, std::string _filename,
void CodesToHeader(const std::vector<u16> *codes, const std::vector<std::string>* filenames,
int numCodes, const char *name, std::string &header)
{
std::vector<std::vector<u16> > codes_padded;
char buffer[1024];
int reserveSize = 0;
for(int i = 0; i < numCodes; i++)
reserveSize += (int)codes[i].size();
{
codes_padded.push_back(codes[i]);
// Pad with nops to 32byte boundary
while (codes_padded.at(i).size() & 0x7f)
codes_padded.at(i).push_back(0);
reserveSize += (int)codes_padded.at(i).size();
}
header.clear();
@ -169,27 +174,19 @@ void CodesToHeader(const std::vector<u16> *codes, const std::vector<std::string>
header.append(buffer);
}
header.append("};\n\n");
header.append("#ifndef _MSCVER\n");
header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] = {\n");
header.append("#else\n");
header.append("const unsigned short dsp_code[NUM_UCODES][0x1000] __attribute__ ((aligned (64))) = {\n");
header.append("#endif\n\n");
for(int i = 0; i < numCodes; i++) {
for(int i = 0; i < numCodes; i++)
{
if(codes[i].size() == 0)
continue;
std::vector<u16> code_copy = codes[i];
// Add some nops at the end to align the size a bit.
while (code_copy.size() & 7)
code_copy.push_back(0);
header.append("\t{\n\t\t");
for (u32 j = 0; j < codes[i].size(); j++)
for (u32 j = 0; j < codes_padded.at(i).size(); j++)
{
if (j && ((j & 15) == 0))
header.append("\n\t\t");
sprintf(buffer, "0x%04x, ", codes[i][j]);
sprintf(buffer, "0x%04x, ", codes_padded.at(i).at(j));
header.append(buffer);
}
header.append("\n\t},\n");

View File

@ -52,7 +52,7 @@ u16 dsp_dmem_read(u16 addr)
return g_dsp.dram[addr & DSP_DRAM_MASK];
case 0x1: // 1xxx COEF
// DEBUG_LOG(DSPLLE, "%04x : Coef Read @ %04x", g_dsp.pc, addr);
DEBUG_LOG(DSPLLE, "%04x : Coef Read @ %04x", g_dsp.pc, addr);
return g_dsp.coef[addr & DSP_COEF_MASK];
case 0xf: // Fxxx HW regs

View File

@ -76,7 +76,8 @@ inline void CON_BlankRow(const int y)
int columns = 0, rows = 0;
CON_GetMetrics(&columns, &rows);
char* blank = new char[columns];
std::fill(blank, &blank[columns], ' ');
std::fill(blank, blank + columns, ' ');
blank[columns] = '\0';
CON_Printf(0, y, "%s", blank);
delete blank;
}

View File

@ -19,6 +19,10 @@
void IDSP::SendTask(void *addr, u16 iram_addr, u16 len, u16 start)
{
// addr main ram addr 4byte aligned (1 Gekko word)
// iram_addr dsp addr 4byte aligned (2 DSP words)
// len block length in bytes multiple of 4
// start dsp iram entry point
while (CheckMailTo());
SendMailTo(0x80F3A001);
while (CheckMailTo());

View File

@ -62,7 +62,7 @@
// #include "virtual_dsp.h"
// Used for communications with the DSP, such as dumping registers etc.
u16 dspbuffer[16 * 1024] __attribute__ ((aligned (0x4000)));
u16 dspbuffer[16 * 1024];
static void *xfb = NULL;
void (*reboot)() = (void(*)())0x80001800;
@ -327,8 +327,11 @@ void handle_dsp_mail(void)
DCFlushRange(dspbufC, 0x2000);
// Then send the code.
DCFlushRange((void *)dsp_code[curUcode], 0x2000);
// Fill whole iram with code, entry point is just after exception vectors...0x10
real_dsp.SendTask((void *)MEM_VIRTUAL_TO_PHYSICAL(dsp_code[curUcode]), 0, 4000, 0x10);
// DMA ucode to iram base, entry point is just after exception vectors...0x10
// (shuffle2) 5256 is the highest I could get the dma block length to on my wii - still needs to be looked into
// for the tstaxh test, 5256 only yields up to step 325. There are 532 send_backs in the ucode, and it takes up
// almost all of the iram.
real_dsp.SendTask((void *)MEM_VIRTUAL_TO_PHYSICAL(dsp_code[curUcode]), 0, 5256, 0x10);
runningUcode = curUcode + 1;
@ -338,7 +341,7 @@ void handle_dsp_mail(void)
else if ((mail & 0xffff0000) == 0x8bad0000)
{
// dsp_base.inc is reporting an exception happened
CON_PrintRow(4, 25, "%s caused exception %x", UCODE_NAMES[curUcode], mail & 0xff);
CON_PrintRow(4, 25, "%s caused exception %x at step %i", UCODE_NAMES[curUcode], mail & 0xff, dsp_steps);
}
else if (mail == 0x8888dead)
{
@ -426,13 +429,11 @@ void dump_all_ucodes(void)
// Then write all the dumps.
written += fwrite(dspreg_out, 1, dsp_steps * 32 * 2, f);
fclose(f);
char temp[100];
sprintf(temp, "Dump Successful. Wrote %d bytes, steps: %d", written, dsp_steps);
UpdateLastMessage(temp);
CON_PrintRow(4, 24, "Dump %i Successful. Wrote %d bytes, steps: %d", UCodeToDump, written, dsp_steps);
}
else
{
UpdateLastMessage("SD Write Error");
CON_PrintRow(4, 24, "Dump %i Failed", UCodeToDump);
break;
}
}

View File

@ -94,6 +94,7 @@ for(my $i = 0;$i < scalar(@cmdList);$i++) {
open(OUTPUT, ">$name$j.tst");
print OUTPUT generateSRFull($header, $body, $j*$ucodes,
($j+1)*$ucodes-1);
print OUTPUT "jmp end_of_test";
close(OUTPUT);
print NAMES "$name$j.tst";

View File

@ -1,186 +1,186 @@
; This ucode can copy the dsp instruction rom and coefficient table.
; irom:
; 0x8000 in instruction space
; coef:
; 0x1000 in data space
;
; Both irom and coef are 0x1000 words in length - remember, DSP
; uses 16bit words
;
; The DSP has two address spaces, to load data from instruction
; space you need to use 'i'-prefixed instructions.
; This ucode can copy the dsp instruction rom and coefficient table.
; irom:
; 0x8000 in instruction space
; coef:
; 0x1000 in data space
;
; Both irom and coef are 0x1000 words in length - remember, DSP
; uses 16bit words
;
; The DSP has two address spaces, to load data from instruction
; space you need to use 'i'-prefixed instructions.
/********************************/
/** HANDY THANGERS **/
/********************************/
; External
MEM_BASE: equ 0x0000
MEM_HI: equ MEM_BASE
MEM_LO: equ MEM_BASE+1
; DSP
DRAM_BASE: equ 0x0000
; Config reg controls dma behavior
/********************************/
; External
MEM_BASE: equ 0x0000
MEM_HI: equ MEM_BASE
MEM_LO: equ MEM_BASE+1
; DSP
DRAM_BASE: equ 0x0000
; Config reg controls dma behavior
CR_TO_DSP: equ 0
CR_TO_CPU: equ 1
CR_IRAM: equ 2
CR_DRAM: equ 0
IROM_BASE: equ 0x8000
COEF_BASE: equ 0x1000
DUMP_SIZE: equ 0x2000 ; in bytes!
CR_TO_CPU: equ 1
CR_IRAM: equ 2
CR_DRAM: equ 0
IROM_BASE: equ 0x8000
COEF_BASE: equ 0x1000
DUMP_SIZE: equ 0x2000 ; in bytes!
/**************************************************************/
/* CODE START */
/**************************************************************/
; iram 0x00 - Exception vectors
; 8 vectors, 2 opcodes each
jmp exception0
jmp exception1
jmp exception2
jmp exception3
jmp exception4
jmp exception5
jmp exception6
jmp exception7
; iram 0x10 - Our entry point
sbset #0x02
sbset #0x03
sbclr #0x04
sbset #0x05
sbset #0x06
; ???
s16
lri $CR, #0x00ff
/**************************************************************/
; iram 0x00 - Exception vectors
; 8 vectors, 2 opcodes each
jmp exception0
jmp exception1
jmp exception2
jmp exception3
jmp exception4
jmp exception5
jmp exception6
jmp exception7
; iram 0x10 - Our entry point
sbset #0x02
sbset #0x03
sbclr #0x04
sbset #0x05
sbset #0x06
; ???
s16
lri $CR, #0x00ff
/**************************************************************/
/* MAIN */
/**************************************************************/
; This ucode is meant only to dump the ROMs, and as such is
; self-contained and skimpy
main:
clr $acc1
clr $acc0
; This consumes ALL of dram! We must be careful until we dma it!
call copy_irom_to_dram
; Send mail saying irom dump is done
call wait_for_dsp_mbox
si @DMBH, #0x8888
si @DMBL, #0xc0de
si @DIRQ, #0x0001
; Get address to dma to, dma, and wait till done
call dma_dram_to_cmbl
; Now we can start over for the coef
call copy_coef_to_dram
; Send mail saying coef dump is done
call wait_for_dsp_mbox
si @DMBH, #0x8888
si @DMBL, #0xda7a
si @DIRQ, #0x0001
; Get address to dma to, dma, and wait till done
call dma_dram_to_cmbl
; Die
do_halt:
halt
/**************************************************************/
; This ucode is meant only to dump the ROMs, and as such is
; self-contained and skimpy
main:
clr $acc1
clr $acc0
; This consumes ALL of dram! We must be careful until we dma it!
call copy_irom_to_dram
; Send mail saying irom dump is done
call wait_for_dsp_mbox
si @DMBH, #0x8888
si @DMBL, #0xc0de
si @DIRQ, #0x0001
; Get address to dma to, dma, and wait till done
call dma_dram_to_cmbl
; Now we can start over for the coef
call copy_coef_to_dram
; Send mail saying coef dump is done
call wait_for_dsp_mbox
si @DMBH, #0x8888
si @DMBL, #0xda7a
si @DIRQ, #0x0001
; Get address to dma to, dma, and wait till done
call dma_dram_to_cmbl
; Die
do_halt:
halt
/**************************************************************/
/* HELPER FUNCTIONS */
/**************************************************************/
/**************************************************************/
/********************************/
/** DUMPING FUNCTIONS **/
/********************************/
/********************************/
; Dump irom from 0x8000 in instruction space
copy_irom_to_dram:
lri $ar0, #IROM_BASE
lri $ar1, #DRAM_BASE
lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word
bloop $ar2, copy_irom_to_dram_end
ilrri $ac0.m, @$ar0
; Now ac0.m is 16bits of irom!
srri @$ar1, $ac0.m
copy_irom_to_dram_end:
nop
ret
lri $ar0, #IROM_BASE
lri $ar1, #DRAM_BASE
lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word
bloop $ar2, copy_irom_to_dram_end
ilrri $ac0.m, @$ar0
; Now ac0.m is 16bits of irom!
srri @$ar1, $ac0.m
copy_irom_to_dram_end:
nop
ret
; Dump coef from 0x1000 in data space
copy_coef_to_dram:
lri $ar0, #COEF_BASE
lri $ar1, #DRAM_BASE
lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word
bloop $ar2, copy_coef_to_dram_end
lrri $ac0.m, @$ar0
; Now ac0.m is 16bits of coef!
srri @$ar1, $ac0.m
copy_coef_to_dram_end:
nop
ret
lri $ar2, #DUMP_SIZE/2 ; each iteration copies a word
bloop $ar2, copy_coef_to_dram_end
lrri $ac0.m, @$ar0
; Now ac0.m is 16bits of coef!
srri @$ar1, $ac0.m
copy_coef_to_dram_end:
nop
ret
/********************************/
/** DMA **/
/********************************/
; DMA implementation which does not write to dram
; We take advantage of the fact that we know the mail is going to
; contain the address which we should dma to
dma_dram_to_cmbl:
call wait_for_cpu_mbox
lrs $ac0.m, @CMBL
andi $ac1.m, #0x7fff
; Directly do dma; writing the length kicks it off
sr @DSMAH, $ac1.m
sr @DSMAL, $ac0.m
si @DSPA, #DRAM_BASE
si @DSCR, #(CR_TO_CPU|CR_DRAM)
si @DSBL, #DUMP_SIZE
; Waits for previous DMA to complete by watching a bit in DSCR.
wait_dma:
lrs $ac1.m, @DSCR
andcf $ac1.m, #0x0004
jlz wait_dma
ret
/********************************/
; DMA implementation which does not write to dram
; We take advantage of the fact that we know the mail is going to
; contain the address which we should dma to
dma_dram_to_cmbl:
call wait_for_cpu_mbox
lrs $ac0.m, @CMBL
andi $ac1.m, #0x7fff
; Directly do dma; writing the length kicks it off
sr @DSMAH, $ac1.m
sr @DSMAL, $ac0.m
si @DSPA, #DRAM_BASE
si @DSCR, #(CR_TO_CPU|CR_DRAM)
si @DSBL, #DUMP_SIZE
; Waits for previous DMA to complete by watching a bit in DSCR.
wait_dma:
lrs $ac1.m, @DSCR
andcf $ac1.m, #0x0004
jlz wait_dma
ret
/********************************/
/** MAILBOX **/
/********************************/
; Waits for a mail to arrive in the DSP in-mailbox.
wait_for_dsp_mbox:
lrs $ac1.m, @DMBH
andcf $ac1.m, #0x8000
jlz wait_for_dsp_mbox
ret
; Waits for the CPU to grab a mail that we just sent from the DSP.
wait_for_cpu_mbox:
lrs $ac1.m, @CMBH
andcf $ac1.m, #0x8000
jlnz wait_for_cpu_mbox
ret
/********************************/
; Waits for a mail to arrive in the DSP in-mailbox.
wait_for_dsp_mbox:
lrs $ac1.m, @DMBH
andcf $ac1.m, #0x8000
jlz wait_for_dsp_mbox
ret
; Waits for the CPU to grab a mail that we just sent from the DSP.
wait_for_cpu_mbox:
lrs $ac1.m, @CMBH
andcf $ac1.m, #0x8000
jlnz wait_for_cpu_mbox
ret
/********************************/
/** EXCEPTION HANDLERS **/
/********************************/
; ...zey do nutzing!
exception0:
rti
exception1:
rti
exception2:
rti
exception3:
rti
exception4:
rti
exception5:
rti
exception6:
rti
exception7:
rti
/********************************/
; ...zey do nutzing!
exception0:
rti
exception1:
rti
exception2:
rti
exception3:
rti
exception4:
rti
exception5:
rti
exception6:
rti
exception7:
rti

View File

@ -70,6 +70,7 @@ IUCode* UCodeFactory(u32 _CRC, CMailHandler& _rMailHandler)
return new CUCode_Zelda(_rMailHandler, _CRC);
// WII CRCs
case 0x2ea36ce6: // Wii THP demo
case 0xb7eb9a9c: // Wii Pikmin - PAL
case 0xeaeb38cc: // Wii Pikmin 2 - PAL
case 0x6c3f6f94: // zelda - PAL