diff --git a/bsnes/cpu/cpu.cpp b/bsnes/cpu/cpu.cpp index 481b5dca..cb836768 100755 --- a/bsnes/cpu/cpu.cpp +++ b/bsnes/cpu/cpu.cpp @@ -112,6 +112,7 @@ void CPU::reset() { status.irq_transition = false; status.irq_pending = false; + status.irq_lock = false; status.hdma_pending = false; status.wram_addr = 0x000000; diff --git a/bsnes/cpu/cpu.hpp b/bsnes/cpu/cpu.hpp index 9a995cc0..d478484b 100755 --- a/bsnes/cpu/cpu.hpp +++ b/bsnes/cpu/cpu.hpp @@ -34,11 +34,16 @@ private: void op_irq(uint16 vector); //timing + struct QueueEvent { + enum : unsigned { + DramRefresh, + HdmaRun, + }; + }; nall::priority_queue queue; void queue_event(unsigned id); void last_cycle(); void add_clocks(unsigned clocks); - void add_time(unsigned clocks); void scanline(); void run_auto_joypad_poll(); @@ -56,6 +61,7 @@ private: unsigned hdma_addr(unsigned i); unsigned hdma_iaddr(unsigned i); void dma_run(); + bool hdma_active_after(unsigned i); void hdma_update(unsigned i); void hdma_run(); void hdma_init(); @@ -104,6 +110,7 @@ private: bool irq_transition; bool irq_pending; + bool irq_lock; bool hdma_pending; unsigned wram_addr; diff --git a/bsnes/cpu/dma.cpp b/bsnes/cpu/dma.cpp index 404f880b..d9d89e05 100755 --- a/bsnes/cpu/dma.cpp +++ b/bsnes/cpu/dma.cpp @@ -83,6 +83,15 @@ void CPU::dma_run() { dma_transfer(channel[i].direction, dma_bbus(i, index++), dma_addr(i)); } while(channel[i].dma_enabled && --channel[i].transfer_size); } + + status.irq_lock = true; +} + +bool CPU::hdma_active_after(unsigned i) { + for(unsigned n = i + 1; n < 8; n++) { + if(channel[n].hdma_enabled && !channel[n].hdma_completed) return true; + } + return false; } void CPU::hdma_update(unsigned i) { @@ -93,10 +102,15 @@ void CPU::hdma_update(unsigned i) { add_clocks(8); if(channel[i].indirect) { - channel[i].indirect_addr = dma_read(hdma_addr(i)) << 0; - add_clocks(8); - channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8; + channel[i].indirect_addr = dma_read(hdma_addr(i)) << 8; add_clocks(8); + + //emulating this 
glitch causes a slight slowdown; only enable if needed + //if(!channel[i].hdma_completed || hdma_active_after(i)) { + channel[i].indirect_addr >>= 8; + channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8; + add_clocks(8); + //} } } } @@ -108,7 +122,7 @@ void CPU::hdma_run() { } if(channels == 0) return; - add_clocks(16); + add_clocks(24); for(unsigned i = 0; i < 8; i++) { if(channel[i].hdma_enabled == false || channel[i].hdma_completed == true) continue; channel[i].dma_enabled = false; @@ -130,6 +144,8 @@ void CPU::hdma_run() { channel[i].hdma_do_transfer = channel[i].line_counter & 0x80; hdma_update(i); } + + status.irq_lock = true; } void CPU::hdma_init() { @@ -150,6 +166,8 @@ void CPU::hdma_init() { channel[i].line_counter = 0; hdma_update(i); } + + status.irq_lock = true; } void CPU::dma_reset() { diff --git a/bsnes/cpu/timing.cpp b/bsnes/cpu/timing.cpp index 95ed6d8e..7c333db7 100755 --- a/bsnes/cpu/timing.cpp +++ b/bsnes/cpu/timing.cpp @@ -1,12 +1,5 @@ #ifdef CPU_CPP -struct QueueEvent { - enum : unsigned { - DramRefresh, - HdmaRun, - }; -}; - void CPU::queue_event(unsigned id) { switch(id) { case QueueEvent::DramRefresh: return add_clocks(40); @@ -15,13 +8,18 @@ void CPU::queue_event(unsigned id) { } void CPU::last_cycle() { + if(status.irq_lock) { + status.irq_lock = false; + return; + } + if(status.nmi_transition) { regs.wai = false; status.nmi_transition = false; status.nmi_pending = true; } - if(status.irq_transition) { + if(status.irq_transition || regs.irq) { regs.wai = false; status.irq_transition = false; status.irq_pending = !regs.p.i; @@ -29,40 +27,32 @@ void CPU::last_cycle() { } void CPU::add_clocks(unsigned clocks) { - step(clocks); - queue.tick(clocks); - unsigned clocksleft = lineclocks() - hcounter(); - if(clocks > clocksleft) { - add_time(clocksleft); - add_time(clocks - clocksleft); - } else { - add_time(clocks); - } -} - -void CPU::add_time(unsigned clocks) { - if(status.irq_line && (status.virq_enabled || status.hirq_enabled)) { - 
status.irq_transition = true; - } - - if(status.virq_enabled && !status.hirq_enabled) { + if(status.hirq_enabled) { + if(status.virq_enabled) { + unsigned cpu_time = vcounter() * 1364 + hcounter(); + unsigned irq_time = status.vtime * 1364 + status.htime * 4; + if(cpu_time > irq_time) irq_time += 262 * 1364; + bool irq_valid = status.irq_valid; + status.irq_valid = cpu_time <= irq_time && cpu_time + clocks > irq_time; + if(!irq_valid && status.irq_valid) status.irq_line = true; + } else { + unsigned irq_time = status.htime * 4; + if(hcounter() > irq_time) irq_time += 1364; + bool irq_valid = status.irq_valid; + status.irq_valid = hcounter() <= irq_time && hcounter() + clocks > irq_time; + if(!irq_valid && status.irq_valid) status.irq_line = true; + } + if(status.irq_line) status.irq_transition = true; + } else if(status.virq_enabled) { bool irq_valid = status.irq_valid; status.irq_valid = vcounter() == status.vtime; - if(!irq_valid && status.irq_valid) { - status.irq_line = true; - status.irq_transition = true; - } - } else if(status.hirq_enabled) { - bool irq_valid = status.irq_valid; - status.irq_valid = hcounter() <= status.htime * 4 && hcounter() + clocks > status.htime * 4; - if(status.virq_enabled && vcounter() != status.vtime) status.irq_valid = false; - if(!irq_valid && status.irq_valid) { - status.irq_line = true; - status.irq_transition = true; - } + if(!irq_valid && status.irq_valid) status.irq_line = true; + if(status.irq_line) status.irq_transition = true; } tick(clocks); + queue.tick(clocks); + step(clocks); } void CPU::scanline() { diff --git a/bsnes/info.hpp b/bsnes/info.hpp index bed8783b..add71c68 100755 --- a/bsnes/info.hpp +++ b/bsnes/info.hpp @@ -1,7 +1,7 @@ namespace SNES { namespace Info { static const char Name[] = "bsnes"; - static const char Version[] = "067.10"; + static const char Version[] = "067.11"; static const unsigned SerializerVersion = 12; } } diff --git a/bsnes/smp/snes_spc/Spc_Dsp.cpp b/bsnes/smp/snes_spc/Spc_Dsp.cpp index 
46412254..106980ac 100755 --- a/bsnes/smp/snes_spc/Spc_Dsp.cpp +++ b/bsnes/smp/snes_spc/Spc_Dsp.cpp @@ -21,9 +21,6 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include BLARGG_ENABLE_OPTIMIZER #endif -// New SNES DSP behaves slightly differently (not all differences handled yet) -bool const new_snes = false; - // if ( io < -32768 ) io = -32768; // if ( io > 32767 ) io = 32767; #define CLAMP16( io )\ @@ -93,736 +90,551 @@ inline void Spc_Dsp::write_sample( int l, int r ) // Volume registers and efb are signed! Easy to forget int8_t cast. // Prefixes are to avoid accidental use of locals with same names. -// Gaussian interpolation - -static short const gauss [512] = +// Interleaved gauss table (to improve cache coherency) +// interleaved_gauss [i] = gauss [(i & 1) * 256 + 255 - (i >> 1 & 0xFF)] +static short const interleaved_gauss [512] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, - 11, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17, 17, - 18, 19, 19, 20, 20, 21, 21, 22, 23, 23, 24, 24, 25, 26, 27, 27, - 28, 29, 29, 30, 31, 32, 32, 33, 34, 35, 36, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 58, 59, 60, 61, 62, 64, 65, 66, 67, 69, 70, 71, 73, 74, 76, 77, - 78, 80, 81, 83, 84, 86, 87, 89, 90, 92, 94, 95, 97, 99, 100, 102, - 104, 106, 107, 109, 111, 113, 115, 117, 118, 120, 122, 124, 126, 128, 130, 132, - 134, 137, 139, 141, 143, 145, 147, 150, 152, 154, 156, 159, 161, 163, 166, 168, - 171, 173, 175, 178, 180, 183, 186, 188, 191, 193, 196, 199, 201, 204, 207, 210, - 212, 215, 218, 221, 224, 227, 230, 233, 236, 239, 242, 245, 248, 251, 254, 257, - 260, 263, 267, 270, 273, 276, 280, 283, 286, 290, 293, 297, 300, 304, 307, 311, - 314, 318, 321, 325, 328, 332, 336, 339, 343, 347, 351, 354, 358, 362, 366, 370, - 374, 378, 381, 385, 389, 
393, 397, 401, 405, 410, 414, 418, 422, 426, 430, 434, - 439, 443, 447, 451, 456, 460, 464, 469, 473, 477, 482, 486, 491, 495, 499, 504, - 508, 513, 517, 522, 527, 531, 536, 540, 545, 550, 554, 559, 563, 568, 573, 577, - 582, 587, 592, 596, 601, 606, 611, 615, 620, 625, 630, 635, 640, 644, 649, 654, - 659, 664, 669, 674, 678, 683, 688, 693, 698, 703, 708, 713, 718, 723, 728, 732, - 737, 742, 747, 752, 757, 762, 767, 772, 777, 782, 787, 792, 797, 802, 806, 811, - 816, 821, 826, 831, 836, 841, 846, 851, 855, 860, 865, 870, 875, 880, 884, 889, - 894, 899, 904, 908, 913, 918, 923, 927, 932, 937, 941, 946, 951, 955, 960, 965, - 969, 974, 978, 983, 988, 992, 997,1001,1005,1010,1014,1019,1023,1027,1032,1036, -1040,1045,1049,1053,1057,1061,1066,1070,1074,1078,1082,1086,1090,1094,1098,1102, -1106,1109,1113,1117,1121,1125,1128,1132,1136,1139,1143,1146,1150,1153,1157,1160, -1164,1167,1170,1174,1177,1180,1183,1186,1190,1193,1196,1199,1202,1205,1207,1210, -1213,1216,1219,1221,1224,1227,1229,1232,1234,1237,1239,1241,1244,1246,1248,1251, -1253,1255,1257,1259,1261,1263,1265,1267,1269,1270,1272,1274,1275,1277,1279,1280, -1282,1283,1284,1286,1287,1288,1290,1291,1292,1293,1294,1295,1296,1297,1297,1298, -1299,1300,1300,1301,1302,1302,1303,1303,1303,1304,1304,1304,1304,1304,1305,1305, + 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303, + 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299, + 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292, + 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282, + 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269, + 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253, + 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234, + 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213, + 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 
156,1196, 154,1193, 152,1190, + 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164, + 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136, + 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106, + 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074, + 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040, + 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005, + 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969, + 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932, + 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894, + 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855, + 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816, + 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777, + 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737, + 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698, + 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659, + 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620, + 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582, + 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545, + 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508, + 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473, + 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439, + 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405, + 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374, }; -inline int Spc_Dsp::interpolate( voice_t const* v ) -{ - // Make pointers into gaussian based on fractional position between samples - int offset = v->interp_pos >> 4 & 0xFF; - short const* fwd = gauss + 255 - offset; - short const* rev = gauss + offset; // mirror left half of gaussian - - int 
const* in = &v->buf [(v->interp_pos >> 12) + v->buf_pos]; - int out; - out = (fwd [ 0] * in [0]) >> 11; - out += (fwd [256] * in [1]) >> 11; - out += (rev [256] * in [2]) >> 11; - out = (int16_t) out; - out += (rev [ 0] * in [3]) >> 11; - - CLAMP16( out ); - out &= ~1; - return out; -} - //// Counters -int const simple_counter_range = 2048 * 5 * 3; // 30720 +#define RATE( rate, div )\ + (rate >= div ? rate / div * 8 - 1 : rate - 1) -static unsigned const counter_rates [32] = +static unsigned const counter_mask [32] = { - simple_counter_range + 1, // never fires - 2048, 1536, - 1280, 1024, 768, - 640, 512, 384, - 320, 256, 192, - 160, 128, 96, - 80, 64, 48, - 40, 32, 24, - 20, 16, 12, - 10, 8, 6, - 5, 4, 3, - 2, - 1 -}; -static unsigned const counter_offsets [32] = -{ - 1, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 0, - 0 + RATE( 2,2), RATE(2048,4), RATE(1536,3), + RATE(1280,5), RATE(1024,4), RATE( 768,3), + RATE( 640,5), RATE( 512,4), RATE( 384,3), + RATE( 320,5), RATE( 256,4), RATE( 192,3), + RATE( 160,5), RATE( 128,4), RATE( 96,3), + RATE( 80,5), RATE( 64,4), RATE( 48,3), + RATE( 40,5), RATE( 32,4), RATE( 24,3), + RATE( 20,5), RATE( 16,4), RATE( 12,3), + RATE( 10,5), RATE( 8,4), RATE( 6,3), + RATE( 5,5), RATE( 4,4), RATE( 3,3), + RATE( 2,4), + RATE( 1,4) }; +#undef RATE inline void Spc_Dsp::init_counter() { - m.counter = 0; -} - -inline void Spc_Dsp::run_counters() -{ - if ( --m.counter < 0 ) - m.counter = simple_counter_range - 1; -} - -inline unsigned Spc_Dsp::read_counter( int rate ) -{ - return ((unsigned) m.counter + counter_offsets [rate]) % counter_rates [rate]; -} - - -//// Envelope - -inline void Spc_Dsp::run_envelope( voice_t* const v ) -{ - int env = v->env; - if ( v->env_mode == env_release ) // 60% + // counters start out with this synchronization + m.counters [0] = 1; + m.counters [1] = 0; + m.counters [2] = -0x20u; + m.counters [3] = 
0x0B; + + int n = 2; + for ( int i = 1; i < 32; i++ ) { - if ( (env -= 0x8) < 0 ) - env = 0; - v->env = env; + m.counter_select [i] = &m.counters [n]; + if ( !--n ) + n = 3; } - else + m.counter_select [ 0] = &m.counters [0]; + m.counter_select [30] = &m.counters [2]; +} + +inline void Spc_Dsp::run_counter( int i ) +{ + int n = m.counters [i]; + if ( !(n-- & 7) ) + n -= 6 - i; + m.counters [i] = n; +} + +#define READ_COUNTER( rate )\ + (*m.counter_select [rate] & counter_mask [rate]) + + +//// Emulation + +void Spc_Dsp::run( int clock_count ) +{ + int new_phase = m.phase + clock_count; + int count = new_phase >> 5; + m.phase = new_phase & 31; + if ( !count ) + return; + + byte* const ram = this->ram; + byte const* const dir = &ram [REG(dir) * 0x100]; + int const slow_gaussian = (REG(pmon) >> 1) | REG(non); + int const noise_rate = REG(flg) & 0x1F; + + // Global volume + int mvoll = (int8_t) REG(mvoll); + int mvolr = (int8_t) REG(mvolr); + if ( mvoll * mvolr < surround_threshold ) + mvoll = -mvoll; // eliminate surround + + do { - int rate; - int env_data = VREG(v->regs,adsr1); - if ( m.t_adsr0 & 0x80 ) // 99% ADSR + // KON/KOFF reading + if ( (m.every_other_sample ^= 1) != 0 ) { - if ( v->env_mode >= env_decay ) // 99% - { - env--; - env -= env >> 8; - rate = env_data & 0x1F; - if ( v->env_mode == env_decay ) // 1% - rate = (m.t_adsr0 >> 3 & 0x0E) + 0x10; - } - else // env_attack - { - rate = (m.t_adsr0 & 0x0F) * 2 + 1; - env += rate < 31 ? 
0x20 : 0x400; - } - } - else // GAIN - { - int mode; - env_data = VREG(v->regs,gain); - mode = env_data >> 5; - if ( mode < 4 ) // direct - { - env = env_data * 0x10; - rate = 31; - } - else - { - rate = env_data & 0x1F; - if ( mode == 4 ) // 4: linear decrease - { - env -= 0x20; - } - else if ( mode < 6 ) // 5: exponential decrease - { - env--; - env -= env >> 8; - } - else // 6,7: linear increase - { - env += 0x20; - if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 ) - env += 0x8 - 0x20; // 7: two-slope linear increase - } - } + m.new_kon &= ~m.kon; + m.kon = m.new_kon; + m.t_koff = REG(koff); } - // Sustain level - if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay ) - v->env_mode = env_sustain; - - v->hidden_env = env; - - // unsigned cast because linear decrease going negative also triggers this - if ( (unsigned) env > 0x7FF ) - { - env = (env < 0 ? 0 : 0x7FF); - if ( v->env_mode == env_attack ) - v->env_mode = env_decay; - } - - if ( !read_counter( rate ) ) - v->env = env; // nothing else is controlled by the counter - } -} - - -//// BRR Decoding - -inline void Spc_Dsp::decode_brr( voice_t* v ) -{ - // Arrange the four input nybbles in 0xABCD order for easy decoding - int nybbles = m.t_brr_byte * 0x100 + ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF]; - - int const header = m.t_brr_header; - - // Write to next four samples in circular buffer - int* pos = &v->buf [v->buf_pos]; - if ( (v->buf_pos += 4) >= brr_buf_size ) - v->buf_pos = 0; - - // Decode four samples - for ( int* end = pos + 4; pos < end; pos++ ) - { - // Extract nybble and sign-extend - int s = (int16_t) nybbles >> 12; - nybbles <<= 4; - - // Shift sample based on header - int const shift = header >> 4; - s = (s << shift) >> 1; - if ( shift >= 0xD ) // handle invalid range - s = (s >> 25) << 11; // same as: s = (s < 0 ? 
-0x800 : 0) - - // Apply (unstable) IIR filter (8 is the most commonly used) - int const filter = header & 0x0C; - int const p1 = pos [brr_buf_size - 1]; - int const p2 = pos [brr_buf_size - 2] >> 1; - if ( filter >= 8 ) // most common one - { - s += p1; - s -= p2; - if ( filter == 8 ) // pos[0] = s*2 + pos[-1] * 1.09625 - pos[-2] * 0.9375 - { - s += p2 >> 4; - s += (p1 * -3) >> 6; - } - else // pos[0] = s*2 + pos[-1] * 1.796875 - pos[-2] * 0.8125 - { - s += (p1 * -13) >> 7; - s += (p2 * 3) >> 4; - } - } - else if ( filter ) // pos[0] = s*2 + pos[-1] * 0.9375 - { - s += p1 >> 1; - s += (-p1) >> 5; - } - - // Adjust and write sample - CLAMP16( s ); - s = (int16_t) (s * 2); - pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around - } -} - - -//// Misc - -#define MISC_CLOCK( n ) inline void Spc_Dsp::misc_##n() - -MISC_CLOCK( 27 ) -{ - m.t_pmon = REG(pmon) & 0xFE; // voice 0 doesn't support PMON -} -MISC_CLOCK( 28 ) -{ - m.t_non = REG(non); - m.t_eon = REG(eon); - m.t_dir = REG(dir); -} -MISC_CLOCK( 29 ) -{ - if ( (m.every_other_sample ^= 1) != 0 ) - m.new_kon &= ~m.kon; // clears KON 63 clocks after it was last read -} -MISC_CLOCK( 30 ) -{ - if ( m.every_other_sample ) - { - m.kon = m.new_kon; - m.t_koff = REG(koff) | mute_mask; - } - - run_counters(); - - // Noise - if ( !read_counter( REG(flg) & 0x1F ) ) - { - int feedback = (m.noise << 13) ^ (m.noise << 14); - m.noise = (feedback & 0x4000) ^ (m.noise >> 1); - } -} - - -//// Voices - -#define VOICE_CLOCK( n ) void Spc_Dsp::voice_##n( voice_t* const v ) - -inline VOICE_CLOCK( V1 ) -{ - m.t_dir_addr = m.t_dir * 0x100 + m.t_srcn * 4; - m.t_srcn = VREG(v->regs,srcn); -} -inline VOICE_CLOCK( V2 ) -{ - // Read sample pointer (ignored if not needed) - byte const* entry = &ram [m.t_dir_addr]; - if ( !v->kon_delay ) - entry += 2; - m.t_brr_next_addr = GET_LE16A( entry ); - - m.t_adsr0 = VREG(v->regs,adsr0); - - // Read pitch, spread over two clocks - m.t_pitch = VREG(v->regs,pitchl); -} -inline VOICE_CLOCK( 
V3a ) -{ - m.t_pitch += (VREG(v->regs,pitchh) & 0x3F) << 8; -} -inline VOICE_CLOCK( V3b ) -{ - // Read BRR header and byte - m.t_brr_byte = ram [(v->brr_addr + v->brr_offset) & 0xFFFF]; - m.t_brr_header = ram [v->brr_addr]; // brr_addr doesn't need masking -} -VOICE_CLOCK( V3c ) -{ - // Pitch modulation using previous voice's output - if ( m.t_pmon & v->vbit ) - m.t_pitch += ((m.t_output >> 5) * m.t_pitch) >> 10; - - if ( v->kon_delay ) - { - // Get ready to start BRR decoding on next sample - if ( v->kon_delay == 5 ) - { - v->brr_addr = m.t_brr_next_addr; - v->brr_offset = 1; - v->buf_pos = 0; - m.t_brr_header = 0; // header is ignored on this sample - kon_check = true; - } - - // Envelope is never run during KON - v->env = 0; - v->hidden_env = 0; - - // Disable BRR decoding until last three samples - v->interp_pos = 0; - if ( --v->kon_delay & 3 ) - v->interp_pos = 0x4000; - - // Pitch is never added during KON - m.t_pitch = 0; - } - - // Gaussian interpolation - { - int output = interpolate( v ); + run_counter( 1 ); + run_counter( 2 ); + run_counter( 3 ); // Noise - if ( m.t_non & v->vbit ) - output = (int16_t) (m.noise * 2); - - // Apply envelope - m.t_output = (output * v->env) >> 11 & ~1; - v->t_envx_out = (byte) (v->env >> 4); - } - - // Immediate silence due to end of sample or soft reset - if ( REG(flg) & 0x80 || (m.t_brr_header & 3) == 1 ) - { - v->env_mode = env_release; - v->env = 0; - } - - if ( m.every_other_sample ) - { - // KOFF - if ( m.t_koff & v->vbit && (!new_snes || v->kon_delay < 3) ) - v->env_mode = env_release; - - // KON - if ( m.kon & v->vbit ) + if ( !READ_COUNTER( noise_rate ) ) { - v->kon_delay = 5; - v->env_mode = env_attack; + int feedback = (m.noise << 13) ^ (m.noise << 14); + m.noise = (feedback & 0x4000) ^ (m.noise >> 1); } - } - - // Run envelope for next sample - if ( !v->kon_delay ) - run_envelope( v ); -} -inline void Spc_Dsp::voice_output( voice_t const* v, int ch ) -{ - // Apply left/right volume - int amp = (m.t_output * 
(int8_t) VREG(v->regs,voll + ch)) >> 7; - - // Avoid negative volume if surround is disabled - // (emulator feature; not part of actual DSP) - if ( (int8_t) VREG(v->regs,voll + ch) < surround_threshold ) - amp = -amp; - - // Add to output total - m.t_main_out [ch] += amp; - CLAMP16( m.t_main_out [ch] ); - - // Optionally add to echo total - if ( m.t_eon & v->vbit ) - { - m.t_echo_out [ch] += amp; - CLAMP16( m.t_echo_out [ch] ); - } -} -VOICE_CLOCK( V4 ) -{ - // Decode BRR - m.t_looped = 0; - if ( v->interp_pos >= 0x4000 ) - { - decode_brr( v ); - if ( (v->brr_offset += 2) >= brr_block_size ) + // Voices + int pmon_input = 0; + int main_out_l = 0; + int main_out_r = 0; + int echo_out_l = 0; + int echo_out_r = 0; + voice_t* v = m.voices; + byte* v_regs = regs; + int vbit = 1; + do { - // Start decoding next BRR block - assert( v->brr_offset == brr_block_size ); - v->brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF; - if ( m.t_brr_header & 1 ) + #define SAMPLE_PTR(i) GET_LE16A( &dir [VREG(v_regs,srcn) * 4 + i * 2] ) + + int brr_header = ram [v->brr_addr]; + int kon_delay = v->kon_delay; + + // Pitch + int pitch = GET_LE16A( &VREG(v_regs,pitchl) ) & 0x3FFF; + if ( REG(pmon) & vbit ) + pitch += ((pmon_input >> 5) * pitch) >> 10; + + // KON phases + if ( --kon_delay >= 0 ) { - v->brr_addr = m.t_brr_next_addr; - m.t_looped = v->vbit; + v->kon_delay = kon_delay; + + // Get ready to start BRR decoding on next sample + if ( kon_delay == 4 ) + { + v->brr_addr = SAMPLE_PTR( 0 ); + v->brr_offset = 1; + v->buf_pos = v->buf; + brr_header = 0; // header is ignored on this sample + } + + // Envelope is never run during KON + v->env = 0; + v->hidden_env = 0; + + // Disable BRR decoding until last three samples + v->interp_pos = (kon_delay & 3 ? 
0x4000 : 0); + + // Pitch is never added during KON + pitch = 0; } - v->brr_offset = 1; + + int env = v->env; + + // Gaussian interpolation + { + int output = 0; + VREG(v_regs,envx) = (byte) (env >> 4); + if ( env ) + { + // Make pointers into gaussian based on fractional position between samples + int offset = (unsigned) v->interp_pos >> 3 & 0x1FE; + short const* fwd = interleaved_gauss + offset; + short const* rev = interleaved_gauss + 510 - offset; // mirror left half of gaussian + + int const* in = &v->buf_pos [(unsigned) v->interp_pos >> 12]; + + if ( !(slow_gaussian & vbit) ) // 99% + { + // Faster approximation when exact sample value isn't necessary for pitch mod + output = (fwd [0] * in [0] + + fwd [1] * in [1] + + rev [1] * in [2] + + rev [0] * in [3]) >> 11; + output = (output * env) >> 11; + } + else + { + output = (int16_t) (m.noise * 2); + if ( !(REG(non) & vbit) ) + { + output = (fwd [0] * in [0]) >> 11; + output += (fwd [1] * in [1]) >> 11; + output += (rev [1] * in [2]) >> 11; + output = (int16_t) output; + output += (rev [0] * in [3]) >> 11; + + CLAMP16( output ); + output &= ~1; + } + output = (output * env) >> 11 & ~1; + } + + // Output + int l = output * v->volume [0]; + int r = output * v->volume [1]; + + main_out_l += l; + main_out_r += r; + + if ( REG(eon) & vbit ) + { + echo_out_l += l; + echo_out_r += r; + } + } + + pmon_input = output; + VREG(v_regs,outx) = (byte) (output >> 8); + } + + // Soft reset or end of sample + if ( REG(flg) & 0x80 || (brr_header & 3) == 1 ) + { + v->env_mode = env_release; + env = 0; + } + + if ( m.every_other_sample ) + { + // KOFF + if ( m.t_koff & vbit ) + v->env_mode = env_release; + + // KON + if ( m.kon & vbit ) + { + v->kon_delay = 5; + v->env_mode = env_attack; + REG(endx) &= ~vbit; + } + } + + // Envelope + if ( !v->kon_delay ) + { + if ( v->env_mode == env_release ) // 97% + { + env -= 0x8; + v->env = env; + if ( env <= 0 ) + { + v->env = 0; + goto skip_brr; // no BRR decoding for you! 
+ } + } + else // 3% + { + int rate; + int const adsr0 = VREG(v_regs,adsr0); + int env_data = VREG(v_regs,adsr1); + if ( adsr0 >= 0x80 ) // 97% ADSR + { + if ( v->env_mode > env_decay ) // 89% + { + env--; + env -= env >> 8; + rate = env_data & 0x1F; + + // optimized handling + v->hidden_env = env; + if ( READ_COUNTER( rate ) ) + goto exit_env; + v->env = env; + goto exit_env; + } + else if ( v->env_mode == env_decay ) + { + env--; + env -= env >> 8; + rate = (adsr0 >> 3 & 0x0E) + 0x10; + } + else // env_attack + { + rate = (adsr0 & 0x0F) * 2 + 1; + env += rate < 31 ? 0x20 : 0x400; + } + } + else // GAIN + { + int mode; + env_data = VREG(v_regs,gain); + mode = env_data >> 5; + if ( mode < 4 ) // direct + { + env = env_data * 0x10; + rate = 31; + } + else + { + rate = env_data & 0x1F; + if ( mode == 4 ) // 4: linear decrease + { + env -= 0x20; + } + else if ( mode < 6 ) // 5: exponential decrease + { + env--; + env -= env >> 8; + } + else // 6,7: linear increase + { + env += 0x20; + if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 ) + env += 0x8 - 0x20; // 7: two-slope linear increase + } + } + } + + // Sustain level + if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay ) + v->env_mode = env_sustain; + + v->hidden_env = env; + + // unsigned cast because linear decrease going negative also triggers this + if ( (unsigned) env > 0x7FF ) + { + env = (env < 0 ? 
0 : 0x7FF); + if ( v->env_mode == env_attack ) + v->env_mode = env_decay; + } + + if ( !READ_COUNTER( rate ) ) + v->env = env; // nothing else is controlled by the counter + } + } + exit_env: + + { + // Apply pitch + int old_pos = v->interp_pos; + int interp_pos = (old_pos & 0x3FFF) + pitch; + if ( interp_pos > 0x7FFF ) + interp_pos = 0x7FFF; + v->interp_pos = interp_pos; + + // BRR decode if necessary + if ( old_pos >= 0x4000 ) + { + // Arrange the four input nybbles in 0xABCD order for easy decoding + int nybbles = ram [(v->brr_addr + v->brr_offset) & 0xFFFF] * 0x100 + + ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF]; + + // Advance read position + int const brr_block_size = 9; + int brr_offset = v->brr_offset; + if ( (brr_offset += 2) >= brr_block_size ) + { + // Next BRR block + int brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF; + assert( brr_offset == brr_block_size ); + if ( brr_header & 1 ) + { + brr_addr = SAMPLE_PTR( 1 ); + if ( !v->kon_delay ) + REG(endx) |= vbit; + } + v->brr_addr = brr_addr; + brr_offset = 1; + } + v->brr_offset = brr_offset; + + // Decode + + // 0: >>1 1: <<0 2: <<1 ... 
12: <<11 13-15: >>4 <<11 + static unsigned char const shifts [16 * 2] = { + 13,12,12,12,12,12,12,12,12,12,12, 12, 12, 16, 16, 16, + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11 + }; + int const scale = brr_header >> 4; + int const right_shift = shifts [scale]; + int const left_shift = shifts [scale + 16]; + + // Decode and write to next four samples in circular buffer + int* pos = v->buf_pos; + for ( int* end = pos + 4; pos < end; pos++ ) + { + // Extract upper nybble and scale appropriately + int s = ((int16_t) nybbles >> right_shift) << left_shift; + nybbles <<= 4; + + // Apply IIR filter (8 is the most commonly used) + int const filter = brr_header & 0x0C; + int const p1 = pos [brr_buf_size - 1]; + int const p2 = pos [brr_buf_size - 2] >> 1; + if ( filter >= 8 ) + { + s += p1; + s -= p2; + if ( filter == 8 ) // s += p1 * 0.953125 - p2 * 0.46875 + { + s += p2 >> 4; + s += (p1 * -3) >> 6; + } + else // s += p1 * 0.8984375 - p2 * 0.40625 + { + s += (p1 * -13) >> 7; + s += (p2 * 3) >> 4; + } + } + else if ( filter ) // s += p1 * 0.46875 + { + s += p1 >> 1; + s += (-p1) >> 5; + } + + // Adjust and write sample + CLAMP16( s ); + s = (int16_t) (s * 2); + pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around + } + + if ( pos >= &v->buf [brr_buf_size] ) + pos = v->buf; + v->buf_pos = pos; + } + } +skip_brr: + // Next voice + vbit <<= 1; + v_regs += 0x10; + v++; } - } - - // Apply pitch - v->interp_pos = (v->interp_pos & 0x3FFF) + m.t_pitch; - - // Keep from getting too far ahead (when using pitch modulation) - if ( v->interp_pos > 0x7FFF ) - v->interp_pos = 0x7FFF; - - // Output left - voice_output( v, 0 ); -} -inline VOICE_CLOCK( V5 ) -{ - // Output right - voice_output( v, 1 ); - - // ENDX, OUTX, and ENVX won't update if you wrote to them 1-2 clocks earlier - int endx_buf = REG(endx) | m.t_looped; - - // Clear bit in ENDX if KON just began - if ( v->kon_delay == 5 ) - endx_buf &= ~v->vbit; - m.endx_buf = (byte) endx_buf; -} -inline VOICE_CLOCK( 
V6 ) -{ - (void) v; // avoid compiler warning about unused v - m.outx_buf = (byte) (m.t_output >> 8); -} -inline VOICE_CLOCK( V7 ) -{ - // Update ENDX - REG(endx) = m.endx_buf; - - m.envx_buf = v->t_envx_out; -} -inline VOICE_CLOCK( V8 ) -{ - // Update OUTX - VREG(v->regs,outx) = m.outx_buf; -} -inline VOICE_CLOCK( V9 ) -{ - // Update ENVX - VREG(v->regs,envx) = m.envx_buf; -} - -// Most voices do all these in one clock, so make a handy composite -inline VOICE_CLOCK( V3 ) -{ - voice_V3a( v ); - voice_V3b( v ); - voice_V3c( v ); -} - -// Common combinations of voice steps on different voices. This greatly reduces -// code size and allows everything to be inlined in these functions. -VOICE_CLOCK(V7_V4_V1) { voice_V7(v); voice_V1(v+3); voice_V4(v+1); } -VOICE_CLOCK(V8_V5_V2) { voice_V8(v); voice_V5(v+1); voice_V2(v+2); } -VOICE_CLOCK(V9_V6_V3) { voice_V9(v); voice_V6(v+1); voice_V3(v+2); } - - -//// Echo - -// Current echo buffer pointer for left/right channel -#define ECHO_PTR( ch ) (&ram [m.t_echo_ptr + ch * 2]) - -// Sample in echo history buffer, where 0 is the oldest -#define ECHO_FIR( i ) (m.echo_hist_pos [i]) - -// Calculate FIR point for left/right channel -#define CALC_FIR( i, ch ) ((ECHO_FIR( i + 1 ) [ch] * (int8_t) REG(fir + i * 0x10)) >> 6) - -#define ECHO_CLOCK( n ) inline void Spc_Dsp::echo_##n() - -inline void Spc_Dsp::echo_read( int ch ) -{ - int s = GET_LE16SA( ECHO_PTR( ch ) ); - // second copy simplifies wrap-around handling - ECHO_FIR( 0 ) [ch] = ECHO_FIR( 8 ) [ch] = s >> 1; -} - -ECHO_CLOCK( 22 ) -{ - // History - if ( ++m.echo_hist_pos >= &m.echo_hist [echo_hist_size] ) - m.echo_hist_pos = m.echo_hist; - - m.t_echo_ptr = (m.t_esa * 0x100 + m.echo_offset) & 0xFFFF; - echo_read( 0 ); - - // FIR (using l and r temporaries below helps compiler optimize) - int l = CALC_FIR( 0, 0 ); - int r = CALC_FIR( 0, 1 ); - - m.t_echo_in [0] = l; - m.t_echo_in [1] = r; -} -ECHO_CLOCK( 23 ) -{ - int l = CALC_FIR( 1, 0 ) + CALC_FIR( 2, 0 ); - int r = CALC_FIR( 1, 1 
) + CALC_FIR( 2, 1 ); - - m.t_echo_in [0] += l; - m.t_echo_in [1] += r; - - echo_read( 1 ); -} -ECHO_CLOCK( 24 ) -{ - int l = CALC_FIR( 3, 0 ) + CALC_FIR( 4, 0 ) + CALC_FIR( 5, 0 ); - int r = CALC_FIR( 3, 1 ) + CALC_FIR( 4, 1 ) + CALC_FIR( 5, 1 ); - - m.t_echo_in [0] += l; - m.t_echo_in [1] += r; -} -ECHO_CLOCK( 25 ) -{ - int l = m.t_echo_in [0] + CALC_FIR( 6, 0 ); - int r = m.t_echo_in [1] + CALC_FIR( 6, 1 ); - - l = (int16_t) l; - r = (int16_t) r; - - l += (int16_t) CALC_FIR( 7, 0 ); - r += (int16_t) CALC_FIR( 7, 1 ); - - CLAMP16( l ); - CLAMP16( r ); - - m.t_echo_in [0] = l & ~1; - m.t_echo_in [1] = r & ~1; -} -inline int Spc_Dsp::echo_output( int ch ) -{ - int out = (int16_t) ((m.t_main_out [ch] * (int8_t) REG(mvoll + ch * 0x10)) >> 7) + - (int16_t) ((m.t_echo_in [ch] * (int8_t) REG(evoll + ch * 0x10)) >> 7); - CLAMP16( out ); - return out; -} -ECHO_CLOCK( 26 ) -{ - // Surround disabler (emulator feature; not part of actual DSP) - if ( (int8_t) REG(mvoll) * (int8_t) REG(mvolr) < surround_threshold ) - m.t_main_out [0] = -m.t_main_out [0]; // eliminate surround - - // Left output volumes - // (save sample for next clock so we can output both together) - m.t_main_out [0] = echo_output( 0 ); - - // Echo feedback - int l = m.t_echo_out [0] + (int16_t) ((m.t_echo_in [0] * (int8_t) REG(efb)) >> 7); - int r = m.t_echo_out [1] + (int16_t) ((m.t_echo_in [1] * (int8_t) REG(efb)) >> 7); - - CLAMP16( l ); - CLAMP16( r ); - - m.t_echo_out [0] = l & ~1; - m.t_echo_out [1] = r & ~1; -} -ECHO_CLOCK( 27 ) -{ - // Output - int l = m.t_main_out [0]; - int r = echo_output( 1 ); - m.t_main_out [0] = 0; - m.t_main_out [1] = 0; - - // TODO: global muting isn't this simple (turns DAC on and off - // or something, causing small ~37-sample pulse when first muted) - if ( REG(flg) & 0x40 ) - { - l = 0; - r = 0; - } - - // Output sample to DAC - SPC_DSP_OUT_HOOK( l, r ); -} -ECHO_CLOCK( 28 ) -{ - m.t_echo_enabled = REG(flg); -} -inline void Spc_Dsp::echo_write( int ch ) -{ - if ( 
!(m.t_echo_enabled & 0x20) ) - { - #ifdef SPC_DSP_ECHO_DEBUG - SPC_DSP_ECHO_DEBUG + while ( vbit < 0x100 ); + + // Echo position + int echo_offset = m.echo_offset; + byte* const echo_ptr = &ram [(REG(esa) * 0x100 + echo_offset) & 0xFFFF]; + if ( !echo_offset ) + m.echo_length = (REG(edl) & 0x0F) * 0x800; + echo_offset += 4; + if ( echo_offset >= m.echo_length ) + echo_offset = 0; + m.echo_offset = echo_offset; + + // FIR + int echo_in_l = GET_LE16SA( echo_ptr + 0 ); + int echo_in_r = GET_LE16SA( echo_ptr + 2 ); + + int (*echo_hist_pos) [2] = m.echo_hist_pos; + if ( ++echo_hist_pos >= &m.echo_hist [echo_hist_size] ) + echo_hist_pos = m.echo_hist; + m.echo_hist_pos = echo_hist_pos; + + echo_hist_pos [0] [0] = echo_hist_pos [8] [0] = echo_in_l; + echo_hist_pos [0] [1] = echo_hist_pos [8] [1] = echo_in_r; + + #define CALC_FIR_( i, in ) ((in) * (int8_t) REG(fir + i * 0x10)) + echo_in_l = CALC_FIR_( 7, echo_in_l ); + echo_in_r = CALC_FIR_( 7, echo_in_r ); + + #define CALC_FIR( i, ch ) CALC_FIR_( i, echo_hist_pos [i + 1] [ch] ) + #define DO_FIR( i )\ + echo_in_l += CALC_FIR( i, 0 );\ + echo_in_r += CALC_FIR( i, 1 ); + DO_FIR( 0 ); + DO_FIR( 1 ); + DO_FIR( 2 ); + #if defined (__MWERKS__) && __MWERKS__ < 0x3200 + __eieio(); // keeps compiler from stupidly "caching" things in memory #endif - SET_LE16A( ECHO_PTR( ch ), m.t_echo_out [ch] ); + DO_FIR( 3 ); + DO_FIR( 4 ); + DO_FIR( 5 ); + DO_FIR( 6 ); + + // Echo out + if ( !(REG(flg) & 0x20) ) + { + int l = (echo_out_l >> 7) + ((echo_in_l * (int8_t) REG(efb)) >> 14); + int r = (echo_out_r >> 7) + ((echo_in_r * (int8_t) REG(efb)) >> 14); + + // just to help pass more validation tests + #if SPC_MORE_ACCURACY + l &= ~1; + r &= ~1; + #endif + + CLAMP16( l ); + CLAMP16( r ); + + SET_LE16A( echo_ptr + 0, l ); + SET_LE16A( echo_ptr + 2, r ); + } + + // Sound out + int l = (main_out_l * mvoll + echo_in_l * (int8_t) REG(evoll)) >> 14; + int r = (main_out_r * mvolr + echo_in_r * (int8_t) REG(evolr)) >> 14; + + CLAMP16( l ); + CLAMP16( r 
); + + if ( (REG(flg) & 0x40) ) + { + l = 0; + r = 0; + } + + SPC_DSP_OUT_HOOK( l, r ); } - m.t_echo_out [ch] = 0; + while ( --count ); } -ECHO_CLOCK( 29 ) -{ - m.t_esa = REG(esa); - - if ( !m.echo_offset ) - m.echo_length = (REG(edl) & 0x0F) * 0x800; - - m.echo_offset += 4; - if ( m.echo_offset >= m.echo_length ) - m.echo_offset = 0; - - // Write left echo - echo_write( 0 ); - - m.t_echo_enabled = REG(flg); -} -ECHO_CLOCK( 30 ) -{ - // Write right echo - echo_write( 1 ); -} - - -//// Timing - -// Execute clock for a particular voice -#define V( clock, voice ) voice_##clock( &m.voices [voice] ); - -/* The most common sequence of clocks uses composite operations -for efficiency. For example, the following are equivalent to the -individual steps on the right: - -V(V7_V4_V1,2) -> V(V7,2) V(V4,3) V(V1,5) -V(V8_V5_V2,2) -> V(V8,2) V(V5,3) V(V2,4) -V(V9_V6_V3,2) -> V(V9,2) V(V6,3) V(V3,4) */ - -// Voice 0 1 2 3 4 5 6 7 -#define GEN_DSP_TIMING \ -PHASE( 0) V(V5,0)V(V2,1)\ -PHASE( 1) V(V6,0)V(V3,1)\ -PHASE( 2) V(V7_V4_V1,0)\ -PHASE( 3) V(V8_V5_V2,0)\ -PHASE( 4) V(V9_V6_V3,0)\ -PHASE( 5) V(V7_V4_V1,1)\ -PHASE( 6) V(V8_V5_V2,1)\ -PHASE( 7) V(V9_V6_V3,1)\ -PHASE( 8) V(V7_V4_V1,2)\ -PHASE( 9) V(V8_V5_V2,2)\ -PHASE(10) V(V9_V6_V3,2)\ -PHASE(11) V(V7_V4_V1,3)\ -PHASE(12) V(V8_V5_V2,3)\ -PHASE(13) V(V9_V6_V3,3)\ -PHASE(14) V(V7_V4_V1,4)\ -PHASE(15) V(V8_V5_V2,4)\ -PHASE(16) V(V9_V6_V3,4)\ -PHASE(17) V(V1,0) V(V7,5)V(V4,6)\ -PHASE(18) V(V8_V5_V2,5)\ -PHASE(19) V(V9_V6_V3,5)\ -PHASE(20) V(V1,1) V(V7,6)V(V4,7)\ -PHASE(21) V(V8,6)V(V5,7) V(V2,0) /* t_brr_next_addr order dependency */\ -PHASE(22) V(V3a,0) V(V9,6)V(V6,7) echo_22();\ -PHASE(23) V(V7,7) echo_23();\ -PHASE(24) V(V8,7) echo_24();\ -PHASE(25) V(V3b,0) V(V9,7) echo_25();\ -PHASE(26) echo_26();\ -PHASE(27) misc_27(); echo_27();\ -PHASE(28) misc_28(); echo_28();\ -PHASE(29) misc_29(); echo_29();\ -PHASE(30) misc_30();V(V3c,0) echo_30();\ -PHASE(31) V(V4,0) V(V1,2)\ - -#if !SPC_DSP_CUSTOM_RUN - -void Spc_Dsp::run( int 
clocks_remain ) -{ - require( clocks_remain > 0 ); - - int const phase = m.phase; - m.phase = (phase + clocks_remain) & 31; - switch ( phase ) - { - loop: - - #define PHASE( n ) if ( n && !--clocks_remain ) break; case n: - GEN_DSP_TIMING - #undef PHASE - - if ( --clocks_remain ) - goto loop; - } -} - -#endif //// Setup +void Spc_Dsp::apply_output_enables() +{ + for ( int i = 0; i < voice_count; i++ ) + update_voice_vol( i * 0x10 ); +} + void Spc_Dsp::init( void* ram_64k ) { ram = (byte*) ram_64k; - disable_surround( false ); + disable_surround( false ); // must be before mute_voices mute_voices( 0 ); set_output( NULL, 0 ); reset(); @@ -856,28 +668,25 @@ void Spc_Dsp::soft_reset() m.phase = 0; init_counter(); - - kon_check = false; } void Spc_Dsp::load( byte const new_regs [register_count] ) { - memcpy( regs, new_regs, register_count ); + memcpy( regs, new_regs, sizeof regs ); BLARGG_CLEAR( &m ); for ( int i = voice_count; --i >= 0; ) { - voice_t* v = &m.voices [i]; - v->brr_offset = 1; - v->vbit = 1 << i; - v->regs = ®s [i * 0x10]; + voice_t& v = m.voices [i]; + v.brr_offset = 1; + v.buf_pos = v.buf; } m.new_kon = REG(kon); - m.t_dir = REG(dir); - m.t_esa = REG(esa); soft_reset(); REG(flg) = new_regs [r_flg]; // soft_reset() overwrites this + + apply_output_enables(); } void Spc_Dsp::reset() @@ -892,6 +701,6 @@ void Spc_Dsp::reset() 0x75,0xF5,0x06,0x97,0x10,0xC3,0x24,0xBB,0x00,0x00,0x7B,0x7A,0xE0,0x60,0x12,0x0F, 0xF7,0x74,0x1C,0xE5,0x39,0x3D,0x73,0xC1,0x00,0x00,0x7A,0xB3,0xFF,0x4E,0x7B,0xFF }; - + load( initial_regs ); } diff --git a/bsnes/smp/snes_spc/Spc_Dsp.h b/bsnes/smp/snes_spc/Spc_Dsp.h index 95f136d7..0da18456 100755 --- a/bsnes/smp/snes_spc/Spc_Dsp.h +++ b/bsnes/smp/snes_spc/Spc_Dsp.h @@ -1,4 +1,4 @@ -// Highly accurate SNES SPC-700 DSP emulator +// Fast SNES SPC-700 DSP emulator (about 3x speed of accurate one) // snes_spc 0.9.5 #ifndef BLARGG_SPC_DSP_H @@ -8,16 +8,18 @@ BLARGG_NAMESPACE_BEGIN -extern "C" { typedef void (*dsp_copy_func_t)( unsigned char** 
io, void* state, size_t ); } - struct Spc_Dsp { public: typedef BOOST::uint8_t byte; // Setup - + // Initializes DSP and has it use the 64K RAM provided void init( void* ram_64k ); + + // Sets function that is called when output buffer is filled, or NULL for none + blargg_callback set_output_callback; + //void set_output_callback( void (*func)( void* user_data ), void* user_data ); // Sets destination for output samples. If begin is NULL, doesn't generate any. typedef short sample_t; @@ -26,52 +28,42 @@ public: // Current position in output buffer, or NULL if no buffer set sample_t* output_ptr() const; - // Sets function that is called when output buffer is filled, or NULL for none - blargg_callback set_output_callback; - //void set_output_callback( void (*func)( void* user_data ), void* user_data ); - -// Emulation + // Number of samples written to output buffer since last set, or 0 if no buffer set. + int sample_count() const; +// Emulation + // Resets DSP to power-on state void reset(); // Emulates pressing reset switch on SNES void soft_reset(); - // Reads/writes DSP registers. For accuracy, you must first call run() + // Reads/writes DSP registers. For accuracy, you must first call spc_run_dsp() // to catch the DSP up to present. int read ( int addr ) const; void write( int addr, int data ); // Runs DSP for specified number of clocks (~1024000 per second). Every 32 clocks - // a pair of samples is be generated. + // a pair of samples is generated. void run( int clock_count ); - + // Sound control - // Using these reduces emulation accuracy. - - // Mutes voices corresponding to non-zero bits in mask (issues repeated KOFF events). + // Mutes voices corresponding to non-zero bits in mask (overrides VxVOL with 0). + // Reduces emulation accuracy. 
enum { voice_count = 8 }; - void mute_voices( int mask ) { mute_mask = mask; } + void mute_voices( int mask ) { mute_mask = mask; } // If true, prevents channels and global volumes from being phase-negated void disable_surround( bool disable = true ); - + // State // Resets DSP and uses supplied values to initialize registers enum { register_count = 128 }; void load( byte const regs [register_count] ); - // Saves/loads exact emulator state - enum { state_size = 640 }; // maximum space needed when saving - typedef dsp_copy_func_t copy_func_t; - void copy_state( unsigned char** io, copy_func_t ); - - // Returns non-zero if new key-on events occurred since last call - bool check_kon(); - // DSP register addresses // Global registers @@ -109,21 +101,17 @@ public: struct voice_t { int buf [brr_buf_size*2];// decoded samples (twice the size to simplify wrap handling) - int buf_pos; // place in buffer where next samples will be decoded + int* buf_pos; // place in buffer where next samples will be decoded int interp_pos; // relative fractional position in sample (0x1000 = 1.0) int brr_addr; // address of current BRR block int brr_offset; // current decoding offset in BRR block - byte* regs; // pointer to voice's DSP registers - int vbit; // bitmask for voice: 0x01 for voice 0, 0x02 for voice 1, etc. 
int kon_delay; // KON delay/current setup phase env_mode_t env_mode; int env; // current envelope level int hidden_env; // used by GAIN mode 7, very obscure quirk - byte t_envx_out; + int volume [2]; // copy of volume from DSP registers, with surround disabled }; private: - enum { brr_block_size = 9 }; - // non-emulation state byte* ram; // 64K shared RAM between DSP and SMP int mute_mask; @@ -133,145 +121,81 @@ private: sample_t* output_end; sample_t* user_output_end; sample_t dummy_buf [2]; - bool kon_check; // set when a new KON occurs struct state_t { int every_other_sample; // toggles every sample int kon; // KON value when last checked int noise; - int counter; int echo_offset; // offset from ESA in echo buffer int echo_length; // number of bytes that echo_offset will stop at int phase; // next clock cycle to run (0-31) + unsigned counters [4]; - // Hidden registers also written to when main register is written to - int new_kon; - byte endx_buf; - byte envx_buf; - byte outx_buf; - - // Temporary state between clocks - - // read once per sample - int t_pmon; - int t_non; - int t_eon; - int t_dir; + int new_kon; int t_koff; - // read a few clocks ahead then used - int t_brr_next_addr; - int t_adsr0; - int t_brr_header; - int t_brr_byte; - int t_srcn; - int t_esa; - int t_echo_enabled; - - // internal state that is recalculated every sample - int t_dir_addr; - int t_pitch; - int t_output; - int t_looped; - int t_echo_ptr; - - // left/right sums - int t_main_out [2]; - int t_echo_out [2]; - int t_echo_in [2]; - - voice_t voices [voice_count]; - // Echo history keeps most recent 8 samples (twice the size to simplify wrap handling) int (*echo_hist_pos) [2]; // &echo_hist [0 to 7] int echo_hist [echo_hist_size * 2] [2]; + + unsigned* counter_select [32]; + voice_t voices [voice_count]; }; state_t m; byte regs [register_count]; void init_counter(); - void run_counters(); - unsigned read_counter( int rate ); - - int interpolate( voice_t const* v ); - void 
run_envelope( voice_t* const v ); - void decode_brr( voice_t* v ); - - void misc_27(); - void misc_28(); - void misc_29(); - void misc_30(); - - void voice_output( voice_t const* v, int ch ); - void voice_V1( voice_t* const ); - void voice_V2( voice_t* const ); - void voice_V3( voice_t* const ); - void voice_V3a( voice_t* const ); - void voice_V3b( voice_t* const ); - void voice_V3c( voice_t* const ); - void voice_V4( voice_t* const ); - void voice_V5( voice_t* const ); - void voice_V6( voice_t* const ); - void voice_V7( voice_t* const ); - void voice_V8( voice_t* const ); - void voice_V9( voice_t* const ); - void voice_V7_V4_V1( voice_t* const ); - void voice_V8_V5_V2( voice_t* const ); - void voice_V9_V6_V3( voice_t* const ); - - void echo_read( int ch ); - int echo_output( int ch ); - void echo_write( int ch ); - void echo_22(); - void echo_23(); - void echo_24(); - void echo_25(); - void echo_26(); - void echo_27(); - void echo_28(); - void echo_29(); - void echo_30(); - + void run_counter( int ); + void update_voice_vol( int addr ); void set_null_output(); void write_sample( int l, int r ); + void apply_output_enables(); }; -#include - inline int Spc_Dsp::read( int addr ) const { assert( (unsigned) addr < register_count ); - return regs [addr]; } +inline void Spc_Dsp::update_voice_vol( int addr ) +{ + int l = (int8_t) regs [addr + v_voll]; + int r = (int8_t) regs [addr + v_volr]; + + if ( l * r < surround_threshold ) + { + // signs differ, so negate those that are negative + l ^= l >> 7; + r ^= r >> 7; + } + + int index = addr >> 4; + voice_t& v = m.voices [index]; + int enabled = ~mute_mask >> index & 1; + v.volume [0] = l * enabled; + v.volume [1] = r * enabled; +} + inline void Spc_Dsp::write( int addr, int data ) { assert( (unsigned) addr < register_count ); regs [addr] = (byte) data; - switch ( addr & 0x0F ) + int low = addr & 0x0F; + if ( low < 0x2 ) // voice volumes + { + update_voice_vol( low ^ addr /* addr & 0xF0 */ ); + } + else if ( low == 0xC ) { - 
case v_envx: - m.envx_buf = (byte) data; - break; - - case v_outx: - m.outx_buf = (byte) data; - break; - - case 0x0C: if ( addr == r_kon ) m.new_kon = (byte) data; if ( addr == r_endx ) // always cleared, regardless of data written - { - m.endx_buf = 0; regs [r_endx] = 0; - } - break; } } @@ -280,40 +204,22 @@ inline void Spc_Dsp::disable_surround( bool disable ) surround_threshold = disable ? 0 : -0x4000; } -inline bool Spc_Dsp::check_kon() -{ - bool old = kon_check; - kon_check = 0; - return old; -} - inline Spc_Dsp::sample_t* Spc_Dsp::output_ptr() const { // Don't return pointer into dummy_buf return (output_ptr_ != dummy_buf ? output_ptr_ : user_output_end); } -class SPC_State_Copier { - Spc_Dsp::copy_func_t func; - unsigned char** buf; -public: - SPC_State_Copier( unsigned char** p, Spc_Dsp::copy_func_t f ) { func = f; buf = p; } - void copy( void* state, size_t size ); - int copy_int( int state, int size ); - void skip( int count ); - - // Reads uint8_t and then skips that many bytes. If writing, writes - // uint8_t of 0. This allows future expansion at this point, by writing - // non-zero and additional data. - void extra(); -}; - -#define SPC_COPY( type, state )\ -{\ - state = (BOOST::type) copier.copy_int( state, sizeof (BOOST::type) );\ - check( (BOOST::type) state == state );\ +inline int Spc_Dsp::sample_count() const +{ + sample_t* p = output_ptr(); + return (p ? p - output_begin : 0); } +#define SPC_NO_COPY_STATE_FUNCS 1 + +#define SPC_LESS_ACCURATE 1 + BLARGG_NAMESPACE_END #endif