From fa0f1c1e98ade9ed4ea8afe007bea8b05325a971 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Thu, 12 Aug 2010 10:39:41 +1000 Subject: [PATCH] Update to 20100811 release. byuu says: 12-15% faster than v067.10, and my Atom never goes below 58fps for normal lo-res games at this point. Just a little more and I can leave Async on. That's pretty much it though for the low hanging fruit. Everything else will be a lot of work for a little gain. Speedups are from range testing across scanline boundaries and from using blargg's fast DSP core. Snes9X is now only 1.93x faster than bsnes, and bsnes is now faster than Super Sleuth. I also fixed the Circuit USA menus (HDMA timing adjustment), Wild Guns flickering (IRQ lock) and Jumpin' Derby (external IRQ triggering.) There's definitely a lot of troublesome games, mostly the same ones we had in the past (Koushien 2, Robocop vs The Terminator, etc.) I'm definitely going to debug Starfox, but I may not bother with some of the more obscure ones. --- bsnes/cpu/cpu.cpp | 1 + bsnes/cpu/cpu.hpp | 9 +- bsnes/cpu/dma.cpp | 26 +- bsnes/cpu/timing.cpp | 64 +- bsnes/info.hpp | 2 +- bsnes/smp/snes_spc/Spc_Dsp.cpp | 1221 ++++++++++++++------------------ bsnes/smp/snes_spc/Spc_Dsp.h | 214 ++---- 7 files changed, 634 insertions(+), 903 deletions(-) diff --git a/bsnes/cpu/cpu.cpp b/bsnes/cpu/cpu.cpp index 481b5dca..cb836768 100755 --- a/bsnes/cpu/cpu.cpp +++ b/bsnes/cpu/cpu.cpp @@ -112,6 +112,7 @@ void CPU::reset() { status.irq_transition = false; status.irq_pending = false; + status.irq_lock = false; status.hdma_pending = false; status.wram_addr = 0x000000; diff --git a/bsnes/cpu/cpu.hpp b/bsnes/cpu/cpu.hpp index 9a995cc0..d478484b 100755 --- a/bsnes/cpu/cpu.hpp +++ b/bsnes/cpu/cpu.hpp @@ -34,11 +34,16 @@ private: void op_irq(uint16 vector); //timing + struct QueueEvent { + enum : unsigned { + DramRefresh, + HdmaRun, + }; + }; nall::priority_queue queue; void queue_event(unsigned id); void last_cycle(); void add_clocks(unsigned clocks); - void 
add_time(unsigned clocks); void scanline(); void run_auto_joypad_poll(); @@ -56,6 +61,7 @@ private: unsigned hdma_addr(unsigned i); unsigned hdma_iaddr(unsigned i); void dma_run(); + bool hdma_active_after(unsigned i); void hdma_update(unsigned i); void hdma_run(); void hdma_init(); @@ -104,6 +110,7 @@ private: bool irq_transition; bool irq_pending; + bool irq_lock; bool hdma_pending; unsigned wram_addr; diff --git a/bsnes/cpu/dma.cpp b/bsnes/cpu/dma.cpp index 404f880b..d9d89e05 100755 --- a/bsnes/cpu/dma.cpp +++ b/bsnes/cpu/dma.cpp @@ -83,6 +83,15 @@ void CPU::dma_run() { dma_transfer(channel[i].direction, dma_bbus(i, index++), dma_addr(i)); } while(channel[i].dma_enabled && --channel[i].transfer_size); } + + status.irq_lock = true; +} + +bool CPU::hdma_active_after(unsigned i) { /* true if any channel numbered above i (i+1..7) has hdma_enabled set and hdma_completed clear; channel i itself is not examined */ + for(unsigned n = i + 1; n < 8; n++) { + if(channel[n].hdma_enabled && !channel[n].hdma_completed) return true; + } + return false; } void CPU::hdma_update(unsigned i) { @@ -93,10 +102,15 @@ void CPU::hdma_update(unsigned i) { add_clocks(8); if(channel[i].indirect) { - channel[i].indirect_addr = dma_read(hdma_addr(i)) << 0; - add_clocks(8); - channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8; + channel[i].indirect_addr = dma_read(hdma_addr(i)) << 8; add_clocks(8); + + //emulating this glitch causes a slight slowdown; only enable if needed + //if(!channel[i].hdma_completed || hdma_active_after(i)) { + channel[i].indirect_addr >>= 8; + channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8; + add_clocks(8); + //} } } } @@ -108,7 +122,7 @@ void CPU::hdma_run() { } if(channels == 0) return; - add_clocks(16); + add_clocks(24); for(unsigned i = 0; i < 8; i++) { if(channel[i].hdma_enabled == false || channel[i].hdma_completed == true) continue; channel[i].dma_enabled = false; @@ -130,6 +144,8 @@ void CPU::hdma_run() { channel[i].hdma_do_transfer = channel[i].line_counter & 0x80; hdma_update(i); } + + status.irq_lock = true; } void CPU::hdma_init() { @@ -150,6 +166,8 @@ void 
CPU::hdma_init() { channel[i].line_counter = 0; hdma_update(i); } + + status.irq_lock = true; } void CPU::dma_reset() { diff --git a/bsnes/cpu/timing.cpp b/bsnes/cpu/timing.cpp index 95ed6d8e..7c333db7 100755 --- a/bsnes/cpu/timing.cpp +++ b/bsnes/cpu/timing.cpp @@ -1,12 +1,5 @@ #ifdef CPU_CPP -struct QueueEvent { - enum : unsigned { - DramRefresh, - HdmaRun, - }; -}; - void CPU::queue_event(unsigned id) { switch(id) { case QueueEvent::DramRefresh: return add_clocks(40); @@ -15,13 +8,18 @@ void CPU::queue_event(unsigned id) { } void CPU::last_cycle() { + if(status.irq_lock) { + status.irq_lock = false; + return; + } + if(status.nmi_transition) { regs.wai = false; status.nmi_transition = false; status.nmi_pending = true; } - if(status.irq_transition) { + if(status.irq_transition || regs.irq) { regs.wai = false; status.irq_transition = false; status.irq_pending = !regs.p.i; @@ -29,40 +27,32 @@ void CPU::last_cycle() { } void CPU::add_clocks(unsigned clocks) { - step(clocks); - queue.tick(clocks); - unsigned clocksleft = lineclocks() - hcounter(); - if(clocks > clocksleft) { - add_time(clocksleft); - add_time(clocks - clocksleft); - } else { - add_time(clocks); - } -} - -void CPU::add_time(unsigned clocks) { - if(status.irq_line && (status.virq_enabled || status.hirq_enabled)) { - status.irq_transition = true; - } - - if(status.virq_enabled && !status.hirq_enabled) { + if(status.hirq_enabled) { + if(status.virq_enabled) { + unsigned cpu_time = vcounter() * 1364 + hcounter(); + unsigned irq_time = status.vtime * 1364 + status.htime * 4; + if(cpu_time > irq_time) irq_time += 262 * 1364; + bool irq_valid = status.irq_valid; + status.irq_valid = cpu_time <= irq_time && cpu_time + clocks > irq_time; + if(!irq_valid && status.irq_valid) status.irq_line = true; + } else { + unsigned irq_time = status.htime * 4; + if(hcounter() > irq_time) irq_time += 1364; + bool irq_valid = status.irq_valid; + status.irq_valid = hcounter() <= irq_time && hcounter() + clocks > irq_time; + 
if(!irq_valid && status.irq_valid) status.irq_line = true; + } + if(status.irq_line) status.irq_transition = true; + } else if(status.virq_enabled) { bool irq_valid = status.irq_valid; status.irq_valid = vcounter() == status.vtime; - if(!irq_valid && status.irq_valid) { - status.irq_line = true; - status.irq_transition = true; - } - } else if(status.hirq_enabled) { - bool irq_valid = status.irq_valid; - status.irq_valid = hcounter() <= status.htime * 4 && hcounter() + clocks > status.htime * 4; - if(status.virq_enabled && vcounter() != status.vtime) status.irq_valid = false; - if(!irq_valid && status.irq_valid) { - status.irq_line = true; - status.irq_transition = true; - } + if(!irq_valid && status.irq_valid) status.irq_line = true; + if(status.irq_line) status.irq_transition = true; } tick(clocks); + queue.tick(clocks); + step(clocks); } void CPU::scanline() { diff --git a/bsnes/info.hpp b/bsnes/info.hpp index bed8783b..add71c68 100755 --- a/bsnes/info.hpp +++ b/bsnes/info.hpp @@ -1,7 +1,7 @@ namespace SNES { namespace Info { static const char Name[] = "bsnes"; - static const char Version[] = "067.10"; + static const char Version[] = "067.11"; static const unsigned SerializerVersion = 12; } } diff --git a/bsnes/smp/snes_spc/Spc_Dsp.cpp b/bsnes/smp/snes_spc/Spc_Dsp.cpp index 46412254..106980ac 100755 --- a/bsnes/smp/snes_spc/Spc_Dsp.cpp +++ b/bsnes/smp/snes_spc/Spc_Dsp.cpp @@ -21,9 +21,6 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include BLARGG_ENABLE_OPTIMIZER #endif -// New SNES DSP behaves slightly differently (not all differences handled yet) -bool const new_snes = false; - // if ( io < -32768 ) io = -32768; // if ( io > 32767 ) io = 32767; #define CLAMP16( io )\ @@ -93,736 +90,551 @@ inline void Spc_Dsp::write_sample( int l, int r ) // Volume registers and efb are signed! Easy to forget int8_t cast. // Prefixes are to avoid accidental use of locals with same names. 
-// Gaussian interpolation - -static short const gauss [512] = +// Interleaved gauss table (to improve cache coherency) +// interleaved_gauss [i] = gauss [(i & 1) * 256 + 255 - (i >> 1 & 0xFF)] +static short const interleaved_gauss [512] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, - 11, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17, 17, - 18, 19, 19, 20, 20, 21, 21, 22, 23, 23, 24, 24, 25, 26, 27, 27, - 28, 29, 29, 30, 31, 32, 32, 33, 34, 35, 36, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 58, 59, 60, 61, 62, 64, 65, 66, 67, 69, 70, 71, 73, 74, 76, 77, - 78, 80, 81, 83, 84, 86, 87, 89, 90, 92, 94, 95, 97, 99, 100, 102, - 104, 106, 107, 109, 111, 113, 115, 117, 118, 120, 122, 124, 126, 128, 130, 132, - 134, 137, 139, 141, 143, 145, 147, 150, 152, 154, 156, 159, 161, 163, 166, 168, - 171, 173, 175, 178, 180, 183, 186, 188, 191, 193, 196, 199, 201, 204, 207, 210, - 212, 215, 218, 221, 224, 227, 230, 233, 236, 239, 242, 245, 248, 251, 254, 257, - 260, 263, 267, 270, 273, 276, 280, 283, 286, 290, 293, 297, 300, 304, 307, 311, - 314, 318, 321, 325, 328, 332, 336, 339, 343, 347, 351, 354, 358, 362, 366, 370, - 374, 378, 381, 385, 389, 393, 397, 401, 405, 410, 414, 418, 422, 426, 430, 434, - 439, 443, 447, 451, 456, 460, 464, 469, 473, 477, 482, 486, 491, 495, 499, 504, - 508, 513, 517, 522, 527, 531, 536, 540, 545, 550, 554, 559, 563, 568, 573, 577, - 582, 587, 592, 596, 601, 606, 611, 615, 620, 625, 630, 635, 640, 644, 649, 654, - 659, 664, 669, 674, 678, 683, 688, 693, 698, 703, 708, 713, 718, 723, 728, 732, - 737, 742, 747, 752, 757, 762, 767, 772, 777, 782, 787, 792, 797, 802, 806, 811, - 816, 821, 826, 831, 836, 841, 846, 851, 855, 860, 865, 870, 875, 880, 884, 889, - 894, 899, 904, 908, 913, 918, 923, 927, 932, 937, 941, 946, 951, 955, 960, 965, - 969, 
974, 978, 983, 988, 992, 997,1001,1005,1010,1014,1019,1023,1027,1032,1036, -1040,1045,1049,1053,1057,1061,1066,1070,1074,1078,1082,1086,1090,1094,1098,1102, -1106,1109,1113,1117,1121,1125,1128,1132,1136,1139,1143,1146,1150,1153,1157,1160, -1164,1167,1170,1174,1177,1180,1183,1186,1190,1193,1196,1199,1202,1205,1207,1210, -1213,1216,1219,1221,1224,1227,1229,1232,1234,1237,1239,1241,1244,1246,1248,1251, -1253,1255,1257,1259,1261,1263,1265,1267,1269,1270,1272,1274,1275,1277,1279,1280, -1282,1283,1284,1286,1287,1288,1290,1291,1292,1293,1294,1295,1296,1297,1297,1298, -1299,1300,1300,1301,1302,1302,1303,1303,1303,1304,1304,1304,1304,1304,1305,1305, + 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303, + 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299, + 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292, + 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282, + 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269, + 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253, + 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234, + 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213, + 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190, + 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164, + 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136, + 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106, + 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074, + 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040, + 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005, + 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969, + 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 
932, + 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894, + 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855, + 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816, + 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777, + 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737, + 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698, + 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659, + 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620, + 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582, + 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545, + 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508, + 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473, + 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439, + 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405, + 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374, }; -inline int Spc_Dsp::interpolate( voice_t const* v ) -{ - // Make pointers into gaussian based on fractional position between samples - int offset = v->interp_pos >> 4 & 0xFF; - short const* fwd = gauss + 255 - offset; - short const* rev = gauss + offset; // mirror left half of gaussian - - int const* in = &v->buf [(v->interp_pos >> 12) + v->buf_pos]; - int out; - out = (fwd [ 0] * in [0]) >> 11; - out += (fwd [256] * in [1]) >> 11; - out += (rev [256] * in [2]) >> 11; - out = (int16_t) out; - out += (rev [ 0] * in [3]) >> 11; - - CLAMP16( out ); - out &= ~1; - return out; -} - //// Counters -int const simple_counter_range = 2048 * 5 * 3; // 30720 +#define RATE( rate, div )\ + (rate >= div ? 
rate / div * 8 - 1 : rate - 1) -static unsigned const counter_rates [32] = +static unsigned const counter_mask [32] = { - simple_counter_range + 1, // never fires - 2048, 1536, - 1280, 1024, 768, - 640, 512, 384, - 320, 256, 192, - 160, 128, 96, - 80, 64, 48, - 40, 32, 24, - 20, 16, 12, - 10, 8, 6, - 5, 4, 3, - 2, - 1 -}; -static unsigned const counter_offsets [32] = -{ - 1, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 536, 0, 1040, - 0, - 0 + RATE( 2,2), RATE(2048,4), RATE(1536,3), + RATE(1280,5), RATE(1024,4), RATE( 768,3), + RATE( 640,5), RATE( 512,4), RATE( 384,3), + RATE( 320,5), RATE( 256,4), RATE( 192,3), + RATE( 160,5), RATE( 128,4), RATE( 96,3), + RATE( 80,5), RATE( 64,4), RATE( 48,3), + RATE( 40,5), RATE( 32,4), RATE( 24,3), + RATE( 20,5), RATE( 16,4), RATE( 12,3), + RATE( 10,5), RATE( 8,4), RATE( 6,3), + RATE( 5,5), RATE( 4,4), RATE( 3,3), + RATE( 2,4), + RATE( 1,4) }; +#undef RATE inline void Spc_Dsp::init_counter() { - m.counter = 0; -} - -inline void Spc_Dsp::run_counters() -{ - if ( --m.counter < 0 ) - m.counter = simple_counter_range - 1; -} - -inline unsigned Spc_Dsp::read_counter( int rate ) -{ - return ((unsigned) m.counter + counter_offsets [rate]) % counter_rates [rate]; -} - - -//// Envelope - -inline void Spc_Dsp::run_envelope( voice_t* const v ) -{ - int env = v->env; - if ( v->env_mode == env_release ) // 60% + // counters start out with this synchronization + m.counters [0] = 1; + m.counters [1] = 0; + m.counters [2] = -0x20u; + m.counters [3] = 0x0B; + + int n = 2; + for ( int i = 1; i < 32; i++ ) { - if ( (env -= 0x8) < 0 ) - env = 0; - v->env = env; + m.counter_select [i] = &m.counters [n]; + if ( !--n ) + n = 3; } - else + m.counter_select [ 0] = &m.counters [0]; + m.counter_select [30] = &m.counters [2]; +} + +inline void Spc_Dsp::run_counter( int i ) +{ + int n = m.counters [i]; + if ( !(n-- & 7) ) + n -= 6 - i; + m.counters [i] = n; +} + 
+#define READ_COUNTER( rate )\ + (*m.counter_select [rate] & counter_mask [rate]) + + +//// Emulation + +void Spc_Dsp::run( int clock_count ) +{ + int new_phase = m.phase + clock_count; + int count = new_phase >> 5; + m.phase = new_phase & 31; + if ( !count ) + return; + + byte* const ram = this->ram; + byte const* const dir = &ram [REG(dir) * 0x100]; + int const slow_gaussian = (REG(pmon) >> 1) | REG(non); + int const noise_rate = REG(flg) & 0x1F; + + // Global volume + int mvoll = (int8_t) REG(mvoll); + int mvolr = (int8_t) REG(mvolr); + if ( mvoll * mvolr < surround_threshold ) + mvoll = -mvoll; // eliminate surround + + do { - int rate; - int env_data = VREG(v->regs,adsr1); - if ( m.t_adsr0 & 0x80 ) // 99% ADSR + // KON/KOFF reading + if ( (m.every_other_sample ^= 1) != 0 ) { - if ( v->env_mode >= env_decay ) // 99% - { - env--; - env -= env >> 8; - rate = env_data & 0x1F; - if ( v->env_mode == env_decay ) // 1% - rate = (m.t_adsr0 >> 3 & 0x0E) + 0x10; - } - else // env_attack - { - rate = (m.t_adsr0 & 0x0F) * 2 + 1; - env += rate < 31 ? 0x20 : 0x400; - } - } - else // GAIN - { - int mode; - env_data = VREG(v->regs,gain); - mode = env_data >> 5; - if ( mode < 4 ) // direct - { - env = env_data * 0x10; - rate = 31; - } - else - { - rate = env_data & 0x1F; - if ( mode == 4 ) // 4: linear decrease - { - env -= 0x20; - } - else if ( mode < 6 ) // 5: exponential decrease - { - env--; - env -= env >> 8; - } - else // 6,7: linear increase - { - env += 0x20; - if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 ) - env += 0x8 - 0x20; // 7: two-slope linear increase - } - } + m.new_kon &= ~m.kon; + m.kon = m.new_kon; + m.t_koff = REG(koff); } - // Sustain level - if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay ) - v->env_mode = env_sustain; - - v->hidden_env = env; - - // unsigned cast because linear decrease going negative also triggers this - if ( (unsigned) env > 0x7FF ) - { - env = (env < 0 ? 
0 : 0x7FF); - if ( v->env_mode == env_attack ) - v->env_mode = env_decay; - } - - if ( !read_counter( rate ) ) - v->env = env; // nothing else is controlled by the counter - } -} - - -//// BRR Decoding - -inline void Spc_Dsp::decode_brr( voice_t* v ) -{ - // Arrange the four input nybbles in 0xABCD order for easy decoding - int nybbles = m.t_brr_byte * 0x100 + ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF]; - - int const header = m.t_brr_header; - - // Write to next four samples in circular buffer - int* pos = &v->buf [v->buf_pos]; - if ( (v->buf_pos += 4) >= brr_buf_size ) - v->buf_pos = 0; - - // Decode four samples - for ( int* end = pos + 4; pos < end; pos++ ) - { - // Extract nybble and sign-extend - int s = (int16_t) nybbles >> 12; - nybbles <<= 4; - - // Shift sample based on header - int const shift = header >> 4; - s = (s << shift) >> 1; - if ( shift >= 0xD ) // handle invalid range - s = (s >> 25) << 11; // same as: s = (s < 0 ? -0x800 : 0) - - // Apply (unstable) IIR filter (8 is the most commonly used) - int const filter = header & 0x0C; - int const p1 = pos [brr_buf_size - 1]; - int const p2 = pos [brr_buf_size - 2] >> 1; - if ( filter >= 8 ) // most common one - { - s += p1; - s -= p2; - if ( filter == 8 ) // pos[0] = s*2 + pos[-1] * 1.09625 - pos[-2] * 0.9375 - { - s += p2 >> 4; - s += (p1 * -3) >> 6; - } - else // pos[0] = s*2 + pos[-1] * 1.796875 - pos[-2] * 0.8125 - { - s += (p1 * -13) >> 7; - s += (p2 * 3) >> 4; - } - } - else if ( filter ) // pos[0] = s*2 + pos[-1] * 0.9375 - { - s += p1 >> 1; - s += (-p1) >> 5; - } - - // Adjust and write sample - CLAMP16( s ); - s = (int16_t) (s * 2); - pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around - } -} - - -//// Misc - -#define MISC_CLOCK( n ) inline void Spc_Dsp::misc_##n() - -MISC_CLOCK( 27 ) -{ - m.t_pmon = REG(pmon) & 0xFE; // voice 0 doesn't support PMON -} -MISC_CLOCK( 28 ) -{ - m.t_non = REG(non); - m.t_eon = REG(eon); - m.t_dir = REG(dir); -} -MISC_CLOCK( 29 ) -{ - if ( 
(m.every_other_sample ^= 1) != 0 ) - m.new_kon &= ~m.kon; // clears KON 63 clocks after it was last read -} -MISC_CLOCK( 30 ) -{ - if ( m.every_other_sample ) - { - m.kon = m.new_kon; - m.t_koff = REG(koff) | mute_mask; - } - - run_counters(); - - // Noise - if ( !read_counter( REG(flg) & 0x1F ) ) - { - int feedback = (m.noise << 13) ^ (m.noise << 14); - m.noise = (feedback & 0x4000) ^ (m.noise >> 1); - } -} - - -//// Voices - -#define VOICE_CLOCK( n ) void Spc_Dsp::voice_##n( voice_t* const v ) - -inline VOICE_CLOCK( V1 ) -{ - m.t_dir_addr = m.t_dir * 0x100 + m.t_srcn * 4; - m.t_srcn = VREG(v->regs,srcn); -} -inline VOICE_CLOCK( V2 ) -{ - // Read sample pointer (ignored if not needed) - byte const* entry = &ram [m.t_dir_addr]; - if ( !v->kon_delay ) - entry += 2; - m.t_brr_next_addr = GET_LE16A( entry ); - - m.t_adsr0 = VREG(v->regs,adsr0); - - // Read pitch, spread over two clocks - m.t_pitch = VREG(v->regs,pitchl); -} -inline VOICE_CLOCK( V3a ) -{ - m.t_pitch += (VREG(v->regs,pitchh) & 0x3F) << 8; -} -inline VOICE_CLOCK( V3b ) -{ - // Read BRR header and byte - m.t_brr_byte = ram [(v->brr_addr + v->brr_offset) & 0xFFFF]; - m.t_brr_header = ram [v->brr_addr]; // brr_addr doesn't need masking -} -VOICE_CLOCK( V3c ) -{ - // Pitch modulation using previous voice's output - if ( m.t_pmon & v->vbit ) - m.t_pitch += ((m.t_output >> 5) * m.t_pitch) >> 10; - - if ( v->kon_delay ) - { - // Get ready to start BRR decoding on next sample - if ( v->kon_delay == 5 ) - { - v->brr_addr = m.t_brr_next_addr; - v->brr_offset = 1; - v->buf_pos = 0; - m.t_brr_header = 0; // header is ignored on this sample - kon_check = true; - } - - // Envelope is never run during KON - v->env = 0; - v->hidden_env = 0; - - // Disable BRR decoding until last three samples - v->interp_pos = 0; - if ( --v->kon_delay & 3 ) - v->interp_pos = 0x4000; - - // Pitch is never added during KON - m.t_pitch = 0; - } - - // Gaussian interpolation - { - int output = interpolate( v ); + run_counter( 1 ); + 
run_counter( 2 ); + run_counter( 3 ); // Noise - if ( m.t_non & v->vbit ) - output = (int16_t) (m.noise * 2); - - // Apply envelope - m.t_output = (output * v->env) >> 11 & ~1; - v->t_envx_out = (byte) (v->env >> 4); - } - - // Immediate silence due to end of sample or soft reset - if ( REG(flg) & 0x80 || (m.t_brr_header & 3) == 1 ) - { - v->env_mode = env_release; - v->env = 0; - } - - if ( m.every_other_sample ) - { - // KOFF - if ( m.t_koff & v->vbit && (!new_snes || v->kon_delay < 3) ) - v->env_mode = env_release; - - // KON - if ( m.kon & v->vbit ) + if ( !READ_COUNTER( noise_rate ) ) { - v->kon_delay = 5; - v->env_mode = env_attack; + int feedback = (m.noise << 13) ^ (m.noise << 14); + m.noise = (feedback & 0x4000) ^ (m.noise >> 1); } - } - - // Run envelope for next sample - if ( !v->kon_delay ) - run_envelope( v ); -} -inline void Spc_Dsp::voice_output( voice_t const* v, int ch ) -{ - // Apply left/right volume - int amp = (m.t_output * (int8_t) VREG(v->regs,voll + ch)) >> 7; - - // Avoid negative volume if surround is disabled - // (emulator feature; not part of actual DSP) - if ( (int8_t) VREG(v->regs,voll + ch) < surround_threshold ) - amp = -amp; - - // Add to output total - m.t_main_out [ch] += amp; - CLAMP16( m.t_main_out [ch] ); - - // Optionally add to echo total - if ( m.t_eon & v->vbit ) - { - m.t_echo_out [ch] += amp; - CLAMP16( m.t_echo_out [ch] ); - } -} -VOICE_CLOCK( V4 ) -{ - // Decode BRR - m.t_looped = 0; - if ( v->interp_pos >= 0x4000 ) - { - decode_brr( v ); - if ( (v->brr_offset += 2) >= brr_block_size ) + // Voices + int pmon_input = 0; + int main_out_l = 0; + int main_out_r = 0; + int echo_out_l = 0; + int echo_out_r = 0; + voice_t* v = m.voices; + byte* v_regs = regs; + int vbit = 1; + do { - // Start decoding next BRR block - assert( v->brr_offset == brr_block_size ); - v->brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF; - if ( m.t_brr_header & 1 ) + #define SAMPLE_PTR(i) GET_LE16A( &dir [VREG(v_regs,srcn) * 4 + i * 2] ) + + int 
brr_header = ram [v->brr_addr]; + int kon_delay = v->kon_delay; + + // Pitch + int pitch = GET_LE16A( &VREG(v_regs,pitchl) ) & 0x3FFF; + if ( REG(pmon) & vbit ) + pitch += ((pmon_input >> 5) * pitch) >> 10; + + // KON phases + if ( --kon_delay >= 0 ) { - v->brr_addr = m.t_brr_next_addr; - m.t_looped = v->vbit; + v->kon_delay = kon_delay; + + // Get ready to start BRR decoding on next sample + if ( kon_delay == 4 ) + { + v->brr_addr = SAMPLE_PTR( 0 ); + v->brr_offset = 1; + v->buf_pos = v->buf; + brr_header = 0; // header is ignored on this sample + } + + // Envelope is never run during KON + v->env = 0; + v->hidden_env = 0; + + // Disable BRR decoding until last three samples + v->interp_pos = (kon_delay & 3 ? 0x4000 : 0); + + // Pitch is never added during KON + pitch = 0; } - v->brr_offset = 1; + + int env = v->env; + + // Gaussian interpolation + { + int output = 0; + VREG(v_regs,envx) = (byte) (env >> 4); + if ( env ) + { + // Make pointers into gaussian based on fractional position between samples + int offset = (unsigned) v->interp_pos >> 3 & 0x1FE; + short const* fwd = interleaved_gauss + offset; + short const* rev = interleaved_gauss + 510 - offset; // mirror left half of gaussian + + int const* in = &v->buf_pos [(unsigned) v->interp_pos >> 12]; + + if ( !(slow_gaussian & vbit) ) // 99% + { + // Faster approximation when exact sample value isn't necessary for pitch mod + output = (fwd [0] * in [0] + + fwd [1] * in [1] + + rev [1] * in [2] + + rev [0] * in [3]) >> 11; + output = (output * env) >> 11; + } + else + { + output = (int16_t) (m.noise * 2); + if ( !(REG(non) & vbit) ) + { + output = (fwd [0] * in [0]) >> 11; + output += (fwd [1] * in [1]) >> 11; + output += (rev [1] * in [2]) >> 11; + output = (int16_t) output; + output += (rev [0] * in [3]) >> 11; + + CLAMP16( output ); + output &= ~1; + } + output = (output * env) >> 11 & ~1; + } + + // Output + int l = output * v->volume [0]; + int r = output * v->volume [1]; + + main_out_l += l; + main_out_r += 
r; + + if ( REG(eon) & vbit ) + { + echo_out_l += l; + echo_out_r += r; + } + } + + pmon_input = output; + VREG(v_regs,outx) = (byte) (output >> 8); + } + + // Soft reset or end of sample + if ( REG(flg) & 0x80 || (brr_header & 3) == 1 ) + { + v->env_mode = env_release; + env = 0; + } + + if ( m.every_other_sample ) + { + // KOFF + if ( m.t_koff & vbit ) + v->env_mode = env_release; + + // KON + if ( m.kon & vbit ) + { + v->kon_delay = 5; + v->env_mode = env_attack; + REG(endx) &= ~vbit; + } + } + + // Envelope + if ( !v->kon_delay ) + { + if ( v->env_mode == env_release ) // 97% + { + env -= 0x8; + v->env = env; + if ( env <= 0 ) + { + v->env = 0; + goto skip_brr; // no BRR decoding for you! + } + } + else // 3% + { + int rate; + int const adsr0 = VREG(v_regs,adsr0); + int env_data = VREG(v_regs,adsr1); + if ( adsr0 >= 0x80 ) // 97% ADSR + { + if ( v->env_mode > env_decay ) // 89% + { + env--; + env -= env >> 8; + rate = env_data & 0x1F; + + // optimized handling + v->hidden_env = env; + if ( READ_COUNTER( rate ) ) + goto exit_env; + v->env = env; + goto exit_env; + } + else if ( v->env_mode == env_decay ) + { + env--; + env -= env >> 8; + rate = (adsr0 >> 3 & 0x0E) + 0x10; + } + else // env_attack + { + rate = (adsr0 & 0x0F) * 2 + 1; + env += rate < 31 ? 
0x20 : 0x400; + } + } + else // GAIN + { + int mode; + env_data = VREG(v_regs,gain); + mode = env_data >> 5; + if ( mode < 4 ) // direct + { + env = env_data * 0x10; + rate = 31; + } + else + { + rate = env_data & 0x1F; + if ( mode == 4 ) // 4: linear decrease + { + env -= 0x20; + } + else if ( mode < 6 ) // 5: exponential decrease + { + env--; + env -= env >> 8; + } + else // 6,7: linear increase + { + env += 0x20; + if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 ) + env += 0x8 - 0x20; // 7: two-slope linear increase + } + } + } + + // Sustain level + if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay ) + v->env_mode = env_sustain; + + v->hidden_env = env; + + // unsigned cast because linear decrease going negative also triggers this + if ( (unsigned) env > 0x7FF ) + { + env = (env < 0 ? 0 : 0x7FF); + if ( v->env_mode == env_attack ) + v->env_mode = env_decay; + } + + if ( !READ_COUNTER( rate ) ) + v->env = env; // nothing else is controlled by the counter + } + } + exit_env: + + { + // Apply pitch + int old_pos = v->interp_pos; + int interp_pos = (old_pos & 0x3FFF) + pitch; + if ( interp_pos > 0x7FFF ) + interp_pos = 0x7FFF; + v->interp_pos = interp_pos; + + // BRR decode if necessary + if ( old_pos >= 0x4000 ) + { + // Arrange the four input nybbles in 0xABCD order for easy decoding + int nybbles = ram [(v->brr_addr + v->brr_offset) & 0xFFFF] * 0x100 + + ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF]; + + // Advance read position + int const brr_block_size = 9; + int brr_offset = v->brr_offset; + if ( (brr_offset += 2) >= brr_block_size ) + { + // Next BRR block + int brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF; + assert( brr_offset == brr_block_size ); + if ( brr_header & 1 ) + { + brr_addr = SAMPLE_PTR( 1 ); + if ( !v->kon_delay ) + REG(endx) |= vbit; + } + v->brr_addr = brr_addr; + brr_offset = 1; + } + v->brr_offset = brr_offset; + + // Decode + + // 0: >>1 1: <<0 2: <<1 ... 
12: <<11 13-15: >>4 <<11 + static unsigned char const shifts [16 * 2] = { + 13,12,12,12,12,12,12,12,12,12,12, 12, 12, 16, 16, 16, + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11 + }; + int const scale = brr_header >> 4; + int const right_shift = shifts [scale]; + int const left_shift = shifts [scale + 16]; + + // Decode and write to next four samples in circular buffer + int* pos = v->buf_pos; + for ( int* end = pos + 4; pos < end; pos++ ) + { + // Extract upper nybble and scale appropriately + int s = ((int16_t) nybbles >> right_shift) << left_shift; + nybbles <<= 4; + + // Apply IIR filter (8 is the most commonly used) + int const filter = brr_header & 0x0C; + int const p1 = pos [brr_buf_size - 1]; + int const p2 = pos [brr_buf_size - 2] >> 1; + if ( filter >= 8 ) + { + s += p1; + s -= p2; + if ( filter == 8 ) // s += p1 * 0.953125 - p2 * 0.46875 + { + s += p2 >> 4; + s += (p1 * -3) >> 6; + } + else // s += p1 * 0.8984375 - p2 * 0.40625 + { + s += (p1 * -13) >> 7; + s += (p2 * 3) >> 4; + } + } + else if ( filter ) // s += p1 * 0.46875 + { + s += p1 >> 1; + s += (-p1) >> 5; + } + + // Adjust and write sample + CLAMP16( s ); + s = (int16_t) (s * 2); + pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around + } + + if ( pos >= &v->buf [brr_buf_size] ) + pos = v->buf; + v->buf_pos = pos; + } + } +skip_brr: + // Next voice + vbit <<= 1; + v_regs += 0x10; + v++; } - } - - // Apply pitch - v->interp_pos = (v->interp_pos & 0x3FFF) + m.t_pitch; - - // Keep from getting too far ahead (when using pitch modulation) - if ( v->interp_pos > 0x7FFF ) - v->interp_pos = 0x7FFF; - - // Output left - voice_output( v, 0 ); -} -inline VOICE_CLOCK( V5 ) -{ - // Output right - voice_output( v, 1 ); - - // ENDX, OUTX, and ENVX won't update if you wrote to them 1-2 clocks earlier - int endx_buf = REG(endx) | m.t_looped; - - // Clear bit in ENDX if KON just began - if ( v->kon_delay == 5 ) - endx_buf &= ~v->vbit; - m.endx_buf = (byte) endx_buf; -} -inline VOICE_CLOCK( 
V6 ) -{ - (void) v; // avoid compiler warning about unused v - m.outx_buf = (byte) (m.t_output >> 8); -} -inline VOICE_CLOCK( V7 ) -{ - // Update ENDX - REG(endx) = m.endx_buf; - - m.envx_buf = v->t_envx_out; -} -inline VOICE_CLOCK( V8 ) -{ - // Update OUTX - VREG(v->regs,outx) = m.outx_buf; -} -inline VOICE_CLOCK( V9 ) -{ - // Update ENVX - VREG(v->regs,envx) = m.envx_buf; -} - -// Most voices do all these in one clock, so make a handy composite -inline VOICE_CLOCK( V3 ) -{ - voice_V3a( v ); - voice_V3b( v ); - voice_V3c( v ); -} - -// Common combinations of voice steps on different voices. This greatly reduces -// code size and allows everything to be inlined in these functions. -VOICE_CLOCK(V7_V4_V1) { voice_V7(v); voice_V1(v+3); voice_V4(v+1); } -VOICE_CLOCK(V8_V5_V2) { voice_V8(v); voice_V5(v+1); voice_V2(v+2); } -VOICE_CLOCK(V9_V6_V3) { voice_V9(v); voice_V6(v+1); voice_V3(v+2); } - - -//// Echo - -// Current echo buffer pointer for left/right channel -#define ECHO_PTR( ch ) (&ram [m.t_echo_ptr + ch * 2]) - -// Sample in echo history buffer, where 0 is the oldest -#define ECHO_FIR( i ) (m.echo_hist_pos [i]) - -// Calculate FIR point for left/right channel -#define CALC_FIR( i, ch ) ((ECHO_FIR( i + 1 ) [ch] * (int8_t) REG(fir + i * 0x10)) >> 6) - -#define ECHO_CLOCK( n ) inline void Spc_Dsp::echo_##n() - -inline void Spc_Dsp::echo_read( int ch ) -{ - int s = GET_LE16SA( ECHO_PTR( ch ) ); - // second copy simplifies wrap-around handling - ECHO_FIR( 0 ) [ch] = ECHO_FIR( 8 ) [ch] = s >> 1; -} - -ECHO_CLOCK( 22 ) -{ - // History - if ( ++m.echo_hist_pos >= &m.echo_hist [echo_hist_size] ) - m.echo_hist_pos = m.echo_hist; - - m.t_echo_ptr = (m.t_esa * 0x100 + m.echo_offset) & 0xFFFF; - echo_read( 0 ); - - // FIR (using l and r temporaries below helps compiler optimize) - int l = CALC_FIR( 0, 0 ); - int r = CALC_FIR( 0, 1 ); - - m.t_echo_in [0] = l; - m.t_echo_in [1] = r; -} -ECHO_CLOCK( 23 ) -{ - int l = CALC_FIR( 1, 0 ) + CALC_FIR( 2, 0 ); - int r = CALC_FIR( 1, 1 
) + CALC_FIR( 2, 1 ); - - m.t_echo_in [0] += l; - m.t_echo_in [1] += r; - - echo_read( 1 ); -} -ECHO_CLOCK( 24 ) -{ - int l = CALC_FIR( 3, 0 ) + CALC_FIR( 4, 0 ) + CALC_FIR( 5, 0 ); - int r = CALC_FIR( 3, 1 ) + CALC_FIR( 4, 1 ) + CALC_FIR( 5, 1 ); - - m.t_echo_in [0] += l; - m.t_echo_in [1] += r; -} -ECHO_CLOCK( 25 ) -{ - int l = m.t_echo_in [0] + CALC_FIR( 6, 0 ); - int r = m.t_echo_in [1] + CALC_FIR( 6, 1 ); - - l = (int16_t) l; - r = (int16_t) r; - - l += (int16_t) CALC_FIR( 7, 0 ); - r += (int16_t) CALC_FIR( 7, 1 ); - - CLAMP16( l ); - CLAMP16( r ); - - m.t_echo_in [0] = l & ~1; - m.t_echo_in [1] = r & ~1; -} -inline int Spc_Dsp::echo_output( int ch ) -{ - int out = (int16_t) ((m.t_main_out [ch] * (int8_t) REG(mvoll + ch * 0x10)) >> 7) + - (int16_t) ((m.t_echo_in [ch] * (int8_t) REG(evoll + ch * 0x10)) >> 7); - CLAMP16( out ); - return out; -} -ECHO_CLOCK( 26 ) -{ - // Surround disabler (emulator feature; not part of actual DSP) - if ( (int8_t) REG(mvoll) * (int8_t) REG(mvolr) < surround_threshold ) - m.t_main_out [0] = -m.t_main_out [0]; // eliminate surround - - // Left output volumes - // (save sample for next clock so we can output both together) - m.t_main_out [0] = echo_output( 0 ); - - // Echo feedback - int l = m.t_echo_out [0] + (int16_t) ((m.t_echo_in [0] * (int8_t) REG(efb)) >> 7); - int r = m.t_echo_out [1] + (int16_t) ((m.t_echo_in [1] * (int8_t) REG(efb)) >> 7); - - CLAMP16( l ); - CLAMP16( r ); - - m.t_echo_out [0] = l & ~1; - m.t_echo_out [1] = r & ~1; -} -ECHO_CLOCK( 27 ) -{ - // Output - int l = m.t_main_out [0]; - int r = echo_output( 1 ); - m.t_main_out [0] = 0; - m.t_main_out [1] = 0; - - // TODO: global muting isn't this simple (turns DAC on and off - // or something, causing small ~37-sample pulse when first muted) - if ( REG(flg) & 0x40 ) - { - l = 0; - r = 0; - } - - // Output sample to DAC - SPC_DSP_OUT_HOOK( l, r ); -} -ECHO_CLOCK( 28 ) -{ - m.t_echo_enabled = REG(flg); -} -inline void Spc_Dsp::echo_write( int ch ) -{ - if ( 
!(m.t_echo_enabled & 0x20) ) - { - #ifdef SPC_DSP_ECHO_DEBUG - SPC_DSP_ECHO_DEBUG + while ( vbit < 0x100 ); + + // Echo position + int echo_offset = m.echo_offset; + byte* const echo_ptr = &ram [(REG(esa) * 0x100 + echo_offset) & 0xFFFF]; + if ( !echo_offset ) + m.echo_length = (REG(edl) & 0x0F) * 0x800; + echo_offset += 4; + if ( echo_offset >= m.echo_length ) + echo_offset = 0; + m.echo_offset = echo_offset; + + // FIR + int echo_in_l = GET_LE16SA( echo_ptr + 0 ); + int echo_in_r = GET_LE16SA( echo_ptr + 2 ); + + int (*echo_hist_pos) [2] = m.echo_hist_pos; + if ( ++echo_hist_pos >= &m.echo_hist [echo_hist_size] ) + echo_hist_pos = m.echo_hist; + m.echo_hist_pos = echo_hist_pos; + + echo_hist_pos [0] [0] = echo_hist_pos [8] [0] = echo_in_l; + echo_hist_pos [0] [1] = echo_hist_pos [8] [1] = echo_in_r; + + #define CALC_FIR_( i, in ) ((in) * (int8_t) REG(fir + i * 0x10)) + echo_in_l = CALC_FIR_( 7, echo_in_l ); + echo_in_r = CALC_FIR_( 7, echo_in_r ); + + #define CALC_FIR( i, ch ) CALC_FIR_( i, echo_hist_pos [i + 1] [ch] ) + #define DO_FIR( i )\ + echo_in_l += CALC_FIR( i, 0 );\ + echo_in_r += CALC_FIR( i, 1 ); + DO_FIR( 0 ); + DO_FIR( 1 ); + DO_FIR( 2 ); + #if defined (__MWERKS__) && __MWERKS__ < 0x3200 + __eieio(); // keeps compiler from stupidly "caching" things in memory #endif - SET_LE16A( ECHO_PTR( ch ), m.t_echo_out [ch] ); + DO_FIR( 3 ); + DO_FIR( 4 ); + DO_FIR( 5 ); + DO_FIR( 6 ); + + // Echo out + if ( !(REG(flg) & 0x20) ) + { + int l = (echo_out_l >> 7) + ((echo_in_l * (int8_t) REG(efb)) >> 14); + int r = (echo_out_r >> 7) + ((echo_in_r * (int8_t) REG(efb)) >> 14); + + // just to help pass more validation tests + #if SPC_MORE_ACCURACY + l &= ~1; + r &= ~1; + #endif + + CLAMP16( l ); + CLAMP16( r ); + + SET_LE16A( echo_ptr + 0, l ); + SET_LE16A( echo_ptr + 2, r ); + } + + // Sound out + int l = (main_out_l * mvoll + echo_in_l * (int8_t) REG(evoll)) >> 14; + int r = (main_out_r * mvolr + echo_in_r * (int8_t) REG(evolr)) >> 14; + + CLAMP16( l ); + CLAMP16( r 
); + + if ( (REG(flg) & 0x40) ) + { + l = 0; + r = 0; + } + + SPC_DSP_OUT_HOOK( l, r ); } - m.t_echo_out [ch] = 0; + while ( --count ); } -ECHO_CLOCK( 29 ) -{ - m.t_esa = REG(esa); - - if ( !m.echo_offset ) - m.echo_length = (REG(edl) & 0x0F) * 0x800; - - m.echo_offset += 4; - if ( m.echo_offset >= m.echo_length ) - m.echo_offset = 0; - - // Write left echo - echo_write( 0 ); - - m.t_echo_enabled = REG(flg); -} -ECHO_CLOCK( 30 ) -{ - // Write right echo - echo_write( 1 ); -} - - -//// Timing - -// Execute clock for a particular voice -#define V( clock, voice ) voice_##clock( &m.voices [voice] ); - -/* The most common sequence of clocks uses composite operations -for efficiency. For example, the following are equivalent to the -individual steps on the right: - -V(V7_V4_V1,2) -> V(V7,2) V(V4,3) V(V1,5) -V(V8_V5_V2,2) -> V(V8,2) V(V5,3) V(V2,4) -V(V9_V6_V3,2) -> V(V9,2) V(V6,3) V(V3,4) */ - -// Voice 0 1 2 3 4 5 6 7 -#define GEN_DSP_TIMING \ -PHASE( 0) V(V5,0)V(V2,1)\ -PHASE( 1) V(V6,0)V(V3,1)\ -PHASE( 2) V(V7_V4_V1,0)\ -PHASE( 3) V(V8_V5_V2,0)\ -PHASE( 4) V(V9_V6_V3,0)\ -PHASE( 5) V(V7_V4_V1,1)\ -PHASE( 6) V(V8_V5_V2,1)\ -PHASE( 7) V(V9_V6_V3,1)\ -PHASE( 8) V(V7_V4_V1,2)\ -PHASE( 9) V(V8_V5_V2,2)\ -PHASE(10) V(V9_V6_V3,2)\ -PHASE(11) V(V7_V4_V1,3)\ -PHASE(12) V(V8_V5_V2,3)\ -PHASE(13) V(V9_V6_V3,3)\ -PHASE(14) V(V7_V4_V1,4)\ -PHASE(15) V(V8_V5_V2,4)\ -PHASE(16) V(V9_V6_V3,4)\ -PHASE(17) V(V1,0) V(V7,5)V(V4,6)\ -PHASE(18) V(V8_V5_V2,5)\ -PHASE(19) V(V9_V6_V3,5)\ -PHASE(20) V(V1,1) V(V7,6)V(V4,7)\ -PHASE(21) V(V8,6)V(V5,7) V(V2,0) /* t_brr_next_addr order dependency */\ -PHASE(22) V(V3a,0) V(V9,6)V(V6,7) echo_22();\ -PHASE(23) V(V7,7) echo_23();\ -PHASE(24) V(V8,7) echo_24();\ -PHASE(25) V(V3b,0) V(V9,7) echo_25();\ -PHASE(26) echo_26();\ -PHASE(27) misc_27(); echo_27();\ -PHASE(28) misc_28(); echo_28();\ -PHASE(29) misc_29(); echo_29();\ -PHASE(30) misc_30();V(V3c,0) echo_30();\ -PHASE(31) V(V4,0) V(V1,2)\ - -#if !SPC_DSP_CUSTOM_RUN - -void Spc_Dsp::run( int 
clocks_remain ) -{ - require( clocks_remain > 0 ); - - int const phase = m.phase; - m.phase = (phase + clocks_remain) & 31; - switch ( phase ) - { - loop: - - #define PHASE( n ) if ( n && !--clocks_remain ) break; case n: - GEN_DSP_TIMING - #undef PHASE - - if ( --clocks_remain ) - goto loop; - } -} - -#endif //// Setup +void Spc_Dsp::apply_output_enables() +{ + for ( int i = 0; i < voice_count; i++ ) + update_voice_vol( i * 0x10 ); +} + void Spc_Dsp::init( void* ram_64k ) { ram = (byte*) ram_64k; - disable_surround( false ); + disable_surround( false ); // must be before mute_voices mute_voices( 0 ); set_output( NULL, 0 ); reset(); @@ -856,28 +668,25 @@ void Spc_Dsp::soft_reset() m.phase = 0; init_counter(); - - kon_check = false; } void Spc_Dsp::load( byte const new_regs [register_count] ) { - memcpy( regs, new_regs, register_count ); + memcpy( regs, new_regs, sizeof regs ); BLARGG_CLEAR( &m ); for ( int i = voice_count; --i >= 0; ) { - voice_t* v = &m.voices [i]; - v->brr_offset = 1; - v->vbit = 1 << i; - v->regs = &regs [i * 0x10]; + voice_t& v = m.voices [i]; + v.brr_offset = 1; + v.buf_pos = v.buf; } m.new_kon = REG(kon); - m.t_dir = REG(dir); - m.t_esa = REG(esa); soft_reset(); REG(flg) = new_regs [r_flg]; // soft_reset() overwrites this + + apply_output_enables(); } void Spc_Dsp::reset() @@ -892,6 +701,6 @@ void Spc_Dsp::reset() 0x75,0xF5,0x06,0x97,0x10,0xC3,0x24,0xBB,0x00,0x00,0x7B,0x7A,0xE0,0x60,0x12,0x0F, 0xF7,0x74,0x1C,0xE5,0x39,0x3D,0x73,0xC1,0x00,0x00,0x7A,0xB3,0xFF,0x4E,0x7B,0xFF }; - + load( initial_regs ); } diff --git a/bsnes/smp/snes_spc/Spc_Dsp.h b/bsnes/smp/snes_spc/Spc_Dsp.h index 95f136d7..0da18456 100755 --- a/bsnes/smp/snes_spc/Spc_Dsp.h +++ b/bsnes/smp/snes_spc/Spc_Dsp.h @@ -1,4 +1,4 @@ -// Highly accurate SNES SPC-700 DSP emulator +// Fast SNES SPC-700 DSP emulator (about 3x speed of accurate one) // snes_spc 0.9.5 #ifndef BLARGG_SPC_DSP_H @@ -8,16 +8,18 @@ BLARGG_NAMESPACE_BEGIN -extern "C" { typedef void (*dsp_copy_func_t)( unsigned char** 
io, void* state, size_t ); } - struct Spc_Dsp { public: typedef BOOST::uint8_t byte; // Setup - + // Initializes DSP and has it use the 64K RAM provided void init( void* ram_64k ); + + // Sets function that is called when output buffer is filled, or NULL for none + blargg_callback set_output_callback; + //void set_output_callback( void (*func)( void* user_data ), void* user_data ); // Sets destination for output samples. If begin is NULL, doesn't generate any. typedef short sample_t; @@ -26,52 +28,42 @@ public: // Current position in output buffer, or NULL if no buffer set sample_t* output_ptr() const; - // Sets function that is called when output buffer is filled, or NULL for none - blargg_callback set_output_callback; - //void set_output_callback( void (*func)( void* user_data ), void* user_data ); - -// Emulation + // Number of samples written to output buffer since last set, or 0 if no buffer set. + int sample_count() const; +// Emulation + // Resets DSP to power-on state void reset(); // Emulates pressing reset switch on SNES void soft_reset(); - // Reads/writes DSP registers. For accuracy, you must first call run() + // Reads/writes DSP registers. For accuracy, you must first call spc_run_dsp() // to catch the DSP up to present. int read ( int addr ) const; void write( int addr, int data ); // Runs DSP for specified number of clocks (~1024000 per second). Every 32 clocks - // a pair of samples is be generated. + // a pair of samples is generated. void run( int clock_count ); - + // Sound control - // Using these reduces emulation accuracy. - - // Mutes voices corresponding to non-zero bits in mask (issues repeated KOFF events). + // Mutes voices corresponding to non-zero bits in mask (overrides VxVOL with 0). + // Reduces emulation accuracy. 
enum { voice_count = 8 }; - void mute_voices( int mask ) { mute_mask = mask; } + void mute_voices( int mask ) { mute_mask = mask; } // If true, prevents channels and global volumes from being phase-negated void disable_surround( bool disable = true ); - + // State // Resets DSP and uses supplied values to initialize registers enum { register_count = 128 }; void load( byte const regs [register_count] ); - // Saves/loads exact emulator state - enum { state_size = 640 }; // maximum space needed when saving - typedef dsp_copy_func_t copy_func_t; - void copy_state( unsigned char** io, copy_func_t ); - - // Returns non-zero if new key-on events occurred since last call - bool check_kon(); - // DSP register addresses // Global registers @@ -109,21 +101,17 @@ public: struct voice_t { int buf [brr_buf_size*2];// decoded samples (twice the size to simplify wrap handling) - int buf_pos; // place in buffer where next samples will be decoded + int* buf_pos; // place in buffer where next samples will be decoded int interp_pos; // relative fractional position in sample (0x1000 = 1.0) int brr_addr; // address of current BRR block int brr_offset; // current decoding offset in BRR block - byte* regs; // pointer to voice's DSP registers - int vbit; // bitmask for voice: 0x01 for voice 0, 0x02 for voice 1, etc. 
int kon_delay; // KON delay/current setup phase env_mode_t env_mode; int env; // current envelope level int hidden_env; // used by GAIN mode 7, very obscure quirk - byte t_envx_out; + int volume [2]; // copy of volume from DSP registers, with surround disabled }; private: - enum { brr_block_size = 9 }; - // non-emulation state byte* ram; // 64K shared RAM between DSP and SMP int mute_mask; @@ -133,145 +121,81 @@ private: sample_t* output_end; sample_t* user_output_end; sample_t dummy_buf [2]; - bool kon_check; // set when a new KON occurs struct state_t { int every_other_sample; // toggles every sample int kon; // KON value when last checked int noise; - int counter; int echo_offset; // offset from ESA in echo buffer int echo_length; // number of bytes that echo_offset will stop at int phase; // next clock cycle to run (0-31) + unsigned counters [4]; - // Hidden registers also written to when main register is written to - int new_kon; - byte endx_buf; - byte envx_buf; - byte outx_buf; - - // Temporary state between clocks - - // read once per sample - int t_pmon; - int t_non; - int t_eon; - int t_dir; + int new_kon; int t_koff; - // read a few clocks ahead then used - int t_brr_next_addr; - int t_adsr0; - int t_brr_header; - int t_brr_byte; - int t_srcn; - int t_esa; - int t_echo_enabled; - - // internal state that is recalculated every sample - int t_dir_addr; - int t_pitch; - int t_output; - int t_looped; - int t_echo_ptr; - - // left/right sums - int t_main_out [2]; - int t_echo_out [2]; - int t_echo_in [2]; - - voice_t voices [voice_count]; - // Echo history keeps most recent 8 samples (twice the size to simplify wrap handling) int (*echo_hist_pos) [2]; // &echo_hist [0 to 7] int echo_hist [echo_hist_size * 2] [2]; + + unsigned* counter_select [32]; + voice_t voices [voice_count]; }; state_t m; byte regs [register_count]; void init_counter(); - void run_counters(); - unsigned read_counter( int rate ); - - int interpolate( voice_t const* v ); - void 
run_envelope( voice_t* const v ); - void decode_brr( voice_t* v ); - - void misc_27(); - void misc_28(); - void misc_29(); - void misc_30(); - - void voice_output( voice_t const* v, int ch ); - void voice_V1( voice_t* const ); - void voice_V2( voice_t* const ); - void voice_V3( voice_t* const ); - void voice_V3a( voice_t* const ); - void voice_V3b( voice_t* const ); - void voice_V3c( voice_t* const ); - void voice_V4( voice_t* const ); - void voice_V5( voice_t* const ); - void voice_V6( voice_t* const ); - void voice_V7( voice_t* const ); - void voice_V8( voice_t* const ); - void voice_V9( voice_t* const ); - void voice_V7_V4_V1( voice_t* const ); - void voice_V8_V5_V2( voice_t* const ); - void voice_V9_V6_V3( voice_t* const ); - - void echo_read( int ch ); - int echo_output( int ch ); - void echo_write( int ch ); - void echo_22(); - void echo_23(); - void echo_24(); - void echo_25(); - void echo_26(); - void echo_27(); - void echo_28(); - void echo_29(); - void echo_30(); - + void run_counter( int ); + void update_voice_vol( int addr ); void set_null_output(); void write_sample( int l, int r ); + void apply_output_enables(); }; -#include <assert.h> - inline int Spc_Dsp::read( int addr ) const { assert( (unsigned) addr < register_count ); - return regs [addr]; } +inline void Spc_Dsp::update_voice_vol( int addr ) +{ + int l = (int8_t) regs [addr + v_voll]; + int r = (int8_t) regs [addr + v_volr]; + + if ( l * r < surround_threshold ) + { + // signs differ, so negate those that are negative + l ^= l >> 7; + r ^= r >> 7; + } + + int index = addr >> 4; + voice_t& v = m.voices [index]; + int enabled = ~mute_mask >> index & 1; + v.volume [0] = l * enabled; + v.volume [1] = r * enabled; +} + inline void Spc_Dsp::write( int addr, int data ) { assert( (unsigned) addr < register_count ); regs [addr] = (byte) data; - switch ( addr & 0x0F ) + int low = addr & 0x0F; + if ( low < 0x2 ) // voice volumes + { + update_voice_vol( low ^ addr /* addr & 0xF0 */ ); + } + else if ( low == 0xC ) { - 
case v_envx: - m.envx_buf = (byte) data; - break; - - case v_outx: - m.outx_buf = (byte) data; - break; - - case 0x0C: if ( addr == r_kon ) m.new_kon = (byte) data; if ( addr == r_endx ) // always cleared, regardless of data written - { - m.endx_buf = 0; regs [r_endx] = 0; - } - break; } } @@ -280,40 +204,22 @@ inline void Spc_Dsp::disable_surround( bool disable ) surround_threshold = disable ? 0 : -0x4000; } -inline bool Spc_Dsp::check_kon() -{ - bool old = kon_check; - kon_check = 0; - return old; -} - inline Spc_Dsp::sample_t* Spc_Dsp::output_ptr() const { // Don't return pointer into dummy_buf return (output_ptr_ != dummy_buf ? output_ptr_ : user_output_end); } -class SPC_State_Copier { - Spc_Dsp::copy_func_t func; - unsigned char** buf; -public: - SPC_State_Copier( unsigned char** p, Spc_Dsp::copy_func_t f ) { func = f; buf = p; } - void copy( void* state, size_t size ); - int copy_int( int state, int size ); - void skip( int count ); - - // Reads uint8_t and then skips that many bytes. If writing, writes - // uint8_t of 0. This allows future expansion at this point, by writing - // non-zero and additional data. - void extra(); -}; - -#define SPC_COPY( type, state )\ -{\ - state = (BOOST::type) copier.copy_int( state, sizeof (BOOST::type) );\ - check( (BOOST::type) state == state );\ +inline int Spc_Dsp::sample_count() const +{ + sample_t* p = output_ptr(); + return (p ? p - output_begin : 0); } +#define SPC_NO_COPY_STATE_FUNCS 1 + +#define SPC_LESS_ACCURATE 1 + BLARGG_NAMESPACE_END #endif