Update to bsnes v107r4 beta release.

byuu says:

  - bsnes: added video filters from bsnes v082
  - bsnes: added ZSNES snow effect option when games are paused or unloaded
    (no, I'm not joking)
  - bsnes: added 7-zip support (LZMA 19.00 SDK)

[Recent higan WIPs have also mentioned bsnes changes, although the higan code
no longer includes the bsnes code. These changes include:

  - higan, bsnes: added EXLOROM, EXLOROM-RAM, EXHIROM mappings
  - higan, bsnes: focus the viewport after leaving fullscreen exclusive
    mode
  - bsnes: re-added mightymo's cheat code database
  - bsnes: improved make install rules for the game and cheat code
    databases
  - bsnes: delayed construction of hiro::Window objects to properly show
    bsnes window icons

- Ed.]
This commit is contained in:
Tim Allen 2019-07-07 19:44:09 +10:00
parent becbca47d4
commit d87a0f633d
280 changed files with 120826 additions and 1521 deletions

87497
bsnes/Database/Cheat Codes.bml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -34,10 +34,12 @@ else
$(error "unsupported platform")
endif
objects := libco emulator
objects := libco emulator filter lzma
obj/libco.o: ../libco/libco.c
obj/emulator.o: emulator/emulator.cpp
obj/filter.o: filter/filter.cpp
obj/lzma.o: lzma/lzma.cpp
include sfc/GNUmakefile
include gb/GNUmakefile

View File

@ -31,7 +31,7 @@ using namespace nall;
namespace Emulator {
static const string Name = "bsnes";
static const string Version = "107.3";
static const string Version = "107.4";
static const string Author = "byuu";
static const string License = "GPLv3";
static const string Website = "https://byuu.org/";

View File

@ -16,7 +16,7 @@ struct Platform {
virtual auto path(uint id) -> string { return ""; }
virtual auto open(uint id, string name, vfs::file::mode mode, bool required = false) -> vfs::shared::file { return {}; }
virtual auto load(uint id, string name, string type, vector<string> options = {}) -> Load { return {}; }
virtual auto videoFrame(const uint32* data, uint pitch, uint width, uint height) -> void {}
virtual auto videoFrame(const uint16* data, uint pitch, uint width, uint height) -> void {}
virtual auto audioFrame(const double* samples, uint channels) -> void {}
virtual auto inputPoll(uint port, uint device, uint input) -> int16 { return 0; }
virtual auto inputRumble(uint port, uint device, uint input, bool enable) -> void {}

View File

@ -20,8 +20,6 @@ auto Video::reset(Interface* interface) -> void {
width = 0;
height = 0;
effects.colorBleed = false;
effects.interframeBlending = false;
effects.rotateLeft = false;
}
auto Video::setPalette() -> void {
@ -85,14 +83,6 @@ auto Video::setEffect(Effect effect, const any& value) -> void {
if(effect == Effect::ColorBleed && value.is<bool>()) {
effects.colorBleed = value.get<bool>();
}
if(effect == Effect::InterframeBlending && value.is<bool>()) {
effects.interframeBlending = value.get<bool>();
}
if(effect == Effect::RotateLeft && value.is<bool>()) {
effects.rotateLeft = value.get<bool>();
}
}
auto Video::createSprite(uint width, uint height) -> shared_pointer<Sprite> {
@ -128,18 +118,9 @@ auto Video::refresh(uint32* input, uint pitch, uint width, uint height) -> void
auto source = input + y * pitch;
auto target = output + y * width;
if(!effects.interframeBlending) {
for(uint x : range(width)) {
auto color = palette[*source++];
*target++ = color;
}
} else {
uint32 mask = depth == 30 ? 0x40100401 : 0x01010101;
for(uint x : range(width)) {
auto a = *target;
auto b = palette[*source++];
*target++ = (a + b - ((a ^ b) & mask)) >> 1;
}
for(uint x : range(width)) {
auto color = palette[*source++];
*target++ = color;
}
}
@ -155,18 +136,6 @@ auto Video::refresh(uint32* input, uint pitch, uint width, uint height) -> void
}
}
if(effects.rotateLeft) {
for(uint y : range(height)) {
auto source = buffer + y * width;
for(uint x : range(width)) {
auto target = rotate + (width - 1 - x) * height + y;
*target = *source++;
}
}
output = rotate;
swap(width, height);
}
for(auto& sprite : sprites) {
if(!sprite->visible) continue;
@ -185,7 +154,7 @@ auto Video::refresh(uint32* input, uint pitch, uint width, uint height) -> void
}
}
platform->videoFrame(output, width * sizeof(uint32), width, height);
platform->videoFrame((const uint16*)output, width * sizeof(uint32), width, height);
}
}

View File

@ -9,8 +9,6 @@ struct Sprite;
struct Video {
enum class Effect : uint {
ColorBleed,
InterframeBlending,
RotateLeft,
};
~Video();
@ -48,8 +46,6 @@ private:
struct Effects {
bool colorBleed = false;
bool interframeBlending = false;
bool rotateLeft = false;
} effects;
friend class Sprite;

25
bsnes/filter/2xsai.cpp Normal file
View File

@ -0,0 +1,25 @@
//2xSaI: converts the 16-bit source to 32-bit color first, then hands the
//converted image to _2xSaI32 for the actual 2x magnification.
namespace Filter::_2xSaI {

//staging area for the color-converted source image (at most 512x480 pixels)
uint32_t temp[512 * 480];

//output is twice the source size in both directions
auto size(uint& width, uint& height) -> void {
  width = width * 2;
  height = height * 2;
}

//colortable: maps each 16-bit source pixel to a 32-bit color
//output/outpitch: destination image; outpitch is passed through to _2xSaI32 unchanged
//input/pitch: source image; pitch is the distance between source rows in bytes
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  auto sourceBytes = reinterpret_cast<const uint8_t*>(input);
  uint32_t* staged = temp;

  //rows are packed tightly into temp (stride = width), regardless of the source pitch
  for(unsigned row = 0; row < height; row++) {
    auto source = reinterpret_cast<const uint16_t*>(sourceBytes + pitch * row);
    for(unsigned column = 0; column < width; column++) {
      *staged++ = colortable[source[column]];
    }
  }

  _2xSaI32(
    reinterpret_cast<unsigned char*>(temp), width * sizeof(uint32_t), 0,
    reinterpret_cast<unsigned char*>(output), outpitch, width, height
  );
}

}

25
bsnes/filter/filter.cpp Normal file
View File

@ -0,0 +1,25 @@
//Pulls every video filter implementation into a single translation unit.
#include <emulator/emulator.hpp>
//Define `register` as an empty macro for everything included below.
//NOTE(review): presumably needed because sai/sai.cpp still uses the `register`
//storage class, which C++17 no longer accepts -- confirm against sai.cpp.
#undef register
#define register
#include "sai/sai.cpp"
//Shared color lookup table pointer. The NTSC_* filters store their colortable
//argument here at the start of render().
//NOTE(review): declared before the snes_ntsc includes, presumably because the
//snes_ntsc configuration refers to it -- confirm against snes_ntsc_config.h.
uint32_t* colortable;
#include "snes_ntsc/snes_ntsc.h"
#include "snes_ntsc/snes_ntsc.c"
//one file per filter; each defines a Filter::<Name> namespace with size() and render()
#include "none.cpp"
#include "scanlines-light.cpp"
#include "scanlines-dark.cpp"
#include "scanlines-black.cpp"
#include "pixellate2x.cpp"
#include "scale2x.cpp"
#include "2xsai.cpp"
#include "super-2xsai.cpp"
#include "super-eagle.cpp"
#include "lq2x.cpp"
#include "hq2x.cpp"
#include "ntsc-rf.cpp"
#include "ntsc-composite.cpp"
#include "ntsc-svideo.cpp"
#include "ntsc-rgb.cpp"

123
bsnes/filter/filter.hpp Normal file
View File

@ -0,0 +1,123 @@
#pragma once
#include <emulator/emulator.hpp>
//Every filter exposes the same two entry points:
//  size(width, height)
//    rewrites the source dimensions, in place, to the dimensions of the filtered image.
//  render(colortable, output, outpitch, input, pitch, width, height)
//    colortable: lookup table from a 16-bit source pixel to a 32-bit output color
//    output:     destination image (32-bit pixels)
//    outpitch:   distance between destination rows
//    input:      source image (16-bit pixels)
//    pitch:      distance between source rows
//    width, height: source dimensions in pixels
//The filter implementations shown alongside this header convert pitch to pixels
//with >> 1 and outpitch with >> 2, i.e. both are given in bytes.

//1:1 color conversion; dimensions unchanged
namespace Filter::None {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//doubles the height; every second output row is a dimmed copy (channels scaled by 0.666)
namespace Filter::ScanlinesLight {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//doubles the height; every second output row is a dimmed copy (channels scaled by 0.333)
namespace Filter::ScanlinesDark {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//doubles the height; every second output row is written as 0
namespace Filter::ScanlinesBlack {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//pixel doubling: width is doubled when <= 256, height is doubled when <= 240
namespace Filter::Pixellate2x {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//2x magnification in both directions
namespace Filter::Scale2x {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//2x magnification in both directions; the scaling itself is delegated to _2xSaI32
namespace Filter::_2xSaI {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//NOTE(review): implementation not visible from here; presumably 2x like _2xSaI -- confirm
namespace Filter::Super2xSaI {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//NOTE(review): implementation not visible from here; presumably 2x like _2xSaI -- confirm
namespace Filter::SuperEagle {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//2x magnification in both directions; like Scale2x, but matching corners are blended with the center pixel
namespace Filter::LQ2x {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//2x magnification in both directions; the leftmost and rightmost source columns are output as 0
namespace Filter::HQ2x {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//NTSC filters: the output width is always SNES_NTSC_OUT_WIDTH(256); the height is unchanged.
//snes_ntsc_composite preset with merge_fields = 0
namespace Filter::NTSC_RF {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//snes_ntsc_composite preset with merge_fields = 1
namespace Filter::NTSC_Composite {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//snes_ntsc_svideo preset with merge_fields = 1
namespace Filter::NTSC_SVideo {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}
//snes_ntsc_rgb preset with merge_fields = 1
namespace Filter::NTSC_RGB {
auto size(uint& width, uint& height) -> void;
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void;
}

200
bsnes/filter/hq2x.cpp Normal file
View File

@ -0,0 +1,200 @@
//HQ2x: 2x magnification of a 15-bit source image.
//Each source pixel is compared with its eight neighbors in a packed YUV
//representation. The resulting 8-bit difference pattern selects, via hqTable,
//the blending rule used for each of the four output pixels.
namespace Filter::HQ2x {

//Added to a packed YUV difference before masking: a difference that stays within
//the tolerance leaves every diff_mask bit clear.
//Unsigned literals keep the shifts out of the sign bit of a signed int.
enum : uint32_t {
  diff_offset = (0x440u << 21) + (0x207u << 11) + 0x407u,
  diff_mask   = (0x380u << 21) + (0x1f0u << 11) + 0x3f0u,
};

//15-bit color -> packed YUV (y << 21 | u << 11 | v); allocated by initialize()
uint32_t *yuvTable;
//difference pattern -> the same pattern rotated by a quarter turn
uint8_t rotate[256];

//difference pattern -> blending rule for the top-left output pixel (see blend())
const uint8_t hqTable[256] = {
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 15, 12, 5,  3, 17, 13,
  4, 4, 6, 18, 4, 4, 6, 18, 5,  3, 12, 12, 5,  3,  1, 12,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 17, 13, 5,  3, 16, 14,
  4, 4, 6, 18, 4, 4, 6, 18, 5,  3, 16, 12, 5,  3,  1, 14,
  4, 4, 6,  2, 4, 4, 6,  2, 5, 19, 12, 12, 5, 19, 16, 12,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 12,
  4, 4, 6,  2, 4, 4, 6,  2, 5, 19,  1, 12, 5, 19,  1, 14,
  4, 4, 6,  2, 4, 4, 6, 18, 5,  3, 16, 12, 5, 19,  1, 14,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 15, 12, 5,  3, 17, 13,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 12,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 17, 13, 5,  3, 16, 14,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 13, 5,  3,  1, 14,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 13,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3,  1, 12,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3,  1, 14,
  4, 4, 6,  2, 4, 4, 6,  2, 5,  3,  1, 12, 5,  3,  1, 14,
};

//Builds yuvTable and rotate on first use.
//The table pointer itself is the "initialized" marker, so a call after
//terminate() rebuilds the tables instead of using freed memory.
static void initialize() {
  if(yuvTable) return;

  yuvTable = new uint32_t[32768];

  for(unsigned i = 0; i < 32768; i++) {
    uint8_t R = (i >>  0) & 31;
    uint8_t G = (i >>  5) & 31;
    uint8_t B = (i >> 10) & 31;

    //bgr555->bgr888
    double r = (R << 3) | (R >> 2);
    double g = (G << 3) | (G >> 2);
    double b = (B << 3) | (B >> 2);

    //bgr888->yuv
    double y = (r + g + b) * (0.25f * (63.5f / 48.0f));
    double u = ((r - b) * 0.25f + 128.0f) * (7.5f / 7.0f);
    double v = ((g * 2.0f - r - b) * 0.125f + 128.0f) * (7.5f / 6.0f);

    yuvTable[i] = ((unsigned)y << 21) + ((unsigned)u << 11) + ((unsigned)v);
  }

  //counter-clockwise rotation table; one revolution:
  //123    369  12346789
  //4.6 -> 2.8  =
  //789    147  36928147
  for(unsigned n = 0; n < 256; n++) {
    rotate[n] = ((n >> 2) & 0x11) | ((n << 2) & 0x88)
              | ((n & 0x01) << 5) | ((n & 0x08) << 3)
              | ((n & 0x10) >> 3) | ((n & 0x80) >> 5);
  }
}

//Releases yuvTable. The pointer is cleared so that the next render() call
//re-runs initialize().
static void terminate() {
  delete[] yuvTable;
  yuvTable = nullptr;
}

//true when two 15-bit colors are within the YUV tolerance of each other
static bool same(uint16_t x, uint16_t y) {
  return !((yuvTable[x] - yuvTable[y] + diff_offset) & diff_mask);
}

//true when color y differs from x, where x is yuvTable[color] + diff_offset, precomputed by the caller
static bool diff(uint32_t x, uint16_t y) {
  return ((x - yuvTable[y]) & diff_mask);
}

//grow() spreads the three 5-bit channels of a 15-bit color across 32 bits so that
//weighted sums of several colors cannot carry from one channel into the next;
//pack() folds the result back into 15 bits.
static void grow(uint32_t &n) { n |= n << 16; n &= 0x03e07c1f; }
static uint16_t pack(uint32_t n) { n &= 0x03e07c1f; return n | (n >> 16); }

//weights 3:1
static uint16_t blend1(uint32_t A, uint32_t B) {
  grow(A); grow(B);
  return pack((A * 3 + B) >> 2);
}

//weights 2:1:1
static uint16_t blend2(uint32_t A, uint32_t B, uint32_t C) {
  grow(A); grow(B); grow(C);
  return pack((A * 2 + B + C) >> 2);
}

//weights 5:2:1
static uint16_t blend3(uint32_t A, uint32_t B, uint32_t C) {
  grow(A); grow(B); grow(C);
  return pack((A * 5 + B * 2 + C) >> 3);
}

//weights 6:1:1
static uint16_t blend4(uint32_t A, uint32_t B, uint32_t C) {
  grow(A); grow(B); grow(C);
  return pack((A * 6 + B + C) >> 3);
}

//weights 2:3:3
static uint16_t blend5(uint32_t A, uint32_t B, uint32_t C) {
  grow(A); grow(B); grow(C);
  return pack((A * 2 + (B + C) * 3) >> 3);
}

//weights 14:1:1
static uint16_t blend6(uint32_t A, uint32_t B, uint32_t C) {
  grow(A); grow(B); grow(C);
  return pack((A * 14 + B + C) >> 4);
}

//Applies blending rule `rule` (a value from hqTable) to center pixel E and its
//neighbors; unknown rules return E unchanged.
static uint16_t blend(unsigned rule, uint16_t E, uint16_t A, uint16_t B, uint16_t D, uint16_t F, uint16_t H) {
  switch(rule) { default:
    case  0: return E;
    case  1: return blend1(E, A);
    case  2: return blend1(E, D);
    case  3: return blend1(E, B);
    case  4: return blend2(E, D, B);
    case  5: return blend2(E, A, B);
    case  6: return blend2(E, A, D);
    case  7: return blend3(E, B, D);
    case  8: return blend3(E, D, B);
    case  9: return blend4(E, D, B);
    case 10: return blend5(E, D, B);
    case 11: return blend6(E, D, B);
    case 12: return same(B, D) ? blend2(E, D, B) : E;
    case 13: return same(B, D) ? blend5(E, D, B) : E;
    case 14: return same(B, D) ? blend6(E, D, B) : E;
    case 15: return same(B, D) ? blend2(E, D, B) : blend1(E, A);
    case 16: return same(B, D) ? blend4(E, D, B) : blend1(E, A);
    case 17: return same(B, D) ? blend5(E, D, B) : blend1(E, A);
    case 18: return same(B, F) ? blend3(E, B, D) : blend1(E, D);
    case 19: return same(D, H) ? blend3(E, D, B) : blend1(E, B);
  }
}

//output is twice the source size in both directions
auto size(uint& width, uint& height) -> void {
  width  *= 2;
  height *= 2;
}

//colortable: 15-bit color -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: 15-bit source image and its row pitch in bytes
//The leftmost and rightmost source columns are not filtered; their output pixels are 0.
//Source pixels must be below 32768: they index yuvTable directly.
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();
  pitch >>= 1;
  outpitch >>= 2;

  //nothing to draw; also keeps `width - 1` below from wrapping around
  if(width == 0) return;

  for(uint y = 0; y < height; y++) {
    const uint16_t* in = input + y * pitch;
    uint32_t* out0 = output + y * outpitch * 2;
    uint32_t* out1 = output + y * outpitch * 2 + outpitch;

    //the first and last rows reuse themselves as the missing neighbor row
    int prevline = (y == 0 ? 0 : pitch);
    int nextline = (y == height - 1 ? 0 : pitch);

    //left border column
    in++;
    *out0++ = 0; *out0++ = 0;
    *out1++ = 0; *out1++ = 0;

    //a single-column image has only the one border column: the output row is two pixels wide
    if(width == 1) continue;

    for(unsigned x = 1; x < width - 1; x++) {
      uint16_t A = *(in - prevline - 1);
      uint16_t B = *(in - prevline + 0);
      uint16_t C = *(in - prevline + 1);
      uint16_t D = *(in - 1);
      uint16_t E = *(in + 0);
      uint16_t F = *(in + 1);
      uint16_t G = *(in + nextline - 1);
      uint16_t H = *(in + nextline + 0);
      uint16_t I = *(in + nextline + 1);
      uint32_t e = yuvTable[E] + diff_offset;

      //one bit per neighbor: set when the neighbor differs from the center pixel
      uint8_t pattern;
      pattern  = diff(e, A) << 0;
      pattern |= diff(e, B) << 1;
      pattern |= diff(e, C) << 2;
      pattern |= diff(e, D) << 3;
      pattern |= diff(e, F) << 4;
      pattern |= diff(e, G) << 5;
      pattern |= diff(e, H) << 6;
      pattern |= diff(e, I) << 7;

      //the same rule table serves all four output pixels: rotate the pattern (and the
      //neighbor arguments) by a quarter turn for each one
      *(out0 + 0) = colortable[blend(hqTable[pattern], E, A, B, D, F, H)]; pattern = rotate[pattern];
      *(out0 + 1) = colortable[blend(hqTable[pattern], E, C, F, B, H, D)]; pattern = rotate[pattern];
      *(out1 + 1) = colortable[blend(hqTable[pattern], E, I, H, F, D, B)]; pattern = rotate[pattern];
      *(out1 + 0) = colortable[blend(hqTable[pattern], E, G, D, H, B, F)];

      in++;
      out0 += 2;
      out1 += 2;
    }

    //right border column
    in++;
    *out0++ = 0; *out0++ = 0;
    *out1++ = 0; *out1++ = 0;
  }
}

}

46
bsnes/filter/lq2x.cpp Normal file
View File

@ -0,0 +1,46 @@
//LQ2x: 2x magnification. Same edge test as Scale2x, but where Scale2x would copy
//a neighbor into a corner, this filter writes the 50/50 mix of the center pixel
//and that neighbor.
namespace Filter::LQ2x {

//output is twice the source size in both directions
auto size(uint& width, uint& height) -> void {
  width = width * 2;
  height = height * 2;
}

//colortable: 15-bit color -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: 15-bit source image and its row pitch in bytes
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  //per-channel average of two 15-bit colors: 0x0421 holds the lowest bit of each
  //channel, removed beforehand so the shift cannot leak a bit into the next channel
  auto mixed = [&](int p, int q) -> uint32_t {
    return colortable[(p + q - ((p ^ q) & 0x0421)) >> 1];
  };

  for(uint row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* upper = output + row * targetStride * 2;
    uint32_t* lower = upper + targetStride;

    //edge rows and columns substitute the pixel itself for the missing neighbor
    const int toAbove = (row == 0 ? 0 : sourceStride);
    const int toBelow = (row == height - 1 ? 0 : sourceStride);

    for(uint column = 0; column < width; column++, source++, upper += 2, lower += 2) {
      const uint16_t above  = *(source - toAbove);
      const uint16_t left   = (column > 0) ? *(source - 1) : *source;
      const uint16_t center = *source;
      const uint16_t right  = (column < width - 1) ? *(source + 1) : *source;
      const uint16_t below  = *(source + toBelow);

      const uint32_t plain = colortable[center];
      const bool edge = above != below && left != right;

      upper[0] = (edge && above == left ) ? mixed(center, above) : plain;
      upper[1] = (edge && above == right) ? mixed(center, above) : plain;
      lower[0] = (edge && below == left ) ? mixed(center, below) : plain;
      lower[1] = (edge && below == right) ? mixed(center, below) : plain;
    }
  }
}

}

24
bsnes/filter/none.cpp Normal file
View File

@ -0,0 +1,24 @@
//None: plain color conversion with no scaling.
namespace Filter::None {

//the filtered image has the same dimensions as the source
auto size(uint& width, uint& height) -> void {
  (void)width;
  (void)height;
}

//colortable: 16-bit source pixel -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  for(uint row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* target = output + row * targetStride;
    for(uint column = 0; column < width; column++) {
      target[column] = colortable[source[column]];
    }
  }
}

}

View File

@ -0,0 +1,50 @@
//NTSC composite filter: snes_ntsc with the snes_ntsc_composite preset and merged fields.
namespace Filter::NTSC_Composite {

struct snes_ntsc_t *ntsc;
snes_ntsc_setup_t setup;
int burst;         //burst phase passed to the blitter for the current frame
int burst_toggle;  //XORed into burst after every frame; 0 when fields are merged

//One-time setup: allocates the snes_ntsc state and initializes it from the preset.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  ntsc = static_cast<snes_ntsc_t*>(malloc(sizeof *ntsc));
  setup = snes_ntsc_composite;
  setup.merge_fields = 1;
  snes_ntsc_init(ntsc, &setup);

  burst = 0;
  burst_toggle = setup.merge_fields ? 0 : 1;
}

//Releases the snes_ntsc state, if it was allocated.
void terminate() {
  if(ntsc != nullptr) free(ntsc);
}

//The output width is fixed; the height is left as it is.
auto size(uint& width, uint& height) -> void {
  (void)height;
  width = SNES_NTSC_OUT_WIDTH(256);
}

//colortable_: stored in the file-level colortable pointer before blitting
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable_, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();
  colortable = colortable_;

  //the blitters take the input stride in pixels and the output pitch in bytes
  //(the output pitch is rounded down to a whole number of 32-bit pixels)
  const uint sourceStride = pitch >> 1;
  const uint targetBytes = (outpitch >> 2) << 2;

  const bool hires = width > 256;
  if(hires) {
    snes_ntsc_blit_hires(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  } else {
    snes_ntsc_blit(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  }

  burst ^= burst_toggle;
}

}

50
bsnes/filter/ntsc-rf.cpp Normal file
View File

@ -0,0 +1,50 @@
//NTSC RF filter: snes_ntsc with the snes_ntsc_composite preset, fields not merged,
//so the burst phase alternates from one frame to the next.
namespace Filter::NTSC_RF {

struct snes_ntsc_t *ntsc;
snes_ntsc_setup_t setup;
int burst;         //burst phase passed to the blitter for the current frame
int burst_toggle;  //XORed into burst after every frame; 1 when fields are not merged

//One-time setup: allocates the snes_ntsc state and initializes it from the preset.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  ntsc = static_cast<snes_ntsc_t*>(malloc(sizeof *ntsc));
  setup = snes_ntsc_composite;
  setup.merge_fields = 0;
  snes_ntsc_init(ntsc, &setup);

  burst = 0;
  burst_toggle = setup.merge_fields ? 0 : 1;
}

//Releases the snes_ntsc state, if it was allocated.
void terminate() {
  if(ntsc != nullptr) free(ntsc);
}

//The output width is fixed; the height is left as it is.
auto size(uint& width, uint& height) -> void {
  (void)height;
  width = SNES_NTSC_OUT_WIDTH(256);
}

//colortable_: stored in the file-level colortable pointer before blitting
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable_, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();
  colortable = colortable_;

  //the blitters take the input stride in pixels and the output pitch in bytes
  //(the output pitch is rounded down to a whole number of 32-bit pixels)
  const uint sourceStride = pitch >> 1;
  const uint targetBytes = (outpitch >> 2) << 2;

  const bool hires = width > 256;
  if(hires) {
    snes_ntsc_blit_hires(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  } else {
    snes_ntsc_blit(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  }

  burst ^= burst_toggle;
}

}

50
bsnes/filter/ntsc-rgb.cpp Normal file
View File

@ -0,0 +1,50 @@
//NTSC RGB filter: snes_ntsc with the snes_ntsc_rgb preset and merged fields.
namespace Filter::NTSC_RGB {

struct snes_ntsc_t *ntsc;
snes_ntsc_setup_t setup;
int burst;         //burst phase passed to the blitter for the current frame
int burst_toggle;  //XORed into burst after every frame; 0 when fields are merged

//One-time setup: allocates the snes_ntsc state and initializes it from the preset.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  ntsc = static_cast<snes_ntsc_t*>(malloc(sizeof *ntsc));
  setup = snes_ntsc_rgb;
  setup.merge_fields = 1;
  snes_ntsc_init(ntsc, &setup);

  burst = 0;
  burst_toggle = setup.merge_fields ? 0 : 1;
}

//Releases the snes_ntsc state, if it was allocated.
void terminate() {
  if(ntsc != nullptr) free(ntsc);
}

//The output width is fixed; the height is left as it is.
auto size(uint& width, uint& height) -> void {
  (void)height;
  width = SNES_NTSC_OUT_WIDTH(256);
}

//colortable_: stored in the file-level colortable pointer before blitting
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable_, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();
  colortable = colortable_;

  //the blitters take the input stride in pixels and the output pitch in bytes
  //(the output pitch is rounded down to a whole number of 32-bit pixels)
  const uint sourceStride = pitch >> 1;
  const uint targetBytes = (outpitch >> 2) << 2;

  const bool hires = width > 256;
  if(hires) {
    snes_ntsc_blit_hires(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  } else {
    snes_ntsc_blit(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  }

  burst ^= burst_toggle;
}

}

View File

@ -0,0 +1,50 @@
//NTSC S-Video filter: snes_ntsc with the snes_ntsc_svideo preset and merged fields.
namespace Filter::NTSC_SVideo {

struct snes_ntsc_t *ntsc;
snes_ntsc_setup_t setup;
int burst;         //burst phase passed to the blitter for the current frame
int burst_toggle;  //XORed into burst after every frame; 0 when fields are merged

//One-time setup: allocates the snes_ntsc state and initializes it from the preset.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  ntsc = static_cast<snes_ntsc_t*>(malloc(sizeof *ntsc));
  setup = snes_ntsc_svideo;
  setup.merge_fields = 1;
  snes_ntsc_init(ntsc, &setup);

  burst = 0;
  burst_toggle = setup.merge_fields ? 0 : 1;
}

//Releases the snes_ntsc state, if it was allocated.
void terminate() {
  if(ntsc != nullptr) free(ntsc);
}

//The output width is fixed; the height is left as it is.
auto size(uint& width, uint& height) -> void {
  (void)height;
  width = SNES_NTSC_OUT_WIDTH(256);
}

//colortable_: stored in the file-level colortable pointer before blitting
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable_, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();
  colortable = colortable_;

  //the blitters take the input stride in pixels and the output pitch in bytes
  //(the output pitch is rounded down to a whole number of 32-bit pixels)
  const uint sourceStride = pitch >> 1;
  const uint targetBytes = (outpitch >> 2) << 2;

  const bool hires = width > 256;
  if(hires) {
    snes_ntsc_blit_hires(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  } else {
    snes_ntsc_blit(ntsc, input, sourceStride, burst, width, height, output, targetBytes);
  }

  burst ^= burst_toggle;
}

}

View File

@ -0,0 +1,40 @@
namespace Filter::Pixellate2x {
auto size(uint& width, uint& height) -> void {
width = (width <= 256) ? width * 2 : width;
height = (height <= 240) ? height * 2 : height;
}
auto render(
uint32_t* colortable, uint32_t* output, uint outpitch,
const uint16_t* input, uint pitch, uint width, uint height
) -> void {
pitch >>= 1;
outpitch >>= 2;
uint32_t *out0 = output;
uint32_t *out1 = output + outpitch;
for(unsigned y = 0; y < height; y++) {
for(unsigned x = 0; x < width; x++) {
uint32_t p = colortable[*input++];
*out0++ = p;
if(height <= 240) *out1++ = p;
if(width > 256) continue;
*out0++ = p;
if(height <= 240) *out1++ = p;
}
input += pitch - width;
if(height <= 240) {
out0 += outpitch + outpitch - 512;
out1 += outpitch + outpitch - 512;
} else {
out0 += outpitch - 512;
}
}
}
}

1175
bsnes/filter/sai/sai.cpp Normal file

File diff suppressed because it is too large Load Diff

46
bsnes/filter/scale2x.cpp Normal file
View File

@ -0,0 +1,46 @@
//Scale2x: 2x magnification. Each source pixel becomes a 2x2 block; a corner of
//the block takes the color of the adjacent vertical neighbor when that neighbor
//matches the adjacent horizontal neighbor, otherwise it keeps the center color.
namespace Filter::Scale2x {

//output is twice the source size in both directions
auto size(uint& width, uint& height) -> void {
  width = width * 2;
  height = height * 2;
}

//colortable: 16-bit source pixel -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  for(uint row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* upper = output + row * targetStride * 2;
    uint32_t* lower = upper + targetStride;

    //edge rows and columns substitute the pixel itself for the missing neighbor
    const int toAbove = (row == 0 ? 0 : sourceStride);
    const int toBelow = (row == height - 1 ? 0 : sourceStride);

    for(unsigned column = 0; column < width; column++, source++, upper += 2, lower += 2) {
      const uint16_t above  = *(source - toAbove);
      const uint16_t left   = (column > 0) ? *(source - 1) : *source;
      const uint16_t center = *source;
      const uint16_t right  = (column < width - 1) ? *(source + 1) : *source;
      const uint16_t below  = *(source + toBelow);

      const uint32_t plain = colortable[center];
      const bool edge = above != below && left != right;

      upper[0] = (edge && above == left ) ? colortable[above] : plain;
      upper[1] = (edge && above == right) ? colortable[above] : plain;
      lower[0] = (edge && below == left ) ? colortable[below] : plain;
      lower[1] = (edge && below == right) ? colortable[below] : plain;
    }
  }
}

}

View File

@ -0,0 +1,28 @@
//ScanlinesBlack: every source row is followed by a row of zeroes.
namespace Filter::ScanlinesBlack {

//width stays the same; height doubles
auto size(uint& width, uint& height) -> void {
  (void)width;
  height *= 2;
}

//palette: 16-bit source pixel -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: source image and its row pitch in bytes
auto render(
  uint32_t* palette, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  for(unsigned row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* picture = output + row * targetStride * 2;
    uint32_t* scanline = picture + targetStride;

    for(unsigned column = 0; column < width; column++) {
      picture[column] = palette[source[column]];
      scanline[column] = 0;
    }
  }
}

}

View File

@ -0,0 +1,48 @@
//ScanlinesDark: every source row is followed by a copy dimmed to roughly a third
//of its brightness.
namespace Filter::ScanlinesDark {

//15-bit color -> dimmed 15-bit color; filled in by initialize()
uint16_t adjust[32768];

//Builds the adjust table on first use.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  //scales one 5-bit channel; the product is truncated when stored back into the byte
  auto dim = [](uint8_t channel) -> uint8_t {
    channel *= 0.333;
    return channel;
  };

  for(unsigned color = 0; color < 32768; color++) {
    const uint8_t high   = dim((color >> 10) & 31);
    const uint8_t middle = dim((color >>  5) & 31);
    const uint8_t low    = dim((color >>  0) & 31);
    adjust[color] = (high << 10) + (middle << 5) + (low << 0);
  }
}

//width stays the same; height doubles
auto size(uint& width, uint& height) -> void {
  (void)width;
  height *= 2;
}

//palette: 15-bit color -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: 15-bit source image and its row pitch in bytes
auto render(
  uint32_t* palette, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();

  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  for(unsigned row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* picture = output + row * targetStride * 2;
    uint32_t* scanline = picture + targetStride;

    for(unsigned column = 0; column < width; column++) {
      const uint16_t color = source[column];
      picture[column] = palette[color];
      scanline[column] = palette[adjust[color]];
    }
  }
}

}

View File

@ -0,0 +1,48 @@
//ScanlinesLight: every source row is followed by a copy dimmed to roughly two
//thirds of its brightness.
namespace Filter::ScanlinesLight {

//15-bit color -> dimmed 15-bit color; filled in by initialize()
uint16_t adjust[32768];

//Builds the adjust table on first use.
void initialize() {
  static bool ready = false;
  if(ready) return;
  ready = true;

  //scales one 5-bit channel; the product is truncated when stored back into the byte
  auto dim = [](uint8_t channel) -> uint8_t {
    channel *= 0.666;
    return channel;
  };

  for(unsigned color = 0; color < 32768; color++) {
    const uint8_t high   = dim((color >> 10) & 31);
    const uint8_t middle = dim((color >>  5) & 31);
    const uint8_t low    = dim((color >>  0) & 31);
    adjust[color] = (high << 10) + (middle << 5) + (low << 0);
  }
}

//width stays the same; height doubles
auto size(uint& width, uint& height) -> void {
  (void)width;
  height *= 2;
}

//palette: 15-bit color -> 32-bit output color
//output/outpitch: destination image and its row pitch in bytes
//input/pitch: 15-bit source image and its row pitch in bytes
auto render(
  uint32_t* palette, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  initialize();

  const uint sourceStride = pitch >> 1;
  const uint targetStride = outpitch >> 2;

  for(unsigned row = 0; row < height; row++) {
    const uint16_t* source = input + row * sourceStride;
    uint32_t* picture = output + row * targetStride * 2;
    uint32_t* scanline = picture + targetStride;

    for(unsigned column = 0; column < width; column++) {
      const uint16_t color = source[column];
      picture[column] = palette[color];
      scanline[column] = palette[adjust[color]];
    }
  }
}

}

View File

@ -0,0 +1,251 @@
/* snes_ntsc 0.2.2. http://www.slack.net/~ant/ */
#include "snes_ntsc.h"
/* Copyright (C) 2006-2007 Shay Green. This module is free software; you
can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
module is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
/* Built-in setup presets. Each initializer lists the thirteen leading members of
snes_ntsc_setup_t positionally, in declaration order.
NOTE(review): the struct declaration is not visible from this file; check
snes_ntsc.h before reading meaning into individual values. */
snes_ntsc_setup_t const snes_ntsc_monochrome = { 0,-1, 0, 0,.2, 0,.2,-.2,-.2,-1, 1, 0, 0 };
snes_ntsc_setup_t const snes_ntsc_composite = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 };
snes_ntsc_setup_t const snes_ntsc_svideo = { 0, 0, 0, 0,.2, 0,.2, -1, -1, 0, 1, 0, 0 };
snes_ntsc_setup_t const snes_ntsc_rgb = { 0, 0, 0, 0,.2, 0,.7, -1, -1,-1, 1, 0, 0 };
/* Build-time parameters. alignment_count and burst_count are also used directly
below (pixel table size, error-correction loop).
NOTE(review): the rest are presumably consumed by snes_ntsc_impl.h, which is why
they are defined before it is included -- confirm. */
#define alignment_count 3
#define burst_count 3
#define rescale_in 8
#define rescale_out 7
#define artifacts_mid 1.0f
#define fringing_mid 1.0f
#define std_decoder_hue 0
#define rgb_bits 7 /* half normal range to allow for doubled hires pixels */
#define gamma_size 32
#include "snes_ntsc_impl.h"
/* 3 input pixels -> 8 composite samples */
/* one entry per input pixel alignment */
pixel_info_t const snes_ntsc_pixels [alignment_count] = {
{ PIXEL_OFFSET( -4, -9 ), { 1, 1, .6667f, 0 } },
{ PIXEL_OFFSET( -2, -7 ), { .3333f, 1, 1, .3333f } },
{ PIXEL_OFFSET( 0, -5 ), { 0, .6667f, 1, 1 } },
};
/* Averages the three kernels stored burst_size entries apart in io, in place:
each kernel is replaced by the mean of itself and the next one, the last wrapping
around to the first. All three are read before any of them is overwritten. */
static void merge_kernel_fields( snes_ntsc_rgb_t* io )
{
	snes_ntsc_rgb_t* first  = io + burst_size * 0;
	snes_ntsc_rgb_t* second = io + burst_size * 1;
	snes_ntsc_rgb_t* third  = io + burst_size * 2;
	int remaining = burst_size;
	
	while ( remaining )
	{
		snes_ntsc_rgb_t const a = *first  + rgb_bias;
		snes_ntsc_rgb_t const b = *second + rgb_bias;
		snes_ntsc_rgb_t const c = *third  + rgb_bias;
		
		/* merge colors without losing precision: the bits selected by
		snes_ntsc_rgb_builder are removed before halving */
		*first++  = ((a + b - ((a ^ b) & snes_ntsc_rgb_builder)) >> 1) - rgb_bias;
		*second++ = ((b + c - ((b ^ c) & snes_ntsc_rgb_builder)) >> 1) - rgb_bias;
		*third++  = ((c + a - ((c ^ a) & snes_ntsc_rgb_builder)) >> 1) - rgb_bias;
		
		--remaining;
	}
}
/* Walks burst_count kernel groups in `out`. For every i in the first half of a
kernel (rgb_kernel_size / 2 entries) it computes `error`: the target `color` minus
the sum of six kernel entries, then passes three of those entry indices to
DISTRIBUTE_ERROR. `out` advances by alignment_count * rgb_kernel_size per group.
NOTE(review): DISTRIBUTE_ERROR is defined outside this file and presumably refers
to the locals `error` and `out` by name -- check the macro before renaming them.
NOTE(review): the literals 14 and 28 look like one and two times rgb_kernel_size,
and 7 like half of it -- confirm against snes_ntsc_impl.h. */
static void correct_errors( snes_ntsc_rgb_t color, snes_ntsc_rgb_t* out )
{
int n;
for ( n = burst_count; n; --n )
{
unsigned i;
for ( i = 0; i < rgb_kernel_size / 2; i++ )
{
/* what is left of `color` after subtracting the six entries summed here */
snes_ntsc_rgb_t error = color -
out [i ] - out [(i+12)%14+14] - out [(i+10)%14+28] -
out [i + 7] - out [i + 5 +14] - out [i + 3 +28];
DISTRIBUTE_ERROR( i+3+28, i+5+14, i+7 );
}
out += alignment_count * rgb_kernel_size;
}
}
/* Fills ntsc->table with one kernel set per palette entry.
ntsc:  state to initialize; every entry of ntsc->table up to
       snes_ntsc_palette_size is written.
setup: filter parameters; a null pointer selects snes_ntsc_composite.
Fields are merged when setup->merge_fields is set, and also whenever both
setup->artifacts and setup->fringing are <= -1. */
void snes_ntsc_init( snes_ntsc_t* ntsc, snes_ntsc_setup_t const* setup )
{
int merge_fields;
int entry;
init_t impl;
if ( !setup )
setup = &snes_ntsc_composite;
init( &impl, setup );
merge_fields = setup->merge_fields;
if ( setup->artifacts <= -1 && setup->fringing <= -1 )
merge_fields = 1;
for ( entry = 0; entry < snes_ntsc_palette_size; entry++ )
{
/* Reduce number of significant bits of source color. Clearing the
low bits of R and B were least noticeable. Modifying green was too
noticeable. */
/* entry packs the color as 4 bits of red, 5 of green, 4 of blue; red and blue
are expanded to 5-bit values with a zero low bit */
int ir = entry >> 8 & 0x1E;
int ig = entry >> 4 & 0x1F;
int ib = entry << 1 & 0x1E;
#if SNES_NTSC_BSNES_COLORTBL
/* optional remapping through a caller-supplied table indexed by the 15-bit color */
if ( setup->bsnes_colortbl )
{
int bgr15 = (ib << 10) | (ig << 5) | ir;
unsigned long rgb16 = setup->bsnes_colortbl [bgr15];
ir = rgb16 >> 11 & 0x1E;
ig = rgb16 >> 6 & 0x1F;
ib = rgb16 & 0x1E;
}
#endif
{
float rr = impl.to_float [ir];
float gg = impl.to_float [ig];
float bb = impl.to_float [ib];
/* NOTE(review): RGB_TO_YIQ and YIQ_TO_RGB are macros defined outside this file;
each evaluates to the last component and assigns the others through its trailing
arguments, judging by how they are used here -- confirm. */
float y, i, q = RGB_TO_YIQ( rr, gg, bb, y, i );
int r, g, b = YIQ_TO_RGB( y, i, q, impl.to_rgb, int, r, g );
snes_ntsc_rgb_t rgb = PACK_RGB( r, g, b );
snes_ntsc_rgb_t* out = ntsc->table [entry];
gen_kernel( &impl, y, i, q, out );
if ( merge_fields )
merge_kernel_fields( out );
correct_errors( rgb, out );
}
}
}
#ifndef SNES_NTSC_NO_BLITTERS
/* Filters in_height rows of in_width input pixels into rgb_out.
ntsc:         state prepared by snes_ntsc_init
input:        first input pixel; advanced by in_row_width elements per row
in_row_width: distance between input rows, in input pixels
burst_phase:  phase for the first row; advanced by one, modulo
              snes_ntsc_burst_count, after every row
rgb_out:      first output pixel; advanced by out_pitch bytes per row
Each row starts from two black pixels plus the first input pixel, then runs
(in_width - 1) / snes_ntsc_in_chunk chunks that each read 3 input pixels and write
7 output pixels, and ends by feeding black pixels to write 7 more output pixels. */
void snes_ntsc_blit( snes_ntsc_t const* ntsc, SNES_NTSC_IN_T const* input, long in_row_width,
int burst_phase, int in_width, int in_height, void* rgb_out, long out_pitch )
{
int chunk_count = (in_width - 1) / snes_ntsc_in_chunk;
for ( ; in_height; --in_height )
{
SNES_NTSC_IN_T const* line_in = input;
SNES_NTSC_BEGIN_ROW( ntsc, burst_phase,
snes_ntsc_black, snes_ntsc_black, SNES_NTSC_ADJ_IN( *line_in ) );
snes_ntsc_out_t* restrict line_out = (snes_ntsc_out_t*) rgb_out;
int n;
/* the first pixel was already consumed by SNES_NTSC_BEGIN_ROW above */
++line_in;
for ( n = chunk_count; n; --n )
{
/* order of input and output pixels must not be altered */
SNES_NTSC_COLOR_IN( 0, SNES_NTSC_ADJ_IN( line_in [0] ) );
SNES_NTSC_RGB_OUT( 0, line_out [0], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 1, line_out [1], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 1, SNES_NTSC_ADJ_IN( line_in [1] ) );
SNES_NTSC_RGB_OUT( 2, line_out [2], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 3, line_out [3], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 2, SNES_NTSC_ADJ_IN( line_in [2] ) );
SNES_NTSC_RGB_OUT( 4, line_out [4], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 5, line_out [5], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 6, line_out [6], SNES_NTSC_OUT_DEPTH );
line_in += 3;
line_out += 7;
}
/* finish final pixels */
SNES_NTSC_COLOR_IN( 0, snes_ntsc_black );
SNES_NTSC_RGB_OUT( 0, line_out [0], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 1, line_out [1], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 1, snes_ntsc_black );
SNES_NTSC_RGB_OUT( 2, line_out [2], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 3, line_out [3], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 2, snes_ntsc_black );
SNES_NTSC_RGB_OUT( 4, line_out [4], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 5, line_out [5], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_RGB_OUT( 6, line_out [6], SNES_NTSC_OUT_DEPTH );
burst_phase = (burst_phase + 1) % snes_ntsc_burst_count;
input += in_row_width;
/* out_pitch is in bytes, hence the char* arithmetic */
rgb_out = (char*) rgb_out + out_pitch;
}
}
/* High-resolution variant of snes_ntsc_blit; the parameters have the same meaning.
Each row starts from three black pixels plus the first two input pixels, then runs
(in_width - 2) / (snes_ntsc_in_chunk * 2) chunks that each read 6 input pixels and
write 7 output pixels, and ends by feeding black pixels to write 7 more output
pixels. burst_phase advances by one, modulo snes_ntsc_burst_count, after every row. */
void snes_ntsc_blit_hires( snes_ntsc_t const* ntsc, SNES_NTSC_IN_T const* input, long in_row_width,
int burst_phase, int in_width, int in_height, void* rgb_out, long out_pitch )
{
int chunk_count = (in_width - 2) / (snes_ntsc_in_chunk * 2);
for ( ; in_height; --in_height )
{
SNES_NTSC_IN_T const* line_in = input;
SNES_NTSC_HIRES_ROW( ntsc, burst_phase,
snes_ntsc_black, snes_ntsc_black, snes_ntsc_black,
SNES_NTSC_ADJ_IN( line_in [0] ),
SNES_NTSC_ADJ_IN( line_in [1] ) );
snes_ntsc_out_t* restrict line_out = (snes_ntsc_out_t*) rgb_out;
int n;
/* the first two pixels were already consumed by SNES_NTSC_HIRES_ROW above */
line_in += 2;
for ( n = chunk_count; n; --n )
{
/* twice as many input pixels per chunk */
SNES_NTSC_COLOR_IN( 0, SNES_NTSC_ADJ_IN( line_in [0] ) );
SNES_NTSC_HIRES_OUT( 0, line_out [0], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 1, SNES_NTSC_ADJ_IN( line_in [1] ) );
SNES_NTSC_HIRES_OUT( 1, line_out [1], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 2, SNES_NTSC_ADJ_IN( line_in [2] ) );
SNES_NTSC_HIRES_OUT( 2, line_out [2], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 3, SNES_NTSC_ADJ_IN( line_in [3] ) );
SNES_NTSC_HIRES_OUT( 3, line_out [3], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 4, SNES_NTSC_ADJ_IN( line_in [4] ) );
SNES_NTSC_HIRES_OUT( 4, line_out [4], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 5, SNES_NTSC_ADJ_IN( line_in [5] ) );
SNES_NTSC_HIRES_OUT( 5, line_out [5], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_HIRES_OUT( 6, line_out [6], SNES_NTSC_OUT_DEPTH );
line_in += 6;
line_out += 7;
}
/* flush the row: black input for the final seven output pixels */
SNES_NTSC_COLOR_IN( 0, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 0, line_out [0], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 1, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 1, line_out [1], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 2, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 2, line_out [2], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 3, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 3, line_out [3], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 4, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 4, line_out [4], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_COLOR_IN( 5, snes_ntsc_black );
SNES_NTSC_HIRES_OUT( 5, line_out [5], SNES_NTSC_OUT_DEPTH );
SNES_NTSC_HIRES_OUT( 6, line_out [6], SNES_NTSC_OUT_DEPTH );
burst_phase = (burst_phase + 1) % snes_ntsc_burst_count;
input += in_row_width;
/* out_pitch is in bytes, hence the char* arithmetic */
rgb_out = (char*) rgb_out + out_pitch;
}
}
#endif

View File

@ -0,0 +1,228 @@
/* SNES NTSC video filter */
/* snes_ntsc 0.2.2 */
#ifndef SNES_NTSC_H
#define SNES_NTSC_H
#include "snes_ntsc_config.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Image parameters, ranging from -1.0 to 1.0. Actual internal values shown
in parentheses and should remain fairly stable in future versions. */
typedef struct snes_ntsc_setup_t
{
/* Basic parameters */
double hue; /* -1 = -180 degrees +1 = +180 degrees */
double saturation; /* -1 = grayscale (0.0) +1 = oversaturated colors (2.0) */
double contrast; /* -1 = dark (0.5) +1 = light (1.5) */
double brightness; /* -1 = dark (0.5) +1 = light (1.5) */
double sharpness; /* edge contrast enhancement/blurring */
/* Advanced parameters */
double gamma; /* -1 = dark (1.5) +1 = light (0.5) */
double resolution; /* image resolution */
double artifacts; /* artifacts caused by color changes */
double fringing; /* color artifacts caused by brightness changes */
double bleed; /* color bleed (color resolution reduction) */
int merge_fields; /* if 1, merges even and odd fields together to reduce flicker */
/* when NULL, the library's built-in default decoder matrix is used instead */
float const* decoder_matrix; /* optional RGB decoder matrix, 6 elements */
unsigned long const* bsnes_colortbl; /* undocumented; set to 0 */
} snes_ntsc_setup_t;
/* Video format presets */
extern snes_ntsc_setup_t const snes_ntsc_composite; /* color bleeding + artifacts */
extern snes_ntsc_setup_t const snes_ntsc_svideo; /* color bleeding only */
extern snes_ntsc_setup_t const snes_ntsc_rgb; /* crisp image */
extern snes_ntsc_setup_t const snes_ntsc_monochrome;/* desaturated + artifacts */
/* Initializes and adjusts parameters. Can be called multiple times on the same
snes_ntsc_t object. Can pass NULL for either parameter. */
typedef struct snes_ntsc_t snes_ntsc_t;
void snes_ntsc_init( snes_ntsc_t* ntsc, snes_ntsc_setup_t const* setup );
/* Filters one or more rows of pixels. Input pixel format is set by SNES_NTSC_IN_FORMAT
and output RGB depth is set by SNES_NTSC_OUT_DEPTH. Both default to 16-bit RGB.
In_row_width is the number of pixels to get to the next input row. Out_pitch
is the number of *bytes* to get to the next output row. */
void snes_ntsc_blit( snes_ntsc_t const* ntsc, SNES_NTSC_IN_T const* input,
long in_row_width, int burst_phase, int in_width, int in_height,
void* rgb_out, long out_pitch );
void snes_ntsc_blit_hires( snes_ntsc_t const* ntsc, SNES_NTSC_IN_T const* input,
long in_row_width, int burst_phase, int in_width, int in_height,
void* rgb_out, long out_pitch );
/* Output width, in pixels, that the low-res blitter writes for an input row of
in_width pixels. The input width may effectively be rounded down a little; feed
the result to SNES_NTSC_IN_WIDTH() to obtain the rounded input width. An input
width of 256 is never rounded down. */
#define SNES_NTSC_OUT_WIDTH( in_width ) \
	(snes_ntsc_out_chunk * (1 + ((in_width) - 1) / snes_ntsc_in_chunk))
/* Count of low-res input pixels that fit in an output row of out_width pixels.
May be rounded down a little; feed the result to SNES_NTSC_OUT_WIDTH() to obtain
the rounded output width. */
#define SNES_NTSC_IN_WIDTH( out_width ) \
	(1 + snes_ntsc_in_chunk * ((out_width) / snes_ntsc_out_chunk - 1))
/* Interface for user-defined custom blitters */
/* input pixels consumed by one chunk */
enum { snes_ntsc_in_chunk = 3 };
/* output pixels produced by one chunk */
enum { snes_ntsc_out_chunk = 7 };
/* palette index that represents black */
enum { snes_ntsc_black = 0 };
/* the burst phase takes the values 0, 1 and 2 in turn */
enum { snes_ntsc_burst_count = 3 };
/* Begins outputting row and starts three pixels. First pixel will be cut off a bit.
Use snes_ntsc_black for unused pixels. Declares variables, so must be before first
statement in a block (unless you're using C++).
ntsc:   pointer to the initialized snes_ntsc_t whose table is read
burst:  burst phase for this row; selects which burst-sized slice of each table
        entry is used. Parenthesized so that an expression such as
        (phase + 1) % 3 is scaled as a whole.
pixel0..pixel2: the first three input pixels */
#define SNES_NTSC_BEGIN_ROW( ntsc, burst, pixel0, pixel1, pixel2 ) \
	char const* ktable = \
		(char const*) (ntsc)->table + (burst) * (snes_ntsc_burst_size * sizeof (snes_ntsc_rgb_t));\
	SNES_NTSC_BEGIN_ROW_6_( pixel0, pixel1, pixel2, SNES_NTSC_IN_FORMAT, ktable )
/* Begins input pixel. index selects which kernel slot (kernel<index>) receives
the new color; the slot's previous kernel is kept in kernelx<index>. Requires
the variables declared by SNES_NTSC_BEGIN_ROW / SNES_NTSC_HIRES_ROW. */
#define SNES_NTSC_COLOR_IN( index, color ) \
SNES_NTSC_COLOR_IN_( index, color, SNES_NTSC_IN_FORMAT, ktable )
/* Generates output pixel. Bits can be 24, 16, 15, 14, 32 (treated as 24), or 0:
24: RRRRRRRR GGGGGGGG BBBBBBBB (8-8-8 RGB)
16: RRRRRGGG GGGBBBBB (5-6-5 RGB)
15: RRRRRGG GGGBBBBB (5-5-5 RGB)
14: BBBBBGG GGGRRRRR (5-5-5 BGR, native SNES format)
0: xxxRRRRR RRRxxGGG GGGGGxxB BBBBBBBx (native internal format; x = junk bits)
NOTE: the list above describes the upstream library. The bsnes version of
SNES_NTSC_RGB_OUT_ in this header only has branches for 16, 24/32 and 15, and
in those branches the pixel is reduced to a 15-bit index and looked up in
`colortable`; any other value (including 14) takes the native-format branch. */
#define SNES_NTSC_RGB_OUT( index, rgb_out, bits ) \
SNES_NTSC_RGB_OUT_14_( index, rgb_out, bits, 1 )
/* Hires equivalents */
/* Begins a hi-res row. Declares ktable plus the kernel0..kernel5 and
kernelx0..kernelx5 pointers used by SNES_NTSC_COLOR_IN and SNES_NTSC_HIRES_OUT,
so it must appear where declarations are allowed.
ntsc:   pointer to the initialized snes_ntsc_t whose table is read
burst:  burst phase for this row. Parenthesized so that a compound expression
        is scaled as a whole.
pixel1..pixel5: initial input pixels for kernel slots 1..5; slot 0 starts as a
        copy of slot 1.
kernelx0 is deliberately left unassigned here: it receives its value from the
first SNES_NTSC_COLOR_IN( 0, ... ), which must run before the first
SNES_NTSC_HIRES_OUT. */
#define SNES_NTSC_HIRES_ROW( ntsc, burst, pixel1, pixel2, pixel3, pixel4, pixel5 ) \
	char const* ktable = \
		(char const*) (ntsc)->table + (burst) * (snes_ntsc_burst_size * sizeof (snes_ntsc_rgb_t));\
	unsigned const snes_ntsc_pixel1_ = (pixel1);\
	snes_ntsc_rgb_t const* kernel1 = SNES_NTSC_IN_FORMAT( ktable, snes_ntsc_pixel1_ );\
	unsigned const snes_ntsc_pixel2_ = (pixel2);\
	snes_ntsc_rgb_t const* kernel2 = SNES_NTSC_IN_FORMAT( ktable, snes_ntsc_pixel2_ );\
	unsigned const snes_ntsc_pixel3_ = (pixel3);\
	snes_ntsc_rgb_t const* kernel3 = SNES_NTSC_IN_FORMAT( ktable, snes_ntsc_pixel3_ );\
	unsigned const snes_ntsc_pixel4_ = (pixel4);\
	snes_ntsc_rgb_t const* kernel4 = SNES_NTSC_IN_FORMAT( ktable, snes_ntsc_pixel4_ );\
	unsigned const snes_ntsc_pixel5_ = (pixel5);\
	snes_ntsc_rgb_t const* kernel5 = SNES_NTSC_IN_FORMAT( ktable, snes_ntsc_pixel5_ );\
	snes_ntsc_rgb_t const* kernel0 = kernel1;\
	snes_ntsc_rgb_t const* kernelx0;\
	snes_ntsc_rgb_t const* kernelx1 = kernel1;\
	snes_ntsc_rgb_t const* kernelx2 = kernel1;\
	snes_ntsc_rgb_t const* kernelx3 = kernel1;\
	snes_ntsc_rgb_t const* kernelx4 = kernel1;\
	snes_ntsc_rgb_t const* kernelx5 = kernel1
/* Generates one hi-res output pixel. x is the output position within the
seven-pixel chunk; it is pasted unparenthesized into index arithmetic, so pass
a plain constant (the built-in blitter passes 0..6). The pixel is the sum of
twelve table entries, one from each current kernel (kernel0..5) and each
previous kernel (kernelx0..5), which is then clamped and converted to the
requested depth and stored in rgb_out. */
#define SNES_NTSC_HIRES_OUT( x, rgb_out, bits ) {\
snes_ntsc_rgb_t raw_ =\
kernel0 [ x ] + kernel2 [(x+5)%7+14] + kernel4 [(x+3)%7+28] +\
kernelx0 [(x+7)%7+7] + kernelx2 [(x+5)%7+21] + kernelx4 [(x+3)%7+35] +\
kernel1 [(x+6)%7 ] + kernel3 [(x+4)%7+14] + kernel5 [(x+2)%7+28] +\
kernelx1 [(x+6)%7+7] + kernelx3 [(x+4)%7+21] + kernelx5 [(x+2)%7+35];\
SNES_NTSC_CLAMP_( raw_, 0 );\
SNES_NTSC_RGB_OUT_( rgb_out, (bits), 0 );\
}
/* private */
/* number of snes_ntsc_rgb_t values stored per palette entry */
enum { snes_ntsc_entry_size = 128 };
/* number of palette entries in the table (matches the 13-bit index built by
SNES_NTSC_RGB16 / SNES_NTSC_BGR15) */
enum { snes_ntsc_palette_size = 0x2000 };
/* packed-RGB accumulator type used for table entries and intermediate sums */
typedef unsigned long snes_ntsc_rgb_t;
/* Filter state: one precomputed entry of snes_ntsc_entry_size values per
palette index. Note that this is a large object
(snes_ntsc_palette_size * snes_ntsc_entry_size * sizeof (snes_ntsc_rgb_t) bytes). */
struct snes_ntsc_t {
snes_ntsc_rgb_t table [snes_ntsc_palette_size] [snes_ntsc_entry_size];
};
/* number of values within one entry that belong to a single burst phase */
enum { snes_ntsc_burst_size = snes_ntsc_entry_size / snes_ntsc_burst_count };
/* Input-format macros: map a raw pixel value n to the address of its kernel
entry inside ktable (a char pointer that already includes the burst offset).
Both build an even 14-bit index (bit 0 is always clear) from the upper bits of
the color channels and scale it by half an entry, which is equivalent to
selecting entry (index / 2).
Every argument is parenthesized and the whole result is wrapped, so compound
expressions such as `a | b` or `base + off` expand correctly. */
/* n is 5-6-5 RGB: keeps the top 4 bits of blue, top 5 bits of green and top
4 bits of red */
#define SNES_NTSC_RGB16( ktable, n ) \
	((snes_ntsc_rgb_t const*) ((ktable) + (((n) & 0x001E) | ((n) >> 1 & 0x03E0) | ((n) >> 2 & 0x3C00)) * \
			(snes_ntsc_entry_size / 2 * sizeof (snes_ntsc_rgb_t))))
/* n is 5-5-5 BGR (red in the low bits): bits 1-4 of n go to index bits 10-13,
bits 5-9 stay in place and bits 11-14 go to index bits 1-4 */
#define SNES_NTSC_BGR15( ktable, n ) \
	((snes_ntsc_rgb_t const*) ((ktable) + (((n) << 9 & 0x3C00) | ((n) & 0x03E0) | ((n) >> 10 & 0x001E)) * \
			(snes_ntsc_entry_size / 2 * sizeof (snes_ntsc_rgb_t))))
/* common 3->7 ntsc macros */
/* Declares the low-res row state: kernel0..2 are looked up from the three
starting pixels via ENTRY( table, pixel ); kernelx1 and kernelx2 start as copies
of kernel0. kernelx0 is left unassigned and gets its value from the first
SNES_NTSC_COLOR_IN_( 0, ... ). Each pixel argument is evaluated exactly once. */
#define SNES_NTSC_BEGIN_ROW_6_( pixel0, pixel1, pixel2, ENTRY, table ) \
unsigned const snes_ntsc_pixel0_ = (pixel0);\
snes_ntsc_rgb_t const* kernel0 = ENTRY( table, snes_ntsc_pixel0_ );\
unsigned const snes_ntsc_pixel1_ = (pixel1);\
snes_ntsc_rgb_t const* kernel1 = ENTRY( table, snes_ntsc_pixel1_ );\
unsigned const snes_ntsc_pixel2_ = (pixel2);\
snes_ntsc_rgb_t const* kernel2 = ENTRY( table, snes_ntsc_pixel2_ );\
snes_ntsc_rgb_t const* kernelx0;\
snes_ntsc_rgb_t const* kernelx1 = kernel0;\
snes_ntsc_rgb_t const* kernelx2 = kernel0
/* Generates one low-res output pixel: sums six table entries (the three current
kernels and the three previous ones), clamps the packed sum and converts it to
the requested depth in rgb_out. x is pasted unparenthesized into index
arithmetic, so pass a plain constant (the built-in blitter passes 0..6). shift
is forwarded to the clamp and output macros. */
#define SNES_NTSC_RGB_OUT_14_( x, rgb_out, bits, shift ) {\
snes_ntsc_rgb_t raw_ =\
kernel0 [x ] + kernel1 [(x+12)%7+14] + kernel2 [(x+10)%7+28] +\
kernelx0 [(x+7)%14] + kernelx1 [(x+ 5)%7+21] + kernelx2 [(x+ 3)%7+35];\
SNES_NTSC_CLAMP_( raw_, shift );\
SNES_NTSC_RGB_OUT_( rgb_out, bits, shift );\
}
/* common ntsc macros */
/* one bit set in each of the three packed channel positions (bits 21, 11, 1) */
#define snes_ntsc_rgb_builder ((1L << 21) | (1 << 11) | (1 << 1))
#define snes_ntsc_clamp_mask (snes_ntsc_rgb_builder * 3 / 2)
#define snes_ntsc_clamp_add (snes_ntsc_rgb_builder * 0x101)
/* Branch-free fix-up of the packed value io, applied to all three channels at
once: `sub` picks out per-channel flag bits from io shifted right by
(9 - shift), and the OR / AND pair then forces bits of io on or off based on
those flags. The statement order matters (clamp is reused after being modified).
NOTE(review): presumably saturates each channel to its valid range after
kernel summation -- confirm against the upstream snes_ntsc documentation. */
#define SNES_NTSC_CLAMP_( io, shift ) {\
snes_ntsc_rgb_t sub = (io) >> (9-(shift)) & snes_ntsc_clamp_mask;\
snes_ntsc_rgb_t clamp = snes_ntsc_clamp_add - sub;\
io |= clamp;\
clamp -= sub;\
io &= clamp;\
}
/* Feeds a new input color into kernel slot `index`: the slot's current kernel
is saved in kernelx<index>, then kernel<index> is replaced by the table entry
for `color`. `color` is evaluated once (copied into color_) because ENTRY may
use its argument several times. */
#define SNES_NTSC_COLOR_IN_( index, color, ENTRY, table ) {\
unsigned color_;\
kernelx##index = kernel##index;\
kernel##index = (color_ = (color), ENTRY( table, color_ ));\
}
/* x is always zero except in snes_ntsc library */
/* original routine */
/*
#define SNES_NTSC_RGB_OUT_( rgb_out, bits, x ) {\
if ( bits == 16 )\
rgb_out = (raw_>>(13-x)& 0xF800)|(raw_>>(8-x)&0x07E0)|(raw_>>(4-x)&0x001F);\
if ( bits == 24 || bits == 32 )\
rgb_out = (raw_>>(5-x)&0xFF0000)|(raw_>>(3-x)&0xFF00)|(raw_>>(1-x)&0xFF);\
if ( bits == 15 )\
rgb_out = (raw_>>(14-x)& 0x7C00)|(raw_>>(9-x)&0x03E0)|(raw_>>(4-x)&0x001F);\
if ( bits == 14 )\
rgb_out = (raw_>>(24-x)& 0x001F)|(raw_>>(9-x)&0x03E0)|(raw_<<(6+x)&0x7C00);\
if ( bits == 0 )\
rgb_out = raw_ << x;\
}
*/
/* custom bsnes routine -- hooks into bsnes colortable */
/* Converts the clamped packed value raw_ (a variable of the expanding macro)
into rgb_out. For bits == 16, 24/32 and 15 the pixel is first unpacked as in
the original routine, then reduced to a 15-bit value with red in bits 0-4,
green in bits 5-9 and blue in bits 10-14, and finally replaced by
colortable[that value]. `colortable` is not a parameter: an identifier of that
name must be visible wherever the macro is expanded. Unlike the original
routine there is no bits == 14 branch; every other value of bits stores
raw_ << x. */
#define SNES_NTSC_RGB_OUT_( rgb_out, bits, x ) {\
if ( bits == 16 ) {\
rgb_out = (raw_>>(13-x)& 0xF800)|(raw_>>(8-x)&0x07E0)|(raw_>>(4-x)&0x001F);\
rgb_out = ((rgb_out&0xf800)>>11)|((rgb_out&0x07c0)>>1)|((rgb_out&0x001f)<<10);\
rgb_out = colortable[rgb_out];\
} else if ( bits == 24 || bits == 32 ) {\
rgb_out = (raw_>>(5-x)&0xFF0000)|(raw_>>(3-x)&0xFF00)|(raw_>>(1-x)&0xFF);\
rgb_out = ((rgb_out&0xf80000)>>19)|((rgb_out&0x00f800)>>6)|((rgb_out&0x0000f8)<<7);\
rgb_out = colortable[rgb_out];\
} else if ( bits == 15 ) {\
rgb_out = (raw_>>(14-x)& 0x7C00)|(raw_>>(9-x)&0x03E0)|(raw_>>(4-x)&0x001F);\
rgb_out = ((rgb_out&0x7c00)>>10)|((rgb_out&0x03e0))|((rgb_out&0x001f)<<10);\
rgb_out = colortable[rgb_out];\
} else {\
rgb_out = raw_ << x;\
}\
}
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,26 @@
/* Configure library by modifying this file */
#ifndef SNES_NTSC_CONFIG_H
#define SNES_NTSC_CONFIG_H
/* Format of source pixels */
/* #define SNES_NTSC_IN_FORMAT SNES_NTSC_RGB16 */
#define SNES_NTSC_IN_FORMAT SNES_NTSC_BGR15
/* The following affect the built-in blitter only; a custom blitter can
handle things however it wants. */
/* Bits per pixel of output. Can be 15, 16, 32, or 24 (same as 32). */
#define SNES_NTSC_OUT_DEPTH 32
/* Type of input pixel values */
#define SNES_NTSC_IN_T unsigned short
/* Each raw pixel input value is passed through this. You might want to mask
the pixel index if you use the high bits as flags, etc. */
#define SNES_NTSC_ADJ_IN( in ) in
/* For each pixel, this is the basic operation:
output_color = SNES_NTSC_ADJ_IN( SNES_NTSC_IN_T ) */
#endif

View File

@ -0,0 +1,439 @@
/* snes_ntsc 0.2.2. http://www.slack.net/~ant/ */
/* Common implementation of NTSC filters */
#include <assert.h>
#include <math.h>
/* Copyright (C) 2006 Shay Green. This module is free software; you
can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
module is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
#define DISABLE_CORRECTION 0
#undef PI
#define PI 3.14159265358979323846f
#ifndef LUMA_CUTOFF
#define LUMA_CUTOFF 0.20
#endif
#ifndef gamma_size
#define gamma_size 1
#endif
#ifndef rgb_bits
#define rgb_bits 8
#endif
#ifndef artifacts_max
#define artifacts_max (artifacts_mid * 1.5f)
#endif
#ifndef fringing_max
#define fringing_max (fringing_mid * 2)
#endif
#ifndef STD_HUE_CONDITION
#define STD_HUE_CONDITION( setup ) 1
#endif
#define ext_decoder_hue (std_decoder_hue + 15)
#define rgb_unit (1 << rgb_bits)
#define rgb_offset (rgb_unit * 2 + 0.5f)
enum { burst_size = snes_ntsc_entry_size / burst_count };
enum { kernel_half = 16 };
enum { kernel_size = kernel_half * 2 + 1 };
/* Scratch state built by init() / init_filters() and read by gen_kernel(). */
typedef struct init_t
{
float to_rgb [burst_count * 6]; /* one 6-element decoder matrix per burst phase */
float to_float [gamma_size]; /* gamma table; only filled when gamma_size > 1 */
float contrast; /* scaled from setup->contrast */
float brightness; /* scaled from setup->brightness, includes rgb_offset */
float artifacts; /* scaled from setup->artifacts */
float fringing; /* scaled from setup->fringing */
float kernel [rescale_out * kernel_size * 2]; /* per rescale step: chroma kernel followed by luma kernel */
} init_t;
/* Rotates the (i, q) pair in place using the given sine and cosine of the
rotation angle. i and q must be assignable; all arguments are pasted as-is. */
#define ROTATE_IQ( i, q, sin_b, cos_b ) {\
	float const rotated_i_ = i * cos_b - q * sin_b;\
	q = i * sin_b + q * cos_b;\
	i = rotated_i_;\
}
/* Builds the convolution kernels used by gen_kernel(). Two kernels of
kernel_size taps are generated back to back: the chroma kernel in
[0, kernel_size) and the luma kernel in [kernel_size, kernel_size * 2).
When rescale_out > 1 they are built in a local buffer and then expanded into
rescale_out linearly-weighted copies in impl->kernel; otherwise they are
written to impl->kernel directly.
Reads setup->sharpness, setup->resolution and setup->bleed. */
static void init_filters( init_t* impl, snes_ntsc_setup_t const* setup )
{
#if rescale_out > 1
float kernels [kernel_size * 2];
#else
float* const kernels = impl->kernel;
#endif
/* generate luma (y) filter using sinc kernel */
{
/* sinc with rolloff (dsf) */
float const rolloff = 1 + (float) setup->sharpness * (float) 0.032;
float const maxh = 32;
float const pow_a_n = (float) pow( rolloff, maxh );
float sum;
int i;
/* quadratic mapping to reduce negative (blurring) range */
float to_angle = (float) setup->resolution + 1;
to_angle = PI / maxh * (float) LUMA_CUTOFF * (to_angle * to_angle + 1);
/* kernel_size * 3 / 2 is the center tap of the luma half of the buffer */
kernels [kernel_size * 3 / 2] = maxh; /* default center value */
for ( i = 0; i < kernel_half * 2 + 1; i++ )
{
int x = i - kernel_half;
float angle = x * to_angle;
/* instability occurs at center point with rolloff very close to 1.0 */
/* in that case the center tap keeps the default value assigned above */
if ( x || pow_a_n > (float) 1.056 || pow_a_n < (float) 0.981 )
{
float rolloff_cos_a = rolloff * (float) cos( angle );
float num = 1 - rolloff_cos_a -
pow_a_n * (float) cos( maxh * angle ) +
pow_a_n * rolloff * (float) cos( (maxh - 1) * angle );
float den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
float dsf = num / den;
kernels [kernel_size * 3 / 2 - kernel_half + i] = dsf - (float) 0.5;
}
}
/* apply blackman window and find sum */
sum = 0;
for ( i = 0; i < kernel_half * 2 + 1; i++ )
{
float x = PI * 2 / (kernel_half * 2) * i;
float blackman = 0.42f - 0.5f * (float) cos( x ) + 0.08f * (float) cos( x * 2 );
sum += (kernels [kernel_size * 3 / 2 - kernel_half + i] *= blackman);
}
/* normalize kernel */
sum = 1.0f / sum;
for ( i = 0; i < kernel_half * 2 + 1; i++ )
{
int x = kernel_size * 3 / 2 - kernel_half + i;
kernels [x] *= sum;
/* a NaN never compares equal to itself */
assert( kernels [x] == kernels [x] ); /* catch numerical instability */
}
}
/* generate chroma (iq) filter using gaussian kernel */
{
float const cutoff_factor = -0.03125f;
float cutoff = (float) setup->bleed;
int i;
if ( cutoff < 0 )
{
/* keep extreme value accessible only near upper end of scale (1.0) */
/* three squarings raise cutoff to the 8th power */
cutoff *= cutoff;
cutoff *= cutoff;
cutoff *= cutoff;
cutoff *= -30.0f / 0.65f;
}
cutoff = cutoff_factor - 0.65f * cutoff_factor * cutoff;
/* kernel_size / 2 is the center tap of the chroma half of the buffer */
for ( i = -kernel_half; i <= kernel_half; i++ )
kernels [kernel_size / 2 + i] = (float) exp( i * i * cutoff );
/* normalize even and odd phases separately */
for ( i = 0; i < 2; i++ )
{
float sum = 0;
int x;
for ( x = i; x < kernel_size; x += 2 )
sum += kernels [x];
sum = 1.0f / sum;
for ( x = i; x < kernel_size; x += 2 )
{
kernels [x] *= sum;
assert( kernels [x] == kernels [x] ); /* catch numerical instability */
}
}
}
/*
printf( "luma:\n" );
for ( i = kernel_size; i < kernel_size * 2; i++ )
printf( "%f\n", kernels [i] );
printf( "chroma:\n" );
for ( i = 0; i < kernel_size; i++ )
printf( "%f\n", kernels [i] );
*/
/* generate linear rescale kernels */
#if rescale_out > 1
{
float weight = 1.0f;
float* out = impl->kernel;
int n = rescale_out;
do
{
/* each pass writes one full copy (chroma + luma) with a smaller weight;
the part of each tap not used here is carried into the next tap */
float remain = 0;
int i;
weight -= 1.0f / rescale_in;
for ( i = 0; i < kernel_size * 2; i++ )
{
float cur = kernels [i];
float m = cur * weight;
*out++ = m + remain;
remain = cur - m;
}
}
while ( --n );
}
#endif
}
/* decoder matrix used when setup->decoder_matrix is NULL */
static float const default_decoder [6] =
{ 0.956f, 0.621f, -0.272f, -0.647f, -1.105f, 1.702f };
/* Fills impl from the user-facing setup: scales brightness, contrast,
artifacts and fringing into internal units, builds the filter kernels, fills
the gamma table (only when gamma_size > 1) and computes one hue/saturation
adjusted decoder matrix per burst phase in impl->to_rgb. */
static void init( init_t* impl, snes_ntsc_setup_t const* setup )
{
impl->brightness = (float) setup->brightness * (0.5f * rgb_unit) + rgb_offset;
impl->contrast = (float) setup->contrast * (0.5f * rgb_unit) + rgb_unit;
#ifdef default_palette_contrast
if ( !setup->palette )
impl->contrast *= default_palette_contrast;
#endif
/* positive values are scaled towards *_max, then everything is re-centered on *_mid */
impl->artifacts = (float) setup->artifacts;
if ( impl->artifacts > 0 )
impl->artifacts *= artifacts_max - artifacts_mid;
impl->artifacts = impl->artifacts * artifacts_mid + artifacts_mid;
impl->fringing = (float) setup->fringing;
if ( impl->fringing > 0 )
impl->fringing *= fringing_max - fringing_mid;
impl->fringing = impl->fringing * fringing_mid + fringing_mid;
init_filters( impl, setup );
/* generate gamma table */
if ( gamma_size > 1 )
{
float const to_float = 1.0f / (gamma_size - (gamma_size > 1));
float const gamma = 1.1333f - (float) setup->gamma * 0.5f;
/* match common PC's 2.2 gamma to TV's 2.65 gamma */
int i;
for ( i = 0; i < gamma_size; i++ )
impl->to_float [i] =
(float) pow( i * to_float, gamma ) * impl->contrast + impl->brightness;
}
/* setup decoder matrices */
{
float hue = (float) setup->hue * PI + PI / 180 * ext_decoder_hue;
float sat = (float) setup->saturation + 1;
float const* decoder = setup->decoder_matrix;
if ( !decoder )
{
decoder = default_decoder;
/* the default decoder uses the standard hue offset instead of the external one */
if ( STD_HUE_CONDITION( setup ) )
hue += PI / 180 * (std_decoder_hue - ext_decoder_hue);
}
{
float s = (float) sin( hue ) * sat;
float c = (float) cos( hue ) * sat;
float* out = impl->to_rgb;
int n;
n = burst_count;
do
{
float const* in = decoder;
/* this inner n intentionally shadows the burst counter above */
int n = 3;
do
{
/* rotate each (i, q) coefficient pair of the decoder by the hue angle */
float i = *in++;
float q = *in++;
*out++ = i * c - q * s;
*out++ = i * s + q * c;
}
while ( --n );
if ( burst_count <= 1 )
break;
ROTATE_IQ( s, c, 0.866025f, -0.5f ); /* +120 degrees */
}
while ( --n );
}
}
}
/* kernel generation */
/* Converts r, g, b to YIQ: assigns luma to y and the first chroma component to
i; the value of the whole expression is the second chroma component (q). */
#define RGB_TO_YIQ( r, g, b, y, i ) (\
(y = (r) * 0.299f + (g) * 0.587f + (b) * 0.114f),\
(i = (r) * 0.596f - (g) * 0.275f - (b) * 0.321f),\
((r) * 0.212f - (g) * 0.523f + (b) * 0.311f)\
)
/* Converts y, i, q to RGB using the 6-element matrix to_rgb: assigns red to r
and green to g, each cast to `type`; the value of the whole expression is blue.
y, i and q are pasted unparenthesized. */
#define YIQ_TO_RGB( y, i, q, to_rgb, type, r, g ) (\
r = (type) (y + to_rgb [0] * i + to_rgb [1] * q),\
g = (type) (y + to_rgb [2] * i + to_rgb [3] * q),\
(type) (y + to_rgb [4] * i + to_rgb [5] * q)\
)
/* packs three channels into the internal format (same bit positions as snes_ntsc_rgb_builder) */
#define PACK_RGB( r, g, b ) ((r) << 21 | (g) << 11 | (b) << 1)
/* number of table values generated per burst phase and column alignment */
enum { rgb_kernel_size = burst_size / alignment_count };
/* per-channel bias, in packed form, subtracted from every generated kernel value */
enum { rgb_bias = rgb_unit * 2 * snes_ntsc_rgb_builder };
/* Per-alignment parameters consumed by gen_kernel(). */
typedef struct pixel_info_t
{
int offset; /* index into init_t::kernel where this alignment starts reading */
float negate; /* +1 or -1 multiplier applied to the fringing and artifact terms */
float kernel [4]; /* four weights applied to the i/q/y components */
} pixel_info_t;
/* PIXEL_OFFSET( ntsc, scaled ) expands to TWO comma-separated initializers,
the `offset` and the `negate` members of a pixel_info_t. negate evaluates to
1.0f or -1.0f depending on bit 1 of (ntsc + 100). */
#if rescale_in > 1
#define PIXEL_OFFSET_( ntsc, scaled ) \
(kernel_size / 2 + ntsc + (scaled != 0) + (rescale_out - scaled) % rescale_out + \
(kernel_size * 2 * scaled))
#define PIXEL_OFFSET( ntsc, scaled ) \
PIXEL_OFFSET_( ((ntsc) - (scaled) / rescale_out * rescale_in),\
(((scaled) + rescale_out * 10) % rescale_out) ),\
(1.0f - (((ntsc) + 100) & 2))
#else
#define PIXEL_OFFSET( ntsc, scaled ) \
(kernel_size / 2 + (ntsc) - (scaled)),\
(1.0f - (((ntsc) + 100) & 2))
#endif
/* one entry per column alignment; defined by the file that includes this one */
extern pixel_info_t const snes_ntsc_pixels [alignment_count];
/* Generate pixel at all burst phases and column alignments */
/* impl: kernels, decoder matrices and artifact/fringing strengths from init()
y, i, q: the pixel's color in YIQ; y is expected to include rgb_offset, which
is removed here and re-added per output value
out: receives rgb_kernel_size packed values for every alignment of every burst
phase, written consecutively */
static void gen_kernel( init_t* impl, float y, float i, float q, snes_ntsc_rgb_t* out )
{
/* generate for each scanline burst phase */
float const* to_rgb = impl->to_rgb;
int burst_remain = burst_count;
y -= rgb_offset;
do
{
/* Encode yiq into *two* composite signals (to allow control over artifacting).
Convolve these with kernels which: filter respective components, apply
sharpening, and rescale horizontally. Convert resulting yiq to rgb and pack
into integer. Based on algorithm by NewRisingSun. */
pixel_info_t const* pixel = snes_ntsc_pixels;
int alignment_remain = alignment_count;
do
{
/* negate is -1 when composite starts at odd multiple of 2 */
float const yy = y * impl->fringing * pixel->negate;
float const ic0 = (i + yy) * pixel->kernel [0];
float const qc1 = (q + yy) * pixel->kernel [1];
float const ic2 = (i - yy) * pixel->kernel [2];
float const qc3 = (q - yy) * pixel->kernel [3];
float const factor = impl->artifacts * pixel->negate;
float const ii = i * factor;
float const yc0 = (y + ii) * pixel->kernel [0];
float const yc2 = (y - ii) * pixel->kernel [2];
float const qq = q * factor;
float const yc1 = (y + qq) * pixel->kernel [1];
float const yc3 = (y - qq) * pixel->kernel [3];
float const* k = &impl->kernel [pixel->offset];
int n;
++pixel;
for ( n = rgb_kernel_size; n; --n )
{
/* these locals intentionally shadow the i, q, y parameters; chroma taps are
read at k[0..3] and luma taps kernel_size further on */
float i = k[0]*ic0 + k[2]*ic2;
float q = k[1]*qc1 + k[3]*qc3;
float y = k[kernel_size+0]*yc0 + k[kernel_size+1]*yc1 +
k[kernel_size+2]*yc2 + k[kernel_size+3]*yc3 + rgb_offset;
/* step k to the taps for the next output value: one tap back without
rescaling, otherwise on to the next rescale copy (wrapping after the last) */
if ( rescale_out <= 1 )
k--;
else if ( k < &impl->kernel [kernel_size * 2 * (rescale_out - 1)] )
k += kernel_size * 2 - 1;
else
k -= kernel_size * 2 * (rescale_out - 1) + 2;
{
/* YIQ_TO_RGB assigns r and g and yields b */
int r, g, b = YIQ_TO_RGB( y, i, q, to_rgb, int, r, g );
*out++ = PACK_RGB( r, g, b ) - rgb_bias;
}
}
}
while ( alignment_count > 1 && --alignment_remain );
if ( burst_count <= 1 )
break;
/* next burst phase: next decoder matrix, chroma rotated to match */
to_rgb += 6;
ROTATE_IQ( i, q, -0.866025f, -0.5f ); /* -120 degrees */
}
while ( --burst_remain );
}
/* defined by the file that includes this one */
static void correct_errors( snes_ntsc_rgb_t color, snes_ntsc_rgb_t* out );
/* Helpers for correct_errors(). They are not self-contained: they use the
variables `out`, `i` and (when correction is enabled) `error` from the scope
in which they are expanded. a, b and c are indices into out. */
#if DISABLE_CORRECTION
/* correction disabled: only add the bias back to out [i]; the arguments are ignored */
#define CORRECT_ERROR( a ) { out [i] += rgb_bias; }
#define DISTRIBUTE_ERROR( a, b, c ) { out [i] += rgb_bias; }
#else
/* adds the whole error to out [a] */
#define CORRECT_ERROR( a ) { out [a] += error; }
/* adds about a quarter of the error (computed per packed channel) to each of
out [a], out [b] and out [c], and whatever is left over to out [i] */
#define DISTRIBUTE_ERROR( a, b, c ) {\
snes_ntsc_rgb_t fourth = (error + 2 * snes_ntsc_rgb_builder) >> 2;\
fourth &= (rgb_bias >> 1) - snes_ntsc_rgb_builder;\
fourth -= rgb_bias >> 2;\
out [a] += fourth;\
out [b] += fourth;\
out [c] += fourth;\
out [i] += error - (fourth * 3);\
}
#endif
/* Clamps the packed value rgb and stores its three channels as bytes in
out_ [0..2], taken from bit positions 21, 11 and 1 respectively. */
#define RGB_PALETTE_OUT( rgb, out_ )\
{\
unsigned char* out = (out_);\
snes_ntsc_rgb_t clamped = (rgb);\
SNES_NTSC_CLAMP_( clamped, (8 - rgb_bits) );\
out [0] = (unsigned char) (clamped >> 21);\
out [1] = (unsigned char) (clamped >> 11);\
out [2] = (unsigned char) (clamped >> 1);\
}
/* blitter related */
#ifndef restrict
#if defined (__GNUC__)
#define restrict __restrict__
#elif defined (_MSC_VER) && _MSC_VER > 1300
#define restrict __restrict
#else
/* no support for restricted pointers */
#define restrict
#endif
#endif
#include <limits.h>
#if SNES_NTSC_OUT_DEPTH <= 16
#if USHRT_MAX == 0xFFFF
typedef unsigned short snes_ntsc_out_t;
#else
#error "Need 16-bit int type"
#endif
#else
#if UINT_MAX == 0xFFFFFFFF
typedef unsigned int snes_ntsc_out_t;
#elif ULONG_MAX == 0xFFFFFFFF
typedef unsigned long snes_ntsc_out_t;
#else
#error "Need 32-bit int type"
#endif
#endif

View File

@ -0,0 +1,25 @@
namespace Filter::Super2xSaI {

//reports the output dimensions for a given input size: this filter doubles both axes
auto size(uint& width, uint& height) -> void {
  width *= 2;
  height *= 2;
}

//staging buffer holding the palette-converted input frame, tightly packed (width pixels per row)
uint32_t temp[512 * 480];

//converts the 16-bit input frame to 32-bit color through colortable, then scales it 2x into output.
//  colortable: lookup table indexed by the raw 16-bit input pixel
//  output, outpitch: destination buffer and the pitch value forwarded to Super2xSaI32
//  input, pitch: source pixels and the source row stride in bytes
//  width, height: source dimensions in pixels
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  //temp has a fixed capacity; a larger frame used to be written past its end.
  //refuse such frames instead of corrupting memory (division avoids overflowing width * height)
  if(width && height > (512 * 480) / width) return;

  for(unsigned y = 0; y < height; y++) {
    const uint16_t *line_in = (const uint16_t*)(((const uint8_t*)input) + pitch * y);
    uint32_t *line_out = temp + y * width;
    for(unsigned x = 0; x < width; x++) {
      line_out[x] = colortable[line_in[x]];
    }
  }

  Super2xSaI32((unsigned char*)temp, width * sizeof(uint32_t), 0, (unsigned char*)output, outpitch, width, height);
}

}

View File

@ -0,0 +1,25 @@
namespace Filter::SuperEagle {

//reports the output dimensions for a given input size: this filter doubles both axes
auto size(uint& width, uint& height) -> void {
  width *= 2;
  height *= 2;
}

//staging buffer holding the palette-converted input frame, tightly packed (width pixels per row)
uint32_t temp[512 * 480];

//converts the 16-bit input frame to 32-bit color through colortable, then scales it 2x into output.
//  colortable: lookup table indexed by the raw 16-bit input pixel
//  output, outpitch: destination buffer and the pitch value forwarded to SuperEagle32
//  input, pitch: source pixels and the source row stride in bytes
//  width, height: source dimensions in pixels
auto render(
  uint32_t* colortable, uint32_t* output, uint outpitch,
  const uint16_t* input, uint pitch, uint width, uint height
) -> void {
  //temp has a fixed capacity; a larger frame used to be written past its end.
  //refuse such frames instead of corrupting memory (division avoids overflowing width * height)
  if(width && height > (512 * 480) / width) return;

  for(unsigned y = 0; y < height; y++) {
    const uint16_t *line_in = (const uint16_t*)(((const uint8_t*)input) + pitch * y);
    uint32_t *line_out = temp + y * width;
    for(unsigned x = 0; x < width; x++) {
      line_out[x] = colortable[line_in[x]];
    }
  }

  SuperEagle32((unsigned char*)temp, width * sizeof(uint32_t), 0, (unsigned char*)output, outpitch, width, height);
}

}

View File

@ -15,15 +15,6 @@ auto GameBoyColorInterface::color(uint32 color) -> uint64 {
uint64_t G = image::normalize(g, 5, 16);
uint64_t B = image::normalize(b, 5, 16);
if(settings.colorEmulation) {
R = (r * 26 + g * 4 + b * 2);
G = ( g * 24 + b * 8);
B = (r * 6 + g * 4 + b * 22);
R = image::normalize(min(960, R), 10, 16);
G = image::normalize(min(960, G), 10, 16);
B = image::normalize(min(960, B), 10, 16);
}
return R << 32 | G << 16 | B << 0;
}

View File

@ -7,39 +7,8 @@ auto GameBoyInterface::information() -> Information {
}
auto GameBoyInterface::color(uint32 color) -> uint64 {
if(!settings.colorEmulation) {
uint64 L = image::normalize(3 - color, 2, 16);
return L << 32 | L << 16 | L << 0;
} else {
#define DMG_PALETTE_GREEN
//#define DMG_PALETTE_YELLOW
//#define DMG_PALETTE_WHITE
const uint16 monochrome[4][3] = {
#if defined(DMG_PALETTE_GREEN)
{0xaeae, 0xd9d9, 0x2727},
{0x5858, 0xa0a0, 0x2828},
{0x2020, 0x6262, 0x2929},
{0x1a1a, 0x4545, 0x2a2a},
#elif defined(DMG_PALETTE_YELLOW)
{0xffff, 0xf7f7, 0x7b7b},
{0xb5b5, 0xaeae, 0x4a4a},
{0x6b6b, 0x6969, 0x3131},
{0x2121, 0x2020, 0x1010},
#elif defined(DMG_PALETTE_WHITE)
{0xffff, 0xffff, 0xffff},
{0xaaaa, 0xaaaa, 0xaaaa},
{0x5555, 0x5555, 0x5555},
{0x0000, 0x0000, 0x0000},
#endif
};
uint64 R = monochrome[color][0];
uint64 G = monochrome[color][1];
uint64 B = monochrome[color][2];
return R << 32 | G << 16 | B << 0;
}
uint64 L = image::normalize(3 - color, 2, 16);
return L << 32 | L << 16 | L << 0;
}
auto GameBoyInterface::load() -> bool {

View File

@ -3,7 +3,6 @@
namespace GameBoy {
SuperGameBoyInterface* superGameBoy = nullptr;
Settings settings;
#include "game-boy.cpp"
#include "game-boy-color.cpp"
@ -110,32 +109,14 @@ auto Interface::cheats(const vector<string>& list) -> void {
}
auto Interface::cap(const string& name) -> bool {
if(name == "Blur Emulation") return true;
if(name == "Color Emulation") return true;
return false;
}
auto Interface::get(const string& name) -> any {
if(name == "Blur Emulation") return settings.blurEmulation;
if(name == "Color Emulation") return settings.colorEmulation;
return {};
}
auto Interface::set(const string& name, const any& value) -> bool {
if(name == "Blur Emulation" && value.is<bool>()) {
settings.blurEmulation = value.get<bool>();
if(Model::SuperGameBoy()) return true;
Emulator::video.setEffect(Emulator::Video::Effect::InterframeBlending, settings.blurEmulation);
return true;
}
if(name == "Color Emulation" && value.is<bool>()) {
settings.colorEmulation = value.get<bool>();
if(Model::SuperGameBoy()) return true;
Emulator::video.setPalette();
return true;
}
return false;
}

View File

@ -74,12 +74,6 @@ struct SuperGameBoyInterface {
virtual auto joypWrite(bool p15, bool p14) -> void = 0;
};
struct Settings {
bool blurEmulation = true;
bool colorEmulation = true;
};
extern SuperGameBoyInterface* superGameBoy;
extern Settings settings;
}

View File

@ -85,8 +85,6 @@ auto System::power() -> void {
if(model() != Model::SuperGameBoy) {
Emulator::video.reset(interface);
Emulator::video.setPalette();
Emulator::video.setEffect(Emulator::Video::Effect::InterframeBlending, settings.blurEmulation);
Emulator::audio.reset(interface);
}

202
bsnes/lzma/7z.h Normal file
View File

@ -0,0 +1,202 @@
/* 7z.h -- 7z interface
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define k7zStartHeaderSize 0x20
#define k7zSignatureSize 6
extern const Byte k7zSignature[k7zSignatureSize];
/* A pointer/length pair describing a span of read-only bytes. */
typedef struct
{
const Byte *Data;
size_t Size;
} CSzData;
/* CSzCoderInfo & CSzFolder support only default methods */
/* Description of one coder inside a folder. */
typedef struct
{
size_t PropsOffset; /* NOTE(review): presumably an offset into CSzAr::CodersData -- confirm */
UInt32 MethodID;
Byte NumStreams;
Byte PropsSize; /* NOTE(review): presumably the byte length of the properties at PropsOffset -- confirm */
} CSzCoderInfo;
/* A pair of stream indices (input side / output side). */
typedef struct
{
UInt32 InIndex;
UInt32 OutIndex;
} CSzBond;
/* Fixed capacities of the arrays in CSzFolder. */
#define SZ_NUM_CODERS_IN_FOLDER_MAX 4
#define SZ_NUM_BONDS_IN_FOLDER_MAX 3
#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4
/* One folder of the archive. The Num* members give how many leading elements
of the corresponding fixed-size arrays are in use. */
typedef struct
{
UInt32 NumCoders;
UInt32 NumBonds;
UInt32 NumPackStreams;
UInt32 UnpackStream;
UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX];
CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX];
CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX];
} CSzFolder;
/* Fills *f from the data described by *sd and returns an SRes status code.
NOTE(review): presumably advances sd past the parsed folder record -- confirm
against the implementation. */
SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd);
/* A 64-bit timestamp stored as two 32-bit halves. */
typedef struct
{
UInt32 Low;
UInt32 High;
} CNtfsFileTime;
/* Array of optional 32-bit values: Defs is a bit array telling which entries
of Vals are defined. */
typedef struct
{
Byte *Defs; /* MSB 0 bit numbering */
UInt32 *Vals;
} CSzBitUi32s;
/* Array of optional 64-bit values, stored as CNtfsFileTime pairs. */
typedef struct
{
Byte *Defs; /* MSB 0 bit numbering */
// UInt64 *Vals;
CNtfsFileTime *Vals;
} CSzBitUi64s;
/* Non-zero if bit i of the bit array p is set. Bit 0 is the most significant
bit of byte 0. p must not be NULL. */
#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
/* Same test on p->Defs, but evaluates to 0 when p->Defs is NULL. */
#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
/* Archive database: pack streams and folders. The trailing comments give the
number of elements each array holds. */
typedef struct
{
UInt32 NumPackStreams;
UInt32 NumFolders;
UInt64 *PackPositions; // NumPackStreams + 1
CSzBitUi32s FolderCRCs; // NumFolders
size_t *FoCodersOffsets; // NumFolders + 1
UInt32 *FoStartPackStreamIndex; // NumFolders + 1
UInt32 *FoToCoderUnpackSizes; // NumFolders + 1
Byte *FoToMainUnpackSizeIndex; // NumFolders
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
} CSzAr;
/* Returns the unpacked size of folder folderIndex. */
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
/* Decodes folder folderIndex, reading from stream, into outBuffer (outSize
bytes). allocMain supplies memory; returns an SRes status code.
NOTE(review): startPos is presumably the stream position of the packed data --
confirm against the implementation. */
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *stream, UInt64 startPos,
Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain);
/* Extended archive database: the folder database plus per-file metadata. The
trailing comments give the number of elements each array holds. */
typedef struct
{
CSzAr db;
UInt64 startPosAfterHeader;
UInt64 dataPos;
UInt32 NumFiles;
UInt64 *UnpackPositions; // NumFiles + 1
// Byte *IsEmptyFiles;
Byte *IsDirs;
CSzBitUi32s CRCs;
CSzBitUi32s Attribs;
// CSzBitUi32s Parents;
CSzBitUi64s MTime;
CSzBitUi64s CTime;
UInt32 *FolderToFile; // NumFolders + 1
UInt32 *FileToFolder; // NumFiles
size_t *FileNameOffsets; /* in 2-byte steps */
Byte *FileNames; /* UTF-16-LE */
} CSzArEx;
/* Non-zero if file i is flagged in the IsDirs bit array. */
#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i))
/* Size of file i: the distance between consecutive entries of UnpackPositions. */
#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i])
/* Initializes *p. */
void SzArEx_Init(CSzArEx *p);
/* Releases *p using the allocator alloc. */
void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc);
UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder);
/* Stores the result in *resSize. NOTE(review): the meaning of the int return
value is not visible here -- check the implementation before relying on it. */
int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize);
/*
if dest == NULL, the return value specifies the required size of the buffer,
in 16-bit characters, including the null-terminating character.
if dest != NULL, the return value specifies the number of 16-bit characters that
are written to the dest, including the null-terminating character. */
size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
/*
size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex);
UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
*/
/*
SzArEx_Extract extracts file from archive
*outBuffer must be 0 before first call for each new archive.
Extracting cache:
If you need to decompress more than one file, you can send
these values from previous call:
*blockIndex,
*outBuffer,
*outBufferSize
You can consider "*outBuffer" as cache of solid block. If your archive is solid,
it will increase decompression speed.
If you use external function, you can declare these 3 cache variables
(blockIndex, outBuffer, outBufferSize) as static in that external function.
Free *outBuffer and set *outBuffer to 0, if you want to flush cache.
*/
SRes SzArEx_Extract(
const CSzArEx *db,
ILookInStream *inStream,
UInt32 fileIndex, /* index of file */
UInt32 *blockIndex, /* index of solid block */
Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */
size_t *outBufferSize, /* buffer size for output buffer */
size_t *offset, /* offset of stream for required file in *outBuffer */
size_t *outSizeProcessed, /* size of file in *outBuffer */
ISzAllocPtr allocMain,
ISzAllocPtr allocTemp);
/*
SzArEx_Open Errors:
SZ_ERROR_NO_ARCHIVE
SZ_ERROR_ARCHIVE
SZ_ERROR_UNSUPPORTED
SZ_ERROR_MEM
SZ_ERROR_CRC
SZ_ERROR_INPUT_EOF
SZ_ERROR_FAIL
*/
SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
EXTERN_C_END
#endif

80
bsnes/lzma/7zAlloc.c Normal file
View File

@ -0,0 +1,80 @@
/* 7zAlloc.c -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdlib.h>
#include "7zAlloc.h"
/* #define _SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
#ifdef _WIN32
#include <windows.h>
#endif
#include <stdio.h>
int g_allocCount = 0;
int g_allocCountTemp = 0;
#endif
/* Main allocator callback: hands out `size` bytes from the C heap.
   A zero-byte request yields a null pointer without touching malloc.
   The allocator handle `p` is not used. */
void *SzAlloc(ISzAllocPtr p, size_t size)
{
  UNUSED_VAR(p);
  if (size != 0)
  {
    #ifdef _SZ_ALLOC_DEBUG
    fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount);
    g_allocCount++;
    #endif
    return malloc(size);
  }
  return 0;
}
/* Main allocator callback: releases a block obtained from SzAlloc.
   A null `address` is passed straight to free(); the debug counter is
   only decremented for non-null pointers. */
void SzFree(ISzAllocPtr p, void *address)
{
  UNUSED_VAR(p);
  #ifdef _SZ_ALLOC_DEBUG
  if (address)
  {
    --g_allocCount;
    fprintf(stderr, "\nFree; count = %10d", g_allocCount);
  }
  #endif
  free(address);
}
/* Temporary-buffer allocator callback.
   Returns a null pointer for a zero-byte request. Uses malloc, except in
   _SZ_ALLOC_DEBUG builds on Windows where the process heap is used. */
void *SzAllocTemp(ISzAllocPtr p, size_t size)
{
  UNUSED_VAR(p);
  if (!size)
    return 0;
  #ifdef _SZ_ALLOC_DEBUG
  fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp);
  ++g_allocCountTemp;
  #endif
  #if defined(_SZ_ALLOC_DEBUG) && defined(_WIN32)
  return HeapAlloc(GetProcessHeap(), 0, size);
  #else
  return malloc(size);
  #endif
}
/* Temporary-buffer allocator callback: releases a block from SzAllocTemp
   with the deallocator that matches the one SzAllocTemp used. */
void SzFreeTemp(ISzAllocPtr p, void *address)
{
  UNUSED_VAR(p);
  #ifdef _SZ_ALLOC_DEBUG
  if (address)
  {
    --g_allocCountTemp;
    fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
  }
  #endif
  #if defined(_SZ_ALLOC_DEBUG) && defined(_WIN32)
  HeapFree(GetProcessHeap(), 0, address);
  #else
  free(address);
  #endif
}

19
bsnes/lzma/7zAlloc.h Normal file
View File

@ -0,0 +1,19 @@
/* 7zAlloc.h -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_ALLOC_H
#define __7Z_ALLOC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
void *SzAlloc(ISzAllocPtr p, size_t size);
void SzFree(ISzAllocPtr p, void *address);
void *SzAllocTemp(ISzAllocPtr p, size_t size);
void SzFreeTemp(ISzAllocPtr p, void *address);
EXTERN_C_END
#endif

1771
bsnes/lzma/7zArcIn.c Normal file

File diff suppressed because it is too large Load Diff

36
bsnes/lzma/7zBuf.c Normal file
View File

@ -0,0 +1,36 @@
/* 7zBuf.c -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zBuf.h"
/* Puts a CBuf into the empty state (no storage, zero size). */
void Buf_Init(CBuf *p)
{
  p->size = 0;
  p->data = 0;
}
/* Allocates `size` bytes for the buffer through `alloc`.
   Returns 1 on success (including size == 0, which leaves data null)
   and 0 when the allocation fails; on failure size stays 0. */
int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc)
{
  Byte *mem;
  p->size = 0;
  if (size == 0)
  {
    p->data = 0;
    return 1;
  }
  mem = (Byte *)ISzAlloc_Alloc(alloc, size);
  p->data = mem;
  if (!mem)
    return 0;
  p->size = size;
  return 1;
}
/* Releases the buffer's storage through `alloc` and resets it to empty. */
void Buf_Free(CBuf *p, ISzAllocPtr alloc)
{
  Byte *old = p->data;
  p->data = 0;
  p->size = 0;
  ISzAlloc_Free(alloc, old);
}

35
bsnes/lzma/7zBuf.h Normal file
View File

@ -0,0 +1,35 @@
/* 7zBuf.h -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_BUF_H
#define __7Z_BUF_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Fixed-size byte buffer managed by Buf_Init / Buf_Create / Buf_Free. */
typedef struct
{
Byte *data; /* storage; 0 when the buffer is empty */
size_t size; /* number of bytes allocated at data */
} CBuf;
void Buf_Init(CBuf *p);
int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc);
void Buf_Free(CBuf *p, ISzAllocPtr alloc);
/* Growable byte buffer with a write cursor, managed by the DynBuf_* functions. */
typedef struct
{
Byte *data; /* storage; 0 until the first allocation */
size_t size; /* allocated capacity in bytes */
size_t pos; /* write position: DynBuf_Write appends here and advances it */
} CDynBuf;
void DynBuf_Construct(CDynBuf *p);
void DynBuf_SeekToBeg(CDynBuf *p);
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc);
void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc);
EXTERN_C_END
#endif

52
bsnes/lzma/7zBuf2.c Normal file
View File

@ -0,0 +1,52 @@
/* 7zBuf2.c -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "7zBuf.h"
/* Initializes a CDynBuf to the empty state: no storage, zero capacity,
   write position at the start. */
void DynBuf_Construct(CDynBuf *p)
{
  p->pos = 0;
  p->size = 0;
  p->data = 0;
}
/* Rewinds the write position to the start of the buffer.
   Storage and capacity are left as they are. */
void DynBuf_SeekToBeg(CDynBuf *p)
{
  p->pos = 0;
}
/* Appends `size` bytes from `buf` at the current write position, growing the
   storage through `alloc` when the free tail is too small.

   Returns 1 on success. Returns 0 when the required capacity is not
   representable in size_t or the allocation fails; in both cases the buffer
   is left unchanged. */
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc)
{
  if (size > p->size - p->pos)
  {
    const size_t maxSize = ~(size_t)0;
    size_t newSize;
    size_t headroom;
    Byte *data;
    /* p->pos + size must not wrap: a wrapped value would allocate a buffer
       that is too small and the memcpy below would run past its end. */
    if (size > maxSize - p->pos)
      return 0;
    newSize = p->pos + size;
    /* Over-allocate by 25% to amortize future growth, but only when that
       still fits in size_t. */
    headroom = newSize / 4;
    if (headroom <= maxSize - newSize)
      newSize += headroom;
    data = (Byte *)ISzAlloc_Alloc(alloc, newSize);
    if (!data)
      return 0;
    p->size = newSize;
    if (p->pos != 0)
      memcpy(data, p->data, p->pos);
    ISzAlloc_Free(alloc, p->data);
    p->data = data;
  }
  if (size != 0)
  {
    memcpy(p->data + p->pos, buf, size);
    p->pos += size;
  }
  return 1;
}
/* Releases the storage through `alloc` and returns the buffer to the
   empty state. */
void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc)
{
  Byte *old = p->data;
  p->data = 0;
  p->size = 0;
  p->pos = 0;
  ISzAlloc_Free(alloc, old);
}

128
bsnes/lzma/7zCrc.c Normal file
View File

@ -0,0 +1,128 @@
/* 7zCrc.c -- CRC32 init
2017-06-06 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zCrc.h"
#include "CpuArch.h"
#define kCrcPoly 0xEDB88320
#ifdef MY_CPU_LE
#define CRC_NUM_TABLES 8
#else
#define CRC_NUM_TABLES 9
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#ifndef MY_CPU_BE
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
/* Continues a running CRC `v` over `size` bytes at `data` using the routine
   selected by CrcGenerateTable. No final XOR is applied. */
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
{
  UInt32 crc = g_CrcUpdate(v, data, size, g_CrcTable);
  return crc;
}
/* One-shot CRC of a buffer: starts from CRC_INIT_VAL, runs the selected
   update routine, then XORs the result with CRC_INIT_VAL. */
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
{
  UInt32 crc = g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable);
  return crc ^ CRC_INIT_VAL;
}
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
/* Byte-at-a-time CRC update using only the first 256 entries of `table`. */
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  while (size != 0)
  {
    v = CRC_UPDATE_BYTE_2(v, *cur);
    cur++;
    size--;
  }
  return v;
}
/* Fills g_CrcTable and selects the update routines (g_CrcUpdate,
   g_CrcUpdateT4, g_CrcUpdateT8) that match the build's / machine's byte order.
   The header asks callers to run this once before any other CRC function. */
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
/* First 256 entries: the byte-at-a-time table for the polynomial kCrcPoly. */
for (i = 0; i < 256; i++)
{
UInt32 r = i;
unsigned j;
for (j = 0; j < 8; j++)
/* (UInt32)0 - (r & 1) is all-ones when the low bit is set, so kCrcPoly is XORed in only then */
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
g_CrcTable[i] = r;
}
/* Every further entry is the entry 256 slots earlier advanced by one more table step. */
for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
{
UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
}
#if CRC_NUM_TABLES < 4
/* Not enough tables for the word-wise routines: use the byte-wise one. */
g_CrcUpdate = CrcUpdateT1;
#else
#ifdef MY_CPU_LE
/* Little-endian known at compile time: T4 by default, T8 when 8 tables exist. */
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
#ifdef MY_CPU_X86_OR_AMD64
/* On x86/x64 the T8 routine is only chosen when CPU_Is_InOrder() reports false. */
if (!CPU_Is_InOrder())
#endif
g_CrcUpdate = CrcUpdateT8;
#endif
#else
{
#ifndef MY_CPU_BE
/* Byte order not fixed at compile time: inspect the in-memory layout of 0x01020304. */
UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3)
{
/* least significant byte first: little-endian routines */
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif
}
else if (p[0] != 1 || p[1] != 2)
/* neither pure little- nor big-endian layout: fall back to the byte-wise routine */
g_CrcUpdate = CrcUpdateT1;
else
#endif
{
/* Big-endian: walking from the top down, every entry from index 256 upward is
   replaced by the byte-swapped value of the entry 256 slots below it.
   Entries 0..255 keep their original values. */
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
{
UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = CRC_UINT32_SWAP(x);
}
g_CrcUpdateT4 = CrcUpdateT1_BeT4;
g_CrcUpdate = CrcUpdateT1_BeT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif
}
}
#endif
#endif
}

25
bsnes/lzma/7zCrc.h Normal file
View File

@ -0,0 +1,25 @@
/* 7zCrc.h -- CRC32 calculation
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __7Z_CRC_H
#define __7Z_CRC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
extern UInt32 g_CrcTable[];
/* Call CrcGenerateTable one time before other CRC functions */
void MY_FAST_CALL CrcGenerateTable(void);
#define CRC_INIT_VAL 0xFFFFFFFF
#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
EXTERN_C_END
#endif

115
bsnes/lzma/7zCrcOpt.c Normal file
View File

@ -0,0 +1,115 @@
/* 7zCrcOpt.c -- CRC32 calculation
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifndef MY_CPU_BE
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
/* Little-endian CRC update that consumes four bytes per step.
   Bytes are processed singly until the pointer is 4-byte aligned, then one
   32-bit word at a time using four 256-entry tables, then singly again for
   the tail. */
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  while (size > 0 && ((unsigned)(ptrdiff_t)cur & 3) != 0)
  {
    v = CRC_UPDATE_BYTE_2(v, *cur);
    cur++;
    size--;
  }
  while (size >= 4)
  {
    v ^= *(const UInt32 *)cur;
    v = table[0x300 + (v & 0xFF)]
      ^ table[0x200 + ((v >> 8) & 0xFF)]
      ^ table[0x100 + ((v >> 16) & 0xFF)]
      ^ table[0x000 + (v >> 24)];
    cur += 4;
    size -= 4;
  }
  while (size > 0)
  {
    v = CRC_UPDATE_BYTE_2(v, *cur);
    cur++;
    size--;
  }
  return v;
}
/* Little-endian CRC update that consumes eight bytes per step.
   Bytes are processed singly until the pointer is 8-byte aligned, then two
   32-bit words at a time using eight 256-entry tables, then singly again
   for the tail. */
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  while (size > 0 && ((unsigned)(ptrdiff_t)cur & 7) != 0)
  {
    v = CRC_UPDATE_BYTE_2(v, *cur);
    cur++;
    size--;
  }
  while (size >= 8)
  {
    const UInt32 *words = (const UInt32 *)cur;
    UInt32 second;
    /* first word is folded into the running CRC and looked up in tables 4..7 */
    v ^= words[0];
    v = table[0x700 + (v & 0xFF)]
      ^ table[0x600 + ((v >> 8) & 0xFF)]
      ^ table[0x500 + ((v >> 16) & 0xFF)]
      ^ table[0x400 + (v >> 24)];
    /* second word goes through tables 0..3 */
    second = words[1];
    v ^= table[0x300 + (second & 0xFF)]
       ^ table[0x200 + ((second >> 8) & 0xFF)]
       ^ table[0x100 + ((second >> 16) & 0xFF)]
       ^ table[0x000 + (second >> 24)];
    cur += 8;
    size -= 8;
  }
  while (size > 0)
  {
    v = CRC_UPDATE_BYTE_2(v, *cur);
    cur++;
    size--;
  }
  return v;
}
#endif
#ifndef MY_CPU_LE
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
/* Big-endian CRC update, four bytes per step.
   Skips the first 256 table entries, works on a byte-swapped copy of the
   CRC internally, and swaps it back before returning. */
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  table += 0x100;
  v = CRC_UINT32_SWAP(v);
  while (size > 0 && ((unsigned)(ptrdiff_t)cur & 3) != 0)
  {
    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
    cur++;
    size--;
  }
  while (size >= 4)
  {
    v ^= *(const UInt32 *)cur;
    v = table[0x000 + (v & 0xFF)]
      ^ table[0x100 + ((v >> 8) & 0xFF)]
      ^ table[0x200 + ((v >> 16) & 0xFF)]
      ^ table[0x300 + (v >> 24)];
    cur += 4;
    size -= 4;
  }
  while (size > 0)
  {
    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
    cur++;
    size--;
  }
  return CRC_UINT32_SWAP(v);
}
/* Big-endian CRC update, eight bytes per step.
   Skips the first 256 table entries, works on a byte-swapped copy of the
   CRC internally, and swaps it back before returning. */
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  table += 0x100;
  v = CRC_UINT32_SWAP(v);
  while (size > 0 && ((unsigned)(ptrdiff_t)cur & 7) != 0)
  {
    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
    cur++;
    size--;
  }
  while (size >= 8)
  {
    const UInt32 *words = (const UInt32 *)cur;
    UInt32 second;
    v ^= words[0];
    v = table[0x400 + (v & 0xFF)]
      ^ table[0x500 + ((v >> 8) & 0xFF)]
      ^ table[0x600 + ((v >> 16) & 0xFF)]
      ^ table[0x700 + (v >> 24)];
    second = words[1];
    v ^= table[0x000 + (second & 0xFF)]
       ^ table[0x100 + ((second >> 8) & 0xFF)]
       ^ table[0x200 + ((second >> 16) & 0xFF)]
       ^ table[0x300 + (second >> 24)];
    cur += 8;
    size -= 8;
  }
  while (size > 0)
  {
    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
    cur++;
    size--;
  }
  return CRC_UINT32_SWAP(v);
}
#endif

591
bsnes/lzma/7zDec.c Normal file
View File

@ -0,0 +1,591 @@
/* 7zDec.c -- Decoding from 7z folder
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
/* #define _7ZIP_PPMD_SUPPPORT */
#include "7z.h"
#include "7zCrc.h"
#include "Bcj2.h"
#include "Bra.h"
#include "CpuArch.h"
#include "Delta.h"
#include "LzmaDec.h"
#include "Lzma2Dec.h"
#ifdef _7ZIP_PPMD_SUPPPORT
#include "Ppmd7.h"
#endif
#define k_Copy 0
#define k_Delta 3
#define k_LZMA2 0x21
#define k_LZMA 0x30101
#define k_BCJ 0x3030103
#define k_BCJ2 0x303011B
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
#ifdef _7ZIP_PPMD_SUPPPORT
#define k_PPMD 0x30401
/* Adapter that exposes an ILookInStream as a byte-at-a-time IByteIn
   (used by the PPMd decoder path below). */
typedef struct
{
IByteIn vt; /* byte-reader interface; ReadByte recovers this struct from it */
const Byte *cur; /* next byte to hand out */
const Byte *end; /* one past the last byte of the current look window */
const Byte *begin; /* start of the current look window */
UInt64 processed; /* bytes consumed from windows that were already skipped */
BoolInt extra; /* set when a byte was requested but none could be supplied */
SRes res; /* last stream status; reads stop once it is not SZ_OK */
const ILookInStream *inStream;
} CByteInToLook;
/* IByteIn::Read implementation backed by an ILookInStream.
   Returns the next input byte. When the current look window is exhausted it
   is skipped in the stream and a new window of up to 32 MiB is requested.
   If no byte can be supplied (stream error or end of input) the `extra`
   flag is set and 0 is returned; a stream error is kept in `res`. */
static Byte ReadByte(const IByteIn *pp)
{
  CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
  if (p->cur != p->end)
    return *p->cur++;
  if (p->res == SZ_OK)
  {
    size_t size = p->cur - p->begin;
    p->processed += size;
    p->res = ILookInStream_Skip(p->inStream, size);
    /* Only look further when the skip succeeded; otherwise its error code
       would be overwritten by the result of the Look call. */
    if (p->res == SZ_OK)
    {
      size = (1 << 25);
      p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
      p->cur = p->begin;
      p->end = p->begin + size;
      if (size != 0)
        return *p->cur++;
    }
  }
  p->extra = True;
  return 0;
}
/* Decodes a PPMd7 stream of inSize packed bytes from inStream into exactly
   outSize bytes at outBuffer.
   props must be 5 bytes: the model order followed by a 32-bit memory size.
   Returns SZ_ERROR_UNSUPPORTED for bad properties, SZ_ERROR_MEM when the
   model cannot be allocated, SZ_ERROR_DATA (or the stream's own error) when
   decoding fails. */
static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CPpmd7 ppmd;
CByteInToLook s;
SRes res = SZ_OK;
/* set up the byte reader over the look stream with an empty window */
s.vt.Read = ReadByte;
s.inStream = inStream;
s.begin = s.end = s.cur = NULL;
s.extra = False;
s.res = SZ_OK;
s.processed = 0;
if (propsSize != 5)
return SZ_ERROR_UNSUPPORTED;
{
unsigned order = props[0];
UInt32 memSize = GetUi32(props + 1);
if (order < PPMD7_MIN_ORDER ||
order > PPMD7_MAX_ORDER ||
memSize < PPMD7_MIN_MEM_SIZE ||
memSize > PPMD7_MAX_MEM_SIZE)
return SZ_ERROR_UNSUPPORTED;
Ppmd7_Construct(&ppmd);
if (!Ppmd7_Alloc(&ppmd, memSize, allocMain))
return SZ_ERROR_MEM;
Ppmd7_Init(&ppmd, order);
}
{
CPpmd7z_RangeDec rc;
Ppmd7z_RangeDec_CreateVTable(&rc);
rc.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&rc))
res = SZ_ERROR_DATA;
else if (s.extra)
/* the reader ran dry during range-decoder init: report the stream error if there is one */
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else
{
SizeT i;
for (i = 0; i < outSize; i++)
{
int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
if (s.extra || sym < 0)
break;
outBuffer[i] = (Byte)sym;
}
if (i != outSize)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
/* all output produced: the input must be consumed exactly and the range decoder must end cleanly */
else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
res = SZ_ERROR_DATA;
}
}
/* the model is released on every path that got past allocation */
Ppmd7_Free(&ppmd, allocMain);
return res;
}
#endif
/* Decodes an LZMA stream of inSize packed bytes from inStream into outBuffer
   (outSize bytes). The output buffer itself is used as the decoder's
   dictionary, so no separate dictionary is allocated.
   Returns SZ_OK, the stream's error, the decoder's error, or SZ_ERROR_DATA
   when the stream ends inconsistently or makes no progress. */
static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CLzmaDec state;
SRes res = SZ_OK;
LzmaDec_Construct(&state);
RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
/* decode straight into the caller's buffer */
state.dic = outBuffer;
state.dicBufSize = outSize;
LzmaDec_Init(&state);
for (;;)
{
const void *inBuf = NULL;
/* request at most 256 KiB per round, and never more than the packed bytes still expected */
size_t lookahead = (1 << 18);
if (lookahead > inSize)
lookahead = (size_t)inSize;
res = ILookInStream_Look(inStream, &inBuf, &lookahead);
if (res != SZ_OK)
break;
{
/* dicPos is remembered so that "no output produced" can be detected below */
SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos;
ELzmaStatus status;
res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
break;
if (status == LZMA_STATUS_FINISHED_WITH_MARK)
{
/* an end marker is only valid when output is full and all packed input was used */
if (outSize != state.dicPos || inSize != 0)
res = SZ_ERROR_DATA;
break;
}
/* stream without end marker: done once output is full and input is exhausted */
if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
break;
/* neither input consumed nor output produced: treat as corrupt instead of looping forever */
if (inProcessed == 0 && dicPos == state.dicPos)
{
res = SZ_ERROR_DATA;
break;
}
res = ILookInStream_Skip(inStream, inProcessed);
if (res != SZ_OK)
break;
}
}
LzmaDec_FreeProbs(&state, allocMain);
return res;
}
#ifndef _7Z_NO_METHOD_LZMA2
/* Decodes an LZMA2 stream of inSize packed bytes from inStream into outBuffer
   (outSize bytes), using the output buffer as the decoder's dictionary.
   props must be exactly one byte. Unlike SzDecodeLzma, the loop only ends
   successfully on LZMA_STATUS_FINISHED_WITH_MARK. */
static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CLzma2Dec state;
SRes res = SZ_OK;
Lzma2Dec_Construct(&state);
if (propsSize != 1)
return SZ_ERROR_DATA;
RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
/* decode straight into the caller's buffer */
state.decoder.dic = outBuffer;
state.decoder.dicBufSize = outSize;
Lzma2Dec_Init(&state);
for (;;)
{
const void *inBuf = NULL;
/* request at most 256 KiB per round, and never more than the packed bytes still expected */
size_t lookahead = (1 << 18);
if (lookahead > inSize)
lookahead = (size_t)inSize;
res = ILookInStream_Look(inStream, &inBuf, &lookahead);
if (res != SZ_OK)
break;
{
/* dicPos is remembered so that "no output produced" can be detected below */
SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos;
ELzmaStatus status;
res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
break;
if (status == LZMA_STATUS_FINISHED_WITH_MARK)
{
/* the end marker is only valid when output is full and all packed input was used */
if (outSize != state.decoder.dicPos || inSize != 0)
res = SZ_ERROR_DATA;
break;
}
/* neither input consumed nor output produced: treat as corrupt instead of looping forever */
if (inProcessed == 0 && dicPos == state.decoder.dicPos)
{
res = SZ_ERROR_DATA;
break;
}
res = ILookInStream_Skip(inStream, inProcessed);
if (res != SZ_OK)
break;
}
}
Lzma2Dec_FreeProbs(&state, allocMain);
return res;
}
#endif
/* "Copy" method: moves inSize bytes from inStream to outBuffer unchanged,
   in chunks of at most 256 KiB.
   Returns SZ_ERROR_INPUT_EOF if the stream yields no data before inSize
   bytes were copied, or the stream's own error code. */
static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
{
  for (; inSize > 0; )
  {
    const void *chunk;
    size_t chunkSize = (inSize < (1 << 18)) ? (size_t)inSize : (size_t)(1 << 18);
    RINOK(ILookInStream_Look(inStream, &chunk, &chunkSize));
    if (chunkSize == 0)
      return SZ_ERROR_INPUT_EOF;
    memcpy(outBuffer, chunk, chunkSize);
    inSize -= chunkSize;
    outBuffer += chunkSize;
    RINOK(ILookInStream_Skip(inStream, chunkSize));
  }
  return SZ_OK;
}
/* Tells whether method id `m` is one of the compression methods this decoder
   can run as a main coder: Copy, LZMA, and, depending on build options,
   LZMA2 and PPMd. */
static BoolInt IS_MAIN_METHOD(UInt32 m)
{
  if (m == k_Copy || m == k_LZMA)
    return True;
  #ifndef _7Z_NO_METHOD_LZMA2
  if (m == k_LZMA2)
    return True;
  #endif
  #ifdef _7ZIP_PPMD_SUPPPORT
  if (m == k_PPMD)
    return True;
  #endif
  return False;
}
/* A coder is supported as a main coder when it has exactly one stream and
   its method id passes IS_MAIN_METHOD. */
static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
{
  if (c->NumStreams != 1)
    return 0;
  /* && c->MethodID <= (UInt32)0xFFFFFFFF */
  return IS_MAIN_METHOD((UInt32)c->MethodID);
}
#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4)
/* Validates that the folder's coder graph is one of the layouts the decoder
   implements. Returns SZ_OK for:
     - one main coder with a single pack stream and no bonds,
     - a main coder followed by one filter (unless filters are compiled out),
     - the fixed 4-coder BCJ2 arrangement,
   and SZ_ERROR_UNSUPPORTED for everything else. */
static SRes CheckSupportedFolder(const CSzFolder *f)
{
if (f->NumCoders < 1 || f->NumCoders > 4)
return SZ_ERROR_UNSUPPORTED;
/* coder 0 must always be a main (de)compression method */
if (!IS_SUPPORTED_CODER(&f->Coders[0]))
return SZ_ERROR_UNSUPPORTED;
if (f->NumCoders == 1)
{
if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0)
return SZ_ERROR_UNSUPPORTED;
return SZ_OK;
}
#ifndef _7Z_NO_METHODS_FILTERS
if (f->NumCoders == 2)
{
/* coder 1 is the filter; exactly one bond must connect it to coder 0 */
const CSzCoderInfo *c = &f->Coders[1];
if (
/* c->MethodID > (UInt32)0xFFFFFFFF || */
c->NumStreams != 1
|| f->NumPackStreams != 1
|| f->PackStreams[0] != 0
|| f->NumBonds != 1
|| f->Bonds[0].InIndex != 1
|| f->Bonds[0].OutIndex != 0)
return SZ_ERROR_UNSUPPORTED;
switch ((UInt32)c->MethodID)
{
case k_Delta:
case k_BCJ:
case k_PPC:
case k_IA64:
case k_SPARC:
case k_ARM:
case k_ARMT:
break;
default:
return SZ_ERROR_UNSUPPORTED;
}
return SZ_OK;
}
#endif
if (f->NumCoders == 4)
{
/* BCJ2 layout: three main coders followed by the 4-stream BCJ2 coder */
if (!IS_SUPPORTED_CODER(&f->Coders[1])
|| !IS_SUPPORTED_CODER(&f->Coders[2])
|| !IS_BCJ2(&f->Coders[3]))
return SZ_ERROR_UNSUPPORTED;
/* only this exact pack-stream / bond wiring is accepted */
if (f->NumPackStreams != 4
|| f->PackStreams[0] != 2
|| f->PackStreams[1] != 6
|| f->PackStreams[2] != 1
|| f->PackStreams[3] != 0
|| f->NumBonds != 3
|| f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0
|| f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1
|| f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2)
return SZ_ERROR_UNSUPPORTED;
return SZ_OK;
}
/* three coders, or two coders with filters compiled out */
return SZ_ERROR_UNSUPPORTED;
}
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
/* Decodes one folder into outBuffer (outSize bytes) by running its coders in
   index order. The folder must pass CheckSupportedFolder.

   propsData      base of the coder property bytes (indexed by coder->PropsOffset)
   unpackSizes    per-coder unpacked sizes (read only for the 4-coder layout)
   packPositions  pack-stream offsets for this folder, relative to startPos
   tempBuf        receives the buffers allocated here with allocMain; they are
                  NOT freed in this function, including on error returns
                  (SzAr_DecodeFolder frees all three after the call) */
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,
const UInt64 *unpackSizes,
const UInt64 *packPositions,
ILookInStream *inStream, UInt64 startPos,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
Byte *tempBuf[])
{
UInt32 ci;
SizeT tempSizes[3] = { 0, 0, 0};
/* tempBuf3 / tempSize3 describe the region inside outBuffer that coder 2 of the BCJ2 layout decodes into */
SizeT tempSize3 = 0;
Byte *tempBuf3 = 0;
RINOK(CheckSupportedFolder(folder));
for (ci = 0; ci < folder->NumCoders; ci++)
{
const CSzCoderInfo *coder = &folder->Coders[ci];
if (IS_MAIN_METHOD((UInt32)coder->MethodID))
{
/* si selects the pack stream this coder reads; 0 unless the BCJ2 layout remaps it */
UInt32 si = 0;
UInt64 offset;
UInt64 inSize;
Byte *outBufCur = outBuffer;
SizeT outSizeCur = outSize;
if (folder->NumCoders == 4)
{
/* BCJ2 layout: pack stream index for coders 0, 1 and 2 */
UInt32 indices[] = { 3, 2, 0 };
UInt64 unpackSize = unpackSizes[ci];
si = indices[ci];
if (ci < 2)
{
/* coders 0 and 1 decode into freshly allocated temporary buffers */
Byte *temp;
outSizeCur = (SizeT)unpackSize;
/* the size must survive the narrowing to SizeT */
if (outSizeCur != unpackSize)
return SZ_ERROR_MEM;
temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur);
if (!temp && outSizeCur != 0)
return SZ_ERROR_MEM;
/* coder 0 -> tempBuf[1], coder 1 -> tempBuf[0] */
outBufCur = tempBuf[1 - ci] = temp;
tempSizes[1 - ci] = outSizeCur;
}
else if (ci == 2)
{
if (unpackSize > outSize) /* check it */
return SZ_ERROR_PARAM;
/* coder 2 decodes into the tail of outBuffer, so no extra buffer is needed for it */
tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize);
tempSize3 = outSizeCur = (SizeT)unpackSize;
}
else
return SZ_ERROR_UNSUPPORTED;
}
offset = packPositions[si];
inSize = packPositions[(size_t)si + 1] - offset;
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
if (coder->MethodID == k_Copy)
{
if (inSize != outSizeCur) /* check it */
return SZ_ERROR_DATA;
RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
}
else if (coder->MethodID == k_LZMA)
{
RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#ifndef _7Z_NO_METHOD_LZMA2
else if (coder->MethodID == k_LZMA2)
{
RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
else if (coder->MethodID == k_PPMD)
{
RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#endif
else
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->MethodID == k_BCJ2)
{
/* the fourth BCJ2 input is read verbatim from pack stream 1 into tempBuf[2] */
UInt64 offset = packPositions[1];
UInt64 s3Size = packPositions[2] - offset;
if (ci != 3)
return SZ_ERROR_UNSUPPORTED;
tempSizes[2] = (SizeT)s3Size;
if (tempSizes[2] != s3Size)
return SZ_ERROR_MEM;
tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]);
if (!tempBuf[2] && tempSizes[2] != 0)
return SZ_ERROR_MEM;
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
/* tempBuf[0] / tempBuf[1] sizes must be multiples of 4, and the three decoded streams must add up to outSize */
if ((tempSizes[0] & 3) != 0 ||
(tempSizes[1] & 3) != 0 ||
tempSize3 + tempSizes[0] + tempSizes[1] != outSize)
return SZ_ERROR_DATA;
{
CBcj2Dec p;
p.bufs[0] = tempBuf3; p.lims[0] = tempBuf3 + tempSize3;
p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0];
p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1];
p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2];
/* bufs[0] lies in the tail of outBuffer while the output starts at its head */
p.dest = outBuffer;
p.destLim = outBuffer + outSize;
Bcj2Dec_Init(&p);
RINOK(Bcj2Dec_Decode(&p));
{
/* success requires every input fully consumed, the output exactly filled and the decoder in its final state */
unsigned i;
for (i = 0; i < 4; i++)
if (p.bufs[i] != p.lims[i])
return SZ_ERROR_DATA;
if (!Bcj2Dec_IsFinished(&p))
return SZ_ERROR_DATA;
if (p.dest != p.destLim
|| p.state != BCJ2_STREAM_MAIN)
return SZ_ERROR_DATA;
}
}
}
#ifndef _7Z_NO_METHODS_FILTERS
else if (ci == 1)
{
/* 2-coder layout: coder 1 is a filter applied in place to what coder 0 wrote into outBuffer */
if (coder->MethodID == k_Delta)
{
if (coder->PropsSize != 1)
return SZ_ERROR_UNSUPPORTED;
{
Byte state[DELTA_STATE_SIZE];
Delta_Init(state);
/* the single property byte holds (delta distance - 1) */
Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
}
}
else
{
/* the branch-converter filters take no properties */
if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
switch (coder->MethodID)
{
case k_BCJ:
{
UInt32 state;
x86_Convert_Init(state);
x86_Convert(outBuffer, outSize, 0, &state, 0);
break;
}
CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64)
CASE_BRA_CONV(SPARC)
CASE_BRA_CONV(ARM)
CASE_BRA_CONV(ARMT)
default:
return SZ_ERROR_UNSUPPORTED;
}
}
}
#endif
else
return SZ_ERROR_UNSUPPORTED;
}
return SZ_OK;
}
/* Decodes folder `folderIndex` of archive database `p` into outBuffer.
   outSize must equal the folder's unpacked size. startPos is the stream
   offset the folder's pack positions are relative to.
   Returns SZ_ERROR_FAIL when the folder record or outSize is inconsistent,
   SZ_ERROR_CRC when a stored folder CRC does not match, or the decoder's
   result. */
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *inStream, UInt64 startPos,
Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain)
{
SRes res;
CSzFolder folder;
CSzData sd;
/* locate and parse this folder's coder description */
const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex];
sd.Data = data;
sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - p->FoCodersOffsets[folderIndex];
res = SzGetNextFolderItem(&folder, &sd);
if (res != SZ_OK)
return res;
/* the record must be consumed completely and agree with the index tables and the caller's buffer size */
if (sd.Size != 0
|| folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex]
|| outSize != SzAr_GetFolderUnpackSize(p, folderIndex))
return SZ_ERROR_FAIL;
{
unsigned i;
/* temporary buffers that SzFolder_Decode2 may allocate; freed here whatever the outcome */
Byte *tempBuf[3] = { 0, 0, 0};
res = SzFolder_Decode2(&folder, data,
&p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]],
p->PackPositions + p->FoStartPackStreamIndex[folderIndex],
inStream, startPos,
outBuffer, (SizeT)outSize, allocMain, tempBuf);
for (i = 0; i < 3; i++)
ISzAlloc_Free(allocMain, tempBuf[i]);
/* verify the folder CRC only when decoding succeeded and a CRC is recorded for this folder */
if (res == SZ_OK)
if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex))
if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex])
res = SZ_ERROR_CRC;
return res;
}
}

286
bsnes/lzma/7zFile.c Normal file
View File

@ -0,0 +1,286 @@
/* 7zFile.c -- File IO
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zFile.h"
#ifndef USE_WINDOWS_FILE
#ifndef UNDER_CE
#include <errno.h>
#endif
#else
/*
ReadFile and WriteFile functions in Windows have BUG:
If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1)
from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES
(Insufficient system resources exist to complete the requested service).
Probably in some version of Windows there are problems with other sizes:
for 32 MB (maybe also for 16 MB).
And message can be "Network connection was lost"
*/
#define kChunkSizeMax (1 << 22)
#endif
/* Marks the file object as "not open" (null FILE pointer, or
   INVALID_HANDLE_VALUE on Windows). */
void File_Construct(CSzFile *p)
{
  #ifndef USE_WINDOWS_FILE
  p->file = NULL;
  #else
  p->handle = INVALID_HANDLE_VALUE;
  #endif
}
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
/* Opens `name` for reading (writeMode == 0) or creates/truncates it for
   writing (writeMode != 0).
   Returns 0 on success; otherwise GetLastError() on Windows, errno with
   stdio, or 2 (ENOENT) on Windows CE stdio builds. */
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
  #ifdef USE_WINDOWS_FILE
  if (writeMode)
    p->handle = CreateFileA(name, GENERIC_WRITE, FILE_SHARE_READ, NULL,
        CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
  else
    p->handle = CreateFileA(name, GENERIC_READ, FILE_SHARE_READ, NULL,
        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  if (p->handle == INVALID_HANDLE_VALUE)
    return GetLastError();
  return 0;
  #else
  p->file = fopen(name, writeMode ? "wb+" : "rb");
  if (p->file != 0)
    return 0;
  #ifdef UNDER_CE
  return 2; /* ENOENT */
  #else
  return errno;
  #endif
  #endif
}
/* Opens an existing file for reading. Returns 0 on success. */
WRes InFile_Open(CSzFile *p, const char *name)
{
  return File_Open(p, name, 0);
}
/* Creates (or truncates) a file for writing. Returns 0 on success. */
WRes OutFile_Open(CSzFile *p, const char *name)
{
  return File_Open(p, name, 1);
}
#endif
#ifdef USE_WINDOWS_FILE
/* Wide-character variant of File_Open for Windows.
   Returns 0 on success, GetLastError() otherwise. */
static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
{
  if (writeMode)
    p->handle = CreateFileW(name, GENERIC_WRITE, FILE_SHARE_READ, NULL,
        CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
  else
    p->handle = CreateFileW(name, GENERIC_READ, FILE_SHARE_READ, NULL,
        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  if (p->handle == INVALID_HANDLE_VALUE)
    return GetLastError();
  return 0;
}
/* Opens an existing file (wide-character path) for reading. Returns 0 on success. */
WRes InFile_OpenW(CSzFile *p, const WCHAR *name)
{
  return File_OpenW(p, name, 0);
}
/* Creates (or truncates) a file (wide-character path) for writing. Returns 0 on success. */
WRes OutFile_OpenW(CSzFile *p, const WCHAR *name)
{
  return File_OpenW(p, name, 1);
}
#endif
/* Closes the file if it is open and marks the object as closed.
   Safe to call on an object that is already closed.
   Returns 0 on success, GetLastError() (Windows) or fclose()'s non-zero
   result (stdio) on failure. */
WRes File_Close(CSzFile *p)
{
  #ifdef USE_WINDOWS_FILE
  if (p->handle != INVALID_HANDLE_VALUE)
  {
    if (!CloseHandle(p->handle))
      return GetLastError();
    p->handle = INVALID_HANDLE_VALUE;
  }
  #else
  if (p->file != NULL)
  {
    int res = fclose(p->file);
    /* After fclose the stream must not be used again, even when the call
       reports an error, so forget the pointer before returning. */
    p->file = NULL;
    if (res != 0)
      return res;
  }
  #endif
  return 0;
}
/* Reads up to *size bytes into `data`. On return *size holds the number of
   bytes actually read, which is smaller than requested at end of file.
   Returns 0 on success (including a short read at EOF), otherwise
   GetLastError() on Windows or the ferror() indicator with stdio.
   On Windows the request is split into chunks of at most kChunkSizeMax. */
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
  size_t remaining = *size;
  if (remaining == 0)
    return 0;
  #ifdef USE_WINDOWS_FILE
  *size = 0;
  for (;;)
  {
    DWORD got = 0;
    DWORD chunk = (DWORD)remaining;
    BOOL ok;
    if (remaining > kChunkSizeMax)
      chunk = kChunkSizeMax;
    ok = ReadFile(p->handle, data, chunk, &got, NULL);
    data = (void *)((Byte *)data + got);
    remaining -= got;
    *size += got;
    if (!ok)
      return GetLastError();
    if (got == 0 || remaining == 0)
      break;
  }
  return 0;
  #else
  *size = fread(data, 1, remaining, p->file);
  return (*size == remaining) ? 0 : ferror(p->file);
  #endif
}
/* Writes *size bytes from `data`. On return *size holds the number of bytes
   actually written.
   Returns 0 on success, otherwise GetLastError() on Windows or the ferror()
   indicator with stdio.
   On Windows the request is split into chunks of at most kChunkSizeMax. */
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
  size_t remaining = *size;
  if (remaining == 0)
    return 0;
  #ifdef USE_WINDOWS_FILE
  *size = 0;
  for (;;)
  {
    DWORD put = 0;
    DWORD chunk = (DWORD)remaining;
    BOOL ok;
    if (remaining > kChunkSizeMax)
      chunk = kChunkSizeMax;
    ok = WriteFile(p->handle, data, chunk, &put, NULL);
    data = (void *)((Byte *)data + put);
    remaining -= put;
    *size += put;
    if (!ok)
      return GetLastError();
    if (put == 0 || remaining == 0)
      break;
  }
  return 0;
  #else
  *size = fwrite(data, 1, remaining, p->file);
  return (*size == remaining) ? 0 : ferror(p->file);
  #endif
}
/* Moves the file position by *pos relative to `origin` and stores the
   resulting absolute position back into *pos.
   Returns 0 on success and a non-zero value on failure:
     - Windows: ERROR_INVALID_PARAMETER for a bad origin, else GetLastError()
     - stdio:   1 for a bad origin or an offset that does not fit in `long`
                (*pos is left untouched), fseek()'s result when the seek
                fails, -1 when the new position cannot be queried. */
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
  #ifdef USE_WINDOWS_FILE
  LARGE_INTEGER value;
  DWORD moveMethod;
  value.LowPart = (DWORD)*pos;
  value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
  switch (origin)
  {
    case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
    case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
    case SZ_SEEK_END: moveMethod = FILE_END; break;
    default: return ERROR_INVALID_PARAMETER;
  }
  value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
  /* 0xFFFFFFFF is also a valid low part, so it only means failure when GetLastError agrees */
  if (value.LowPart == 0xFFFFFFFF)
  {
    WRes res = GetLastError();
    if (res != NO_ERROR)
      return res;
  }
  *pos = ((Int64)value.HighPart << 32) | value.LowPart;
  return 0;
  #else
  int moveMethod;
  int res;
  long offset;
  long newPos;
  switch (origin)
  {
    case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
    case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
    case SZ_SEEK_END: moveMethod = SEEK_END; break;
    default: return 1;
  }
  /* fseek takes a long; where long is 32 bits a larger offset would be
     silently truncated and the seek would land in the wrong place. */
  offset = (long)*pos;
  if ((Int64)offset != *pos)
    return 1;
  res = fseek(p->file, offset, moveMethod);
  newPos = ftell(p->file);
  *pos = newPos;
  /* ftell reports failure with -1; do not present that as a position */
  if (res == 0 && newPos < 0)
    res = -1;
  return res;
  #endif
}
/* Stores the file's length in bytes in *length.
   Returns 0 on success and a non-zero value on failure, in which case
   *length is not meaningful. The stdio variant determines the length by
   seeking to the end and then restores the previous position. */
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
  #ifdef USE_WINDOWS_FILE
  DWORD sizeHigh;
  DWORD sizeLow = GetFileSize(p->handle, &sizeHigh);
  /* 0xFFFFFFFF is also a valid low part, so it only means failure when GetLastError agrees */
  if (sizeLow == 0xFFFFFFFF)
  {
    DWORD res = GetLastError();
    if (res != NO_ERROR)
      return res;
  }
  *length = (((UInt64)sizeHigh) << 32) + sizeLow;
  return 0;
  #else
  long end;
  long pos = ftell(p->file);
  if (pos < 0)
    return -1;
  if (fseek(p->file, 0, SEEK_END) != 0)
    return -1;
  end = ftell(p->file);
  /* Always try to restore the position, then report any failure. ftell
     returns -1 on error, which must not be stored as a length. */
  if (fseek(p->file, pos, SEEK_SET) != 0 || end < 0)
    return -1;
  *length = (UInt64)end;
  return 0;
  #endif
}
/* ---------- FileSeqInStream ---------- */
/* ISeqInStream::Read for a file-backed sequential stream: forwards to
   File_Read and maps any failure to SZ_ERROR_READ. */
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
  CFileSeqInStream *self = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
  if (File_Read(&self->file, buf, size) != 0)
    return SZ_ERROR_READ;
  return SZ_OK;
}
/* Installs the file-backed Read callback into the stream's interface table.
   The embedded CSzFile is not touched. */
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
{
  p->vt.Read = FileSeqInStream_Read;
}
/* ---------- FileInStream ---------- */
/* ISeekInStream::Read for a file-backed seekable stream: forwards to
   File_Read and maps any failure to SZ_ERROR_READ. */
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
{
  CFileInStream *self = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
  if (File_Read(&self->file, buf, size) != 0)
    return SZ_ERROR_READ;
  return SZ_OK;
}
/* ISeekInStream::Seek for a file-backed stream: forwards to File_Seek and
   returns its result unchanged. */
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
{
  CFileInStream *self = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
  CSzFile *file = &self->file;
  return File_Seek(file, pos, origin);
}
/* Installs the file-backed Read and Seek callbacks into the stream's
   interface table. The embedded CSzFile is not touched. */
void FileInStream_CreateVTable(CFileInStream *p)
{
  p->vt.Seek = FileInStream_Seek;
  p->vt.Read = FileInStream_Read;
}
/* ---------- FileOutStream ---------- */
/* ISeqOutStream::Write for a file-backed stream. Returns the number of bytes
   File_Write reports as written; the error code itself is discarded, so a
   short count is the only failure signal. */
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
{
  CFileOutStream *self = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
  size_t written = size;
  File_Write(&self->file, data, &written);
  return written;
}
/* Installs the file-backed Write callback into the stream's interface table.
   The embedded CSzFile is not touched. */
void FileOutStream_CreateVTable(CFileOutStream *p)
{
  p->vt.Write = FileOutStream_Write;
}

83
bsnes/lzma/7zFile.h Normal file
View File

@ -0,0 +1,83 @@
/* 7zFile.h -- File IO
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H
#define __7Z_FILE_H
#ifdef _WIN32
#define USE_WINDOWS_FILE
#endif
#ifdef USE_WINDOWS_FILE
#include <windows.h>
#else
#include <stdio.h>
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
/* ---------- File ---------- */
/* Thin file wrapper: a Win32 HANDLE when USE_WINDOWS_FILE is defined,
   otherwise a stdio FILE pointer. */
typedef struct
{
#ifdef USE_WINDOWS_FILE
HANDLE handle;
#else
FILE *file;
#endif
} CSzFile;
void File_Construct(CSzFile *p);
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
WRes InFile_Open(CSzFile *p, const char *name);
WRes OutFile_Open(CSzFile *p, const char *name);
#endif
#ifdef USE_WINDOWS_FILE
WRes InFile_OpenW(CSzFile *p, const WCHAR *name);
WRes OutFile_OpenW(CSzFile *p, const WCHAR *name);
#endif
WRes File_Close(CSzFile *p);
/* reads min(*size, remaining file size) bytes; on return *size holds the number of bytes read */
WRes File_Read(CSzFile *p, void *data, size_t *size);
/* writes *size bytes */
WRes File_Write(CSzFile *p, const void *data, size_t *size);
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin);
WRes File_GetLength(CSzFile *p, UInt64 *length);
/* ---------- FileInStream ---------- */
/* Sequential input stream backed by a CSzFile.
   Fill in `vt` with FileSeqInStream_CreateVTable. */
typedef struct
{
ISeqInStream vt; /* interface table handed to stream consumers */
CSzFile file; /* underlying file */
} CFileSeqInStream;
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
/* Seekable input stream backed by a CSzFile.
   Fill in `vt` with FileInStream_CreateVTable. */
typedef struct
{
ISeekInStream vt; /* interface table handed to stream consumers */
CSzFile file; /* underlying file */
} CFileInStream;
void FileInStream_CreateVTable(CFileInStream *p);
/* Sequential output stream backed by a CSzFile.
   Fill in `vt` with FileOutStream_CreateVTable. */
typedef struct
{
ISeqOutStream vt; /* interface table handed to stream consumers */
CSzFile file; /* underlying file */
} CFileOutStream;
void FileOutStream_CreateVTable(CFileOutStream *p);
EXTERN_C_END
#endif

176
bsnes/lzma/7zStream.c Normal file
View File

@ -0,0 +1,176 @@
/* 7zStream.c -- 7z Stream functions
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "7zTypes.h"
/* Reads exactly `size` bytes from a sequential stream into buf, looping over
   short reads.
   Returns SZ_OK once everything was read, the stream's own error code if a
   read fails, or `errorType` if the stream reports 0 bytes before the
   request is satisfied. */
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
{
  Byte *dest = (Byte *)buf;
  size_t remaining = size;
  while (remaining != 0)
  {
    size_t got = remaining;
    RINOK(ISeqInStream_Read(stream, dest, &got));
    if (got == 0)
      return errorType;   /* stream ended early */
    dest += got;
    remaining -= got;
  }
  return SZ_OK;
}
/* Reads exactly `size` bytes; a premature end of stream is reported as
   SZ_ERROR_INPUT_EOF. */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
{
  const SRes eofCode = SZ_ERROR_INPUT_EOF;
  return SeqInStream_Read2(stream, buf, size, eofCode);
}
/* Reads a single byte into *buf.
   Returns the stream's error code on failure, SZ_ERROR_INPUT_EOF if no byte
   was delivered, SZ_OK otherwise. */
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
{
  size_t got = 1;
  RINOK(ISeqInStream_Read(stream, buf, &got));
  if (got != 1)
    return SZ_ERROR_INPUT_EOF;
  return SZ_OK;
}
/* Seeks the look-stream to an absolute offset (SZ_SEEK_SET).
   The unsigned offset is converted to the signed Int64 the Seek callback
   takes by pointer. */
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
  Int64 target = (Int64)offset;
  return ILookInStream_Seek(stream, &target, SZ_SEEK_SET);
}
/* Reads up to *size bytes through the Look/Skip pair: peeks at the stream's
   buffered data, copies it to buf and then consumes it.
   On return *size holds the number of bytes copied (Look may shrink it).
   A request of 0 bytes succeeds immediately without touching the stream. */
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
{
  const void *window;
  size_t avail;
  if (*size == 0)
    return SZ_OK;
  RINOK(ILookInStream_Look(stream, &window, size));
  avail = *size;
  memcpy(buf, window, avail);
  return ILookInStream_Skip(stream, avail);
}
/* Reads exactly `size` bytes via ILookInStream::Read, looping over short
   reads.
   Returns SZ_OK on success, the stream's error code if a read fails, or
   `errorType` if a read yields 0 bytes before the request is satisfied. */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
{
  Byte *dest = (Byte *)buf;
  size_t remaining = size;
  while (remaining != 0)
  {
    size_t got = remaining;
    RINOK(ILookInStream_Read(stream, dest, &got));
    if (got == 0)
      return errorType;   /* stream ended early */
    dest += got;
    remaining -= got;
  }
  return SZ_OK;
}
/* Reads exactly `size` bytes; a premature end of stream is reported as
   SZ_ERROR_INPUT_EOF. */
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
{
  const SRes eofCode = SZ_ERROR_INPUT_EOF;
  return LookInStream_Read2(stream, buf, size, eofCode);
}
/* Declares local `p` as the CLookToRead2 that owns the vtable pointer `pp`.
   The expansion is a declaration that already ends in ';', so it must sit
   among the declarations of a function that has a parameter named pp. */
#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
{
SRes res = SZ_OK;
GET_LookToRead2
size_t size2 = p->size - p->pos;
if (size2 == 0 && *size != 0)
{
p->pos = 0;
p->size = 0;
size2 = p->bufSize;
res = ISeekInStream_Read(p->realStream, p->buf, &size2);
p->size = size2;
}
if (*size > size2)
*size = size2;
*buf = p->buf + p->pos;
return res;
}
static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
{
SRes res = SZ_OK;
GET_LookToRead2
size_t size2 = p->size - p->pos;
if (size2 == 0 && *size != 0)
{
p->pos = 0;
p->size = 0;
if (*size > p->bufSize)
*size = p->bufSize;
res = ISeekInStream_Read(p->realStream, p->buf, size);
size2 = p->size = *size;
}
if (*size > size2)
*size = size2;
*buf = p->buf + p->pos;
return res;
}
static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
{
GET_LookToRead2
p->pos += offset;
return SZ_OK;
}
static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
{
GET_LookToRead2
size_t rem = p->size - p->pos;
if (rem == 0)
return ISeekInStream_Read(p->realStream, buf, size);
if (rem > *size)
rem = *size;
memcpy(buf, p->buf + p->pos, rem);
p->pos += rem;
*size = rem;
return SZ_OK;
}
/* ILookInStream::Seek: drops any buffered data, then forwards the seek to
   the underlying stream and returns its result. */
static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
{
  CLookToRead2 *look = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
  look->size = 0;
  look->pos = 0;
  return ISeekInStream_Seek(look->realStream, pos, origin);
}
/* Fills in the ILookInStream vtable of p.
   lookahead != 0 selects the Look variant that refills the whole buffer;
   lookahead == 0 selects the variant that reads only what was requested.
   Skip, Read and Seek are the same in both modes. */
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
{
  if (lookahead)
    p->vt.Look = LookToRead2_Look_Lookahead;
  else
    p->vt.Look = LookToRead2_Look_Exact;
  p->vt.Skip = LookToRead2_Skip;
  p->vt.Read = LookToRead2_Read;
  p->vt.Seek = LookToRead2_Seek;
}
static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
return LookInStream_LookRead(p->realStream, buf, size);
}
/* Installs the Look-based Read callback into p's ISeqInStream vtable. */
void SecToLook_CreateVTable(CSecToLook *p)
{
  ISeqInStream *vtable = &p->vt;
  vtable->Read = SecToLook_Read;
}
static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
return ILookInStream_Read(p->realStream, buf, size);
}
/* Installs the Read-forwarding callback into p's ISeqInStream vtable. */
void SecToRead_CreateVTable(CSecToRead *p)
{
  ISeqInStream *vtable = &p->vt;
  vtable->Read = SecToRead_Read;
}

375
bsnes/lzma/7zTypes.h Normal file
View File

@ -0,0 +1,375 @@
/* 7zTypes.h -- Basic types
2018-08-04 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#endif
#include <stddef.h>
/* EXTERN_C_BEGIN / EXTERN_C_END wrap declarations in extern "C" { } when the
   header is compiled as C++ and expand to nothing in C. They are only
   defined here if nothing defined EXTERN_C_BEGIN earlier. */
#ifndef EXTERN_C_BEGIN
#ifdef __cplusplus
#define EXTERN_C_BEGIN extern "C" {
#define EXTERN_C_END }
#else
#define EXTERN_C_BEGIN
#define EXTERN_C_END
#endif
#endif
EXTERN_C_BEGIN
/* SRes result codes. 0 is success; every non-zero value is an error.
   Values 13..15 are not assigned in this header. */
#define SZ_OK 0
#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12
#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
typedef int SRes;
/* WRes is a second result type, unsigned on Windows and int elsewhere;
   MY_SRes_HRESULT_FROM_WRes maps it to an HRESULT-style value. */
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
#else
typedef int WRes;
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_WIN32
/* NOTE(review): this expansion uses HRESULT, which this header does not
   define; users of the macro must provide that type themselves. */
#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
#endif
/* RINOK(x): evaluate x once and return its value from the enclosing function
   if it is non-zero. Expands to a braced block, so the usual trailing ';'
   leaves an empty statement behind it. */
#ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
/* Fixed-width style integer aliases used throughout the SDK. */
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
/* Define _LZMA_UINT32_IS_ULONG to make the 32-bit aliases `long` based. */
#ifdef _LZMA_UINT32_IS_ULONG
typedef long Int32;
typedef unsigned long UInt32;
#else
typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64;
typedef unsigned long UInt64;
#else
/* UINT64_CONST(n) spells a 64-bit unsigned literal for the compiler in use.
   Note it is not defined at all in the _SZ_NO_INT_64 branch above. */
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL
#endif
#endif
/* SizeT is size_t unless _LZMA_NO_SYSTEM_SIZE_T forces it to UInt32. */
#ifdef _LZMA_NO_SYSTEM_SIZE_T
typedef UInt32 SizeT;
#else
typedef size_t SizeT;
#endif
/* Boolean stored as int, with True/False as its two values. */
typedef int BoolInt;
/* typedef BoolInt Bool; */
#define True 1
#define False 0
/* Calling-convention and inlining annotations. They expand to MSVC keywords
   under _WIN32 / _MSC_VER and to nothing on every other compiler. */
#ifdef _WIN32
#define MY_STD_CALL __stdcall
#else
#define MY_STD_CALL
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline)
#else
#define MY_NO_INLINE
#endif
#define MY_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#else
#define MY_NO_INLINE
#define MY_FORCE_INLINE
#define MY_CDECL
#define MY_FAST_CALL
/* inline keyword : for C++ / C99 */
/* GCC, clang: */
/*
#if defined (__GNUC__) && (__GNUC__ >= 4)
#define MY_FORCE_INLINE __attribute__((always_inline))
#define MY_NO_INLINE __attribute__((noinline))
#endif
*/
#endif
/* The following interfaces use first parameter as pointer to structure */
/* Each interface is a struct of function pointers; the matching
   Interface_Method(p, ...) macro calls through it and passes p as the first
   argument (so p is evaluated twice by the macro). */
typedef struct IByteIn IByteIn;
struct IByteIn
{
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
};
#define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut;
struct IByteOut
{
void (*Write)(const IByteOut *p, Byte b);
};
#define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream;
struct ISeqInStream
{
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */
};
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
/* same as SeqInStream_Read, but a premature end of stream yields errorType */
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream;
struct ISeqOutStream
{
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes.
(result < size) means error */
};
#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
/* Origin for Seek callbacks: start of stream, current position, end. */
typedef enum
{
SZ_SEEK_SET = 0,
SZ_SEEK_CUR = 1,
SZ_SEEK_END = 2
} ESzSeek;
/* Readable stream that can also be repositioned. */
typedef struct ISeekInStream ISeekInStream;
struct ISeekInStream
{
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
};
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
/* Stream that lets the caller peek at data (Look) and consume it
   separately (Skip). */
typedef struct ILookInStream ILookInStream;
struct ILookInStream
{
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset);
/* offset must be <= output(*size) of Look */
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
};
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
/* reads via ILookInStream::Look followed by ILookInStream::Skip */
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
/* seeks to an absolute offset (SZ_SEEK_SET) */
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
/* Buffered ILookInStream built on top of an ISeekInStream.
   pos..size is the unread portion of buf. */
typedef struct
{
ILookInStream vt;
const ISeekInStream *realStream;
size_t pos;
size_t size; /* it's data size */
/* the following variables must be set outside */
Byte *buf;
size_t bufSize;
} CLookToRead2;
/* lookahead selects which Look implementation is installed */
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
/* marks the buffer as empty; does not touch buf, bufSize or realStream */
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
/* ISeqInStream view of an ILookInStream that reads through Look + Skip. */
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
/* ISeqInStream view of an ILookInStream that reads through its Read callback. */
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
/* Progress-reporting callback interface. */
typedef struct ICompressProgress ICompressProgress;
struct ICompressProgress
{
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */
};
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
/* Allocator interface: a pair of alloc/free callbacks that receive the
   allocator object itself as first argument. */
typedef struct ISzAlloc ISzAlloc;
typedef const ISzAlloc * ISzAllocPtr;
struct ISzAlloc
{
void *(*Alloc)(ISzAllocPtr p, size_t size);
void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
};
#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
#define ISzAlloc_Free(p, a) (p)->Free(p, a)
/* deprecated */
#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
/* MY_offsetof(type, m): byte offset of member m inside type. Uses the
   standard offsetof when that macro is available, otherwise the classic
   null-pointer member-address form. */
#ifndef MY_offsetof
#ifdef offsetof
#define MY_offsetof(type, m) offsetof(type, m)
/*
#define MY_offsetof(type, m) FIELD_OFFSET(type, m)
*/
#else
#define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
#endif
#endif
/* MY_container_of(ptr, type, m): given ptr to member m, yields a pointer to
   the enclosing `type` object by subtracting the member's offset. */
#ifndef MY_container_of
/*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/
/*
GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
GCC 3.4.4 : classes with constructor
GCC 4.8.1 : classes with non-public variable members"
*/
/* The `1 ? (ptr) : &((type *)0)->m` conditional always yields ptr; its
   never-evaluated second arm only puts a pointer-to-m operand next to ptr
   (presumably so the compiler checks that ptr has a compatible type). */
#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
/* CONTAINER_FROM_VTBL_SIMPLE is a plain cast and is therefore only correct
   when m is the first member of type. CONTAINER_FROM_VTBL uses the general
   offset-based form; CONTAINER_FROM_VTBL_CLS uses the simple cast. */
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
*/
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
*/
/* Path separator as char, wide char, string and wide string:
   backslash on Windows, forward slash everywhere else. */
#ifdef _WIN32
#define CHAR_PATH_SEPARATOR '\\'
#define WCHAR_PATH_SEPARATOR L'\\'
#define STRING_PATH_SEPARATOR "\\"
#define WSTRING_PATH_SEPARATOR L"\\"
#else
#define CHAR_PATH_SEPARATOR '/'
#define WCHAR_PATH_SEPARATOR L'/'
#define STRING_PATH_SEPARATOR "/"
#define WSTRING_PATH_SEPARATOR L"/"
#endif
EXTERN_C_END
#endif

27
bsnes/lzma/7zVersion.h Normal file
View File

@ -0,0 +1,27 @@
#define MY_VER_MAJOR 19
#define MY_VER_MINOR 00
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "19.00"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
#define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")"
#else
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2019-02-21"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR
#else
#define MY_COPYRIGHT MY_COPYRIGHT_PD
#endif
#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE
#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE

55
bsnes/lzma/7zVersion.rc Normal file
View File

@ -0,0 +1,55 @@
// Local copies of the numeric constants needed below, so that <WinVer.h>
// (include commented out further down) is not required.
#define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL
#define MY_VOS_NT_WINDOWS32 0x00040004L
#define MY_VOS_CE_WINDOWS32 0x00050004L
#define MY_VFT_APP 0x00000001L
#define MY_VFT_DLL 0x00000002L
// #include <WinVer.h>
#ifndef MY_VERSION
#include "7zVersion.h"
#endif
// Four-part numeric version used for FILEVERSION / PRODUCTVERSION.
#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0
// NOTE(review): VS_FF_DEBUG has no local MY_ definition above; DEBUG builds
// need it to come from elsewhere.
#ifdef DEBUG
#define DBG_FL VS_FF_DEBUG
#else
#define DBG_FL 0
#endif
// MY_VERSION_INFO(fileType, descr, intName, origName) expands to a complete
// VERSIONINFO resource (language 9,1; string block "040904b0") whose
// version and copyright strings come from 7zVersion.h.
#define MY_VERSION_INFO(fileType, descr, intName, origName) \
LANGUAGE 9, 1 \
1 VERSIONINFO \
FILEVERSION MY_VER \
PRODUCTVERSION MY_VER \
FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \
FILEFLAGS DBG_FL \
FILEOS MY_VOS_NT_WINDOWS32 \
FILETYPE fileType \
FILESUBTYPE 0x0L \
BEGIN \
BLOCK "StringFileInfo" \
BEGIN \
BLOCK "040904b0" \
BEGIN \
VALUE "CompanyName", "Igor Pavlov" \
VALUE "FileDescription", descr \
VALUE "FileVersion", MY_VERSION \
VALUE "InternalName", intName \
VALUE "LegalCopyright", MY_COPYRIGHT \
VALUE "OriginalFilename", origName \
VALUE "ProductName", "7-Zip" \
VALUE "ProductVersion", MY_VERSION \
END \
END \
BLOCK "VarFileInfo" \
BEGIN \
VALUE "Translation", 0x409, 1200 \
END \
END
// Convenience wrappers: the original file name is intName plus ".exe" / ".dll".
#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe")
#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll")

306
bsnes/lzma/Aes.c Normal file
View File

@ -0,0 +1,306 @@
/* Aes.c -- AES encryption / decryption
2017-01-24 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Aes.h"
#include "CpuArch.h"
/* Four 256-entry encryption lookup tables stored back to back (table n is
   reached through TT(n)); filled in at run time by AesGenTables(). */
static UInt32 T[256 * 4];
/* AES forward substitution box; the tables T, D and InvS are all derived
   from it in AesGenTables(). */
static const Byte Sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
/* Forward declarations of the portable and the *_Intel mode routines, so
   AesGenTables() can store their addresses in the dispatch pointers below. */
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
/* Dispatch pointers; they have no initializer and are assigned only by
   AesGenTables(). */
AES_CODE_FUNC g_AesCbc_Encode;
AES_CODE_FUNC g_AesCbc_Decode;
AES_CODE_FUNC g_AesCtr_Code;
/* Decryption counterparts of T and Sbox, also built by AesGenTables(). */
static UInt32 D[256 * 4];
static Byte InvS[256];
/* Round constants for the key schedule; entry 0 is a placeholder so that the
   table can be indexed directly by i / keySize (which starts at 1). */
static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
/* xtime: multiply by 2 in GF(2^8) -- shift left and fold in 0x1B when the
   top bit was set. */
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
/* Ui32: pack four bytes into a word, a0 in the lowest byte. */
#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
/* gbN: extract byte N of a word; gb(n, x) picks gbN by token pasting, so n
   must be a literal digit 0..3. */
#define gb0(x) ( (x) & 0xFF)
#define gb1(x) (((x) >> ( 8)) & 0xFF)
#define gb2(x) (((x) >> (16)) & 0xFF)
#define gb3(x) (((x) >> (24)))
#define gb(n, x) gb ## n(x)
/* TT(x) / DD(x): start of the x-th 256-entry sub-table of T / D. */
#define TT(x) (T + (x << 8))
#define DD(x) (D + (x << 8))
void AesGenTables(void)
{
unsigned i;
for (i = 0; i < 256; i++)
InvS[Sbox[i]] = (Byte)i;
for (i = 0; i < 256; i++)
{
{
UInt32 a1 = Sbox[i];
UInt32 a2 = xtime(a1);
UInt32 a3 = a2 ^ a1;
TT(0)[i] = Ui32(a2, a1, a1, a3);
TT(1)[i] = Ui32(a3, a2, a1, a1);
TT(2)[i] = Ui32(a1, a3, a2, a1);
TT(3)[i] = Ui32(a1, a1, a3, a2);
}
{
UInt32 a1 = InvS[i];
UInt32 a2 = xtime(a1);
UInt32 a4 = xtime(a2);
UInt32 a8 = xtime(a4);
UInt32 a9 = a8 ^ a1;
UInt32 aB = a8 ^ a2 ^ a1;
UInt32 aD = a8 ^ a4 ^ a1;
UInt32 aE = a8 ^ a4 ^ a2;
DD(0)[i] = Ui32(aE, a9, aD, aB);
DD(1)[i] = Ui32(aB, aE, a9, aD);
DD(2)[i] = Ui32(aD, aB, aE, a9);
DD(3)[i] = Ui32(a9, aD, aB, aE);
}
}
g_AesCbc_Encode = AesCbc_Encode;
g_AesCbc_Decode = AesCbc_Decode;
g_AesCtr_Code = AesCtr_Code;
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_Is_Aes_Supported())
{
g_AesCbc_Encode = AesCbc_Encode_Intel;
g_AesCbc_Decode = AesCbc_Decode_Intel;
g_AesCtr_Code = AesCtr_Code_Intel;
}
#endif
}
/* Round-function macros used by Aes_Encode / Aes_Decode. They deliberately
   reference the caller's locals by name: `w` (round-key pointer), `m`
   (state words read by FT / FD) and `dest` (output words written by
   FT4 / FD4).

   HT(i, x, s)     - table lookup for byte x of the state word that feeds
                     output word i in an encryption round.
   HT4(m, i, s, p) - output word i of one encryption round: four lookups
                     XORed together with round-key word w[p + i].
   HT16(m, s, p)   - a full encryption round, state s -> m. */
#define HT(i, x, s) TT(x)[gb(x, s[(i + x) & 3])]
#define HT4(m, i, s, p) m[i] = \
    HT(i, 0, s) ^ \
    HT(i, 1, s) ^ \
    HT(i, 2, s) ^ \
    HT(i, 3, s) ^ w[p + i]
/* The last line of HT16 must NOT end in a backslash: with no blank line
   after it, a continuation would splice the following #define into this
   macro's body. */
#define HT16(m, s, p) \
    HT4(m, 0, s, p); \
    HT4(m, 1, s, p); \
    HT4(m, 2, s, p); \
    HT4(m, 3, s, p);

/* Final encryption round: S-box only, then the round key. */
#define FT(i, x) Sbox[gb(x, m[(i + x) & 3])]
#define FT4(i) dest[i] = Ui32(FT(i, 0), FT(i, 1), FT(i, 2), FT(i, 3)) ^ w[i];

/* Decryption counterparts: D tables, and the word index runs the other
   way, (i - x) instead of (i + x). */
#define HD(i, x, s) DD(x)[gb(x, s[(i - x) & 3])]
#define HD4(m, i, s, p) m[i] = \
    HD(i, 0, s) ^ \
    HD(i, 1, s) ^ \
    HD(i, 2, s) ^ \
    HD(i, 3, s) ^ w[p + i];
/* Same rule as HT16: no trailing backslash on the last line. */
#define HD16(m, s, p) \
    HD4(m, 0, s, p); \
    HD4(m, 1, s, p); \
    HD4(m, 2, s, p); \
    HD4(m, 3, s, p);

/* Final decryption round: inverse S-box only, then the round key. */
#define FD(i, x) InvS[gb(x, m[(i - x) & 3])]
#define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i];
/* Expands `key` into the encryption key schedule.
   w       - output: w[0] receives the round-pair count (keySize/8 + 3, i.e.
             5, 6 or 7 for 16-, 24- or 32-byte keys); the schedule itself
             starts at w[4] and is keySize + 28 words long.
   key     - raw key bytes, read as little-endian 32-bit words.
   keySize - key length in bytes. */
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{
  const unsigned totalWords = keySize + 28;
  const unsigned keyWords = keySize / 4;
  UInt32 *rk = w + 4;
  unsigned pos;

  w[0] = ((UInt32)keyWords / 2) + 3;

  /* The first keyWords words are the key itself. */
  for (pos = 0; pos < keyWords; pos++)
    rk[pos] = GetUi32(key + (size_t)pos * 4);

  /* Every further word is the word keyWords back XORed with a (possibly
     transformed) copy of the preceding word. */
  for (pos = keyWords; pos < totalWords; pos++)
  {
    const unsigned phase = pos % keyWords;
    UInt32 t = rk[(size_t)pos - 1];
    if (phase == 0)
      /* rotate by one byte, substitute, and add the round constant */
      t = Ui32(Sbox[gb1(t)] ^ Rcon[pos / keyWords], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
    else if (keyWords > 6 && phase == 4)
      /* extra substitution step used only for keys longer than 6 words */
      t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
    rk[pos] = rk[pos - keyWords] ^ t;
  }
}
/* Builds the decryption key schedule: generates the encryption schedule,
   then pushes keySize + 20 words starting at w[8] (everything except the
   first and last round key) through the D tables so that Aes_Decode can use
   the same table-lookup round structure as encryption. */
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
{
  UInt32 *cur;
  UInt32 *end;

  Aes_SetKey_Enc(w, key, keySize);

  cur = w + 8;
  end = cur + (keySize + 20);
  for (; cur != end; cur++)
  {
    const UInt32 r = *cur;
    *cur =
        DD(0)[Sbox[gb0(r)]] ^
        DD(1)[Sbox[gb1(r)]] ^
        DD(2)[Sbox[gb2(r)]] ^
        DD(3)[Sbox[gb3(r)]];
  }
}
/* Aes_Encode and Aes_Decode functions work with little-endian words.
src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */
/* Encrypts one 16-byte block.
   w points at the key-mode word (w[0] = number of round pairs) followed,
   from w[4], by the round keys produced by Aes_SetKey_Enc.
   The names w, m and dest are captured by the HT16 / FT4 macros, so they
   must keep exactly these names. */
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
UInt32 m[4];
UInt32 numRounds2 = w[0];
w += 4;
/* initial round-key addition */
s[0] = src[0] ^ w[0];
s[1] = src[1] ^ w[1];
s[2] = src[2] ^ w[2];
s[3] = src[3] ^ w[3];
w += 4;
/* Two table rounds per iteration (s -> m, then m -> s); the loop exits after
   the first half of the last pair, leaving the state in m for FT4. */
for (;;)
{
HT16(m, s, 0);
if (--numRounds2 == 0)
break;
HT16(s, m, 4);
w += 8;
}
/* final round: S-box and last round key only, written straight to dest */
w += 4;
FT4(0); FT4(1); FT4(2); FT4(3);
}
/* Decrypts one 16-byte block using the schedule prepared by Aes_SetKey_Dec.
   Round keys are consumed from the end of the schedule backwards.
   The names w, m and dest are captured by the HD16 / FD4 macros, so they
   must keep exactly these names. */
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
UInt32 m[4];
UInt32 numRounds2 = w[0];
/* jump to the last round key */
w += 4 + numRounds2 * 8;
s[0] = src[0] ^ w[0];
s[1] = src[1] ^ w[1];
s[2] = src[2] ^ w[2];
s[3] = src[3] ^ w[3];
/* Two table rounds per iteration, stepping w back by 8 words each time; the
   loop exits after the first half of the last pair with the state in m. */
for (;;)
{
w -= 8;
HD16(m, s, 4);
if (--numRounds2 == 0)
break;
HD16(s, m, 0);
}
/* final round: inverse S-box and the first round key (w[0..3]) */
FD4(0); FD4(1); FD4(2); FD4(3);
}
/* Loads the 16-byte IV into the first four words of the ivAes state `p`,
   reading each word with GetUi32. */
void AesCbc_Init(UInt32 *p, const Byte *iv)
{
  p[0] = GetUi32(iv);
  p[1] = GetUi32(iv + 4);
  p[2] = GetUi32(iv + 8);
  p[3] = GetUi32(iv + 12);
}
/* Portable CBC encryption, in place.
   p         - p[0..3] is the chaining value (IV on the first call); the key
               schedule starts at p + 4.
   data      - numBlocks consecutive 16-byte blocks.
   After each block p[0..3] holds the ciphertext just produced. */
void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
{
  while (numBlocks != 0)
  {
    unsigned k;

    /* XOR the plaintext block into the chaining value ... */
    for (k = 0; k < 4; k++)
      p[k] ^= GetUi32(data + k * 4);

    /* ... encrypt it in place ... */
    Aes_Encode(p + 4, p, p);

    /* ... and store the result as this block's ciphertext. */
    for (k = 0; k < 4; k++)
    {
      SetUi32(data + k * 4, p[k]);
    }

    data += AES_BLOCK_SIZE;
    numBlocks--;
  }
}
/* Portable CBC decryption, in place.
   p         - p[0..3] is the chaining value (IV on the first call); the key
               schedule starts at p + 4.
   data      - numBlocks consecutive 16-byte blocks.
   After each block p[0..3] holds that block's original ciphertext. */
void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
{
  UInt32 cipher[4];
  UInt32 plain[4];

  while (numBlocks != 0)
  {
    unsigned k;

    /* Keep the ciphertext: it is the chaining value for the next block. */
    for (k = 0; k < 4; k++)
      cipher[k] = GetUi32(data + k * 4);

    Aes_Decode(p + 4, plain, cipher);

    for (k = 0; k < 4; k++)
    {
      SetUi32(data + k * 4, p[k] ^ plain[k]);
    }

    for (k = 0; k < 4; k++)
      p[k] = cipher[k];

    data += AES_BLOCK_SIZE;
    numBlocks--;
  }
}
/* Portable CTR mode, in place (the same routine encrypts and decrypts).
   p         - p[0..3] is the counter block; p[0] is incremented before each
               block with the carry going into p[1]. Key schedule at p + 4.
   data      - numBlocks consecutive 16-byte blocks, XORed with the keystream. */
void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
{
  while (numBlocks != 0)
  {
    UInt32 keystream[4];
    unsigned k;

    /* pre-increment the low 64 bits of the counter */
    p[0]++;
    if (p[0] == 0)
      p[1]++;

    Aes_Encode(p + 4, keystream, p);

    for (k = 0; k < 4; k++)
    {
      const UInt32 ks = keystream[k];
      #ifdef MY_CPU_LE_UNALIGN
      *((UInt32 *)data) ^= ks;
      #else
      data[0] ^= (ks & 0xFF);
      data[1] ^= ((ks >> 8) & 0xFF);
      data[2] ^= ((ks >> 16) & 0xFF);
      data[3] ^= ((ks >> 24));
      #endif
      data += 4;
    }

    numBlocks--;
  }
}

38
bsnes/lzma/Aes.h Normal file
View File

@ -0,0 +1,38 @@
/* Aes.h -- AES encryption / decryption
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __AES_H
#define __AES_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define AES_BLOCK_SIZE 16
/* Call AesGenTables one time before other AES functions */
/* (it builds the lookup tables and assigns the g_Aes* function pointers
   declared at the end of this header) */
void AesGenTables(void);
/* UInt32 pointers must be 16-byte aligned */
/* 16-byte (4 * 32-bit words) blocks: 1 (IV) + 1 (keyMode) + 15 (AES-256 roundKeys) */
#define AES_NUM_IVMRK_WORDS ((1 + 1 + 15) * 4)
/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */
/* keySize = 16 or 24 or 32 (bytes) */
typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
/* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
/* Mode routines, all operating in place on `data`. */
extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCbc_Decode;
extern AES_CODE_FUNC g_AesCtr_Code;
EXTERN_C_END
#endif

184
bsnes/lzma/AesOpt.c Normal file
View File

@ -0,0 +1,184 @@
/* AesOpt.c -- Intel's AES
2017-06-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
/* The intrinsic implementation is compiled only for x86/x64 targets AND when
   the _MSC_VER / _MSC_FULL_VER test below passes. On compilers that do not
   define those macros they evaluate to 0 inside #if, so USE_INTEL_AES stays
   undefined and the plain forwarding functions at the end of this file are
   built instead. */
#ifdef MY_CPU_X86_OR_AMD64
#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
#define USE_INTEL_AES
#endif
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
/* CBC encryption with AES instructions, in place.
   Layout of p in 16-byte units: p[0] = chaining value (IV), the first UInt32
   of p[1] = round-pair count, p[2] = first round key, p[3].. = remaining
   round keys. The updated chaining value is written back to p[0] on exit. */
void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i m = *p;
for (; numBlocks != 0; numBlocks--, data++)
{
/* one pair fewer in the loop: the last pair is done after it */
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p + 3;
/* chain in the plaintext, then add the first round key */
m = _mm_xor_si128(m, *data);
m = _mm_xor_si128(m, p[2]);
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
w += 2;
}
while (--numRounds2 != 0);
/* last pair: one normal round and the final round */
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
*data = m;
}
*p = m;
}
/* Number of blocks the decode / CTR routines process per loop iteration. */
#define NUM_WAYS 3
/* Applies AES instruction `op` with round key w[n] to all three block
   registers. Relies on locals named w, m0, m1 and m2 at the point of use. */
#define AES_OP_W(op, n) { \
const __m128i t = w[n]; \
m0 = op(m0, t); \
m1 = op(m1, t); \
m2 = op(m2, t); \
}
#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
/* CBC decryption with AES instructions, in place.
   Blocks are handled NUM_WAYS (3) at a time while enough remain, then one at
   a time. p uses the same layout as in AesCbc_Encode_Intel; round keys are
   walked from the end of the schedule towards the start. The last ciphertext
   block seen is written back to p[0] as the next chaining value. */
void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i iv = *p;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
/* w[2] is the last round key, applied first */
const __m128i *w = p + numRounds2 * 2;
__m128i m0, m1, m2;
{
const __m128i t = w[2];
m0 = _mm_xor_si128(t, data[0]);
m1 = _mm_xor_si128(t, data[1]);
m2 = _mm_xor_si128(t, data[2]);
}
numRounds2--;
do
{
AES_DEC(1)
AES_DEC(0)
w -= 2;
}
while (--numRounds2 != 0);
AES_DEC(1)
AES_DEC_LAST(0)
{
/* Un-chain: each plaintext is XORed with the previous ciphertext, which is
   read from data[] before that slot is overwritten. */
__m128i t;
t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
}
}
/* leftover blocks (fewer than NUM_WAYS), one at a time */
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const __m128i *w = p + numRounds2 * 2;
__m128i m = _mm_xor_si128(w[2], *data);
numRounds2--;
do
{
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdec_si128(m, w[0]);
w -= 2;
}
while (--numRounds2 != 0);
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdeclast_si128(m, w[0]);
m = _mm_xor_si128(m, iv);
iv = *data;
*data = m;
}
*p = iv;
}
/* CTR mode with AES instructions, in place (encrypts and decrypts alike).
   p[0] is the counter block; its low 64-bit lane is incremented by one
   before each block, and the final counter is written back on exit.
   Blocks are handled NUM_WAYS (3) at a time while enough remain, then one at
   a time. */
void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i ctr = *p;
__m128i one;
/* increment constant {1, 0}; the m128i_u64 member is how MSVC exposes the
   lanes of __m128i */
one.m128i_u64[0] = 1;
one.m128i_u64[1] = 0;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p;
__m128i m0, m1, m2;
{
/* three consecutive counter values, each XORed with the first round key */
const __m128i t = w[2];
ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
}
w += 3;
do
{
AES_ENC(0)
AES_ENC(1)
w += 2;
}
while (--numRounds2 != 0);
AES_ENC(0)
AES_ENC_LAST(1)
/* XOR the keystream into the data */
data[0] = _mm_xor_si128(data[0], m0);
data[1] = _mm_xor_si128(data[1], m1);
data[2] = _mm_xor_si128(data[2], m2);
}
/* leftover blocks (fewer than NUM_WAYS), one at a time */
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p;
__m128i m;
ctr = _mm_add_epi64(ctr, one);
m = _mm_xor_si128(ctr, p[2]);
w += 3;
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
w += 2;
}
while (--numRounds2 != 0);
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
*data = _mm_xor_si128(*data, m);
}
*p = ctr;
}
#else
/* Build without AES intrinsics: the *_Intel entry points still have to
   exist, so each one simply forwards to the portable routine of the same
   mode, declared here. */
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);

/* CBC encryption -> portable AesCbc_Encode. */
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCbc_Encode(p, data, numBlocks);
}

/* CBC decryption -> portable AesCbc_Decode. */
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCbc_Decode(p, data, numBlocks);
}

/* CTR mode -> portable AesCtr_Code. */
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
  AesCtr_Code(p, data, numBlocks);
}
#endif

455
bsnes/lzma/Alloc.c Normal file
View File

@ -0,0 +1,455 @@
/* Alloc.c -- Memory allocation functions
2018-04-27 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32
#include <windows.h>
#endif
#include <stdlib.h>
#include "Alloc.h"
/* #define _SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
/* Debug-only dependencies: <stdio.h> for fputs / fputc, and <string.h> for
   the strlen() call in PrintAligned, which previously had no declaration in
   scope. */
#include <stdio.h>
#include <string.h>
/* Outstanding-allocation counters, one per allocator family. PRINT_ALLOC
   post-increments and PRINT_FREE pre-decrements the counter it is given. */
int g_allocCount = 0;
int g_allocCountMid = 0;
int g_allocCountBig = 0;
/* Body of an unsigned-integer-to-decimal converter. Expects `val` (the
   number, destroyed in the process) and `s` (output cursor) in the
   enclosing scope. Digits are produced least-significant first into temp[]
   and then copied out in reverse; the result is NUL-terminated.
   tempSize must cover every digit except the leading one. */
#define CONVERT_INT_TO_STR(charType, tempSize) \
unsigned char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0;
/* Writes val in decimal, NUL-terminated, to s. The caller provides the
   buffer; a UInt64 needs at most 20 digits plus the terminator. */
static void ConvertUInt64ToString(UInt64 val, char *s)
{
CONVERT_INT_TO_STR(char, 24);
}
/* Maps a value 0..15 to its upper-case hexadecimal digit. */
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))

/* Writes val as upper-case hexadecimal, without prefix or leading zeros
   (a lone "0" for zero), NUL-terminated, to s. The caller provides the
   buffer; 17 bytes are enough for any UInt64. */
static void ConvertUInt64ToHex(UInt64 val, char *s)
{
  unsigned numDigits = 1;
  UInt64 rest;

  /* count the digits: one, plus one for every further non-zero nibble group */
  for (rest = val >> 4; rest != 0; rest >>= 4)
    numDigits++;

  s[numDigits] = 0;

  /* fill in from the least significant digit backwards */
  while (numDigits != 0)
  {
    unsigned nibble = (unsigned)(val & 0xF);
    val >>= 4;
    s[--numDigits] = GET_HEX_CHAR(nibble);
  }
}
/* Destination of all allocation-debug output. */
#define DEBUG_OUT_STREAM stderr

/* Writes s to the debug stream; no newline is added. */
static void Print(const char *s)
{
  fputs(s, DEBUG_OUT_STREAM);
}
/* Prints s right-aligned: at least one space is always written, plus enough
   further spaces to pad a string shorter than `align`, followed by s. */
static void PrintAligned(const char *s, size_t align)
{
  size_t len = strlen(s);
  do
    fputc(' ', DEBUG_OUT_STREAM);
  while (len++ < align);
  Print(s);
}
/* Ends the current line of debug output. */
static void PrintLn()
{
  static const char newline[] = "\n";
  Print(newline);
}
/* Prints v in hexadecimal, right-aligned via PrintAligned(text, align). */
static void PrintHex(UInt64 v, size_t align)
{
  char text[32];
  ConvertUInt64ToHex(v, text);
  PrintAligned(text, align);
}
/* Prints v in decimal, right-aligned via PrintAligned(text, align). */
static void PrintDec(UInt64 v, size_t align)
{
  char text[32];
  ConvertUInt64ToString(v, text);
  PrintAligned(text, align);
}
static void PrintAddr(void *p)
{
PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
}
/* Logs one allocation: label, running count (post-incremented), size in hex
   and the address. Expands to several statements, so it must not be used as
   the unbraced body of an if / else / loop. */
#define PRINT_ALLOC(name, cnt, size, ptr) \
Print(name " "); \
PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn();
/* Logs one release: label, running count (pre-decremented) and the address.
   Nothing is logged or counted when ptr is null. */
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
Print(name " "); \
PrintDec(--cnt, 10); \
PrintAddr(ptr); \
PrintLn(); }
#else
/* Without _SZ_ALLOC_DEBUG every debug helper expands to nothing, so the
   call sites need no conditional compilation of their own. */
#define PRINT_ALLOC(name, cnt, size, ptr)
#define PRINT_FREE(name, cnt, ptr)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
#define PrintDec(v, align)
#define PrintAddr(p)
#endif
/* malloc wrapper. A request for 0 bytes returns NULL without calling
   malloc; otherwise the result of malloc(size) is returned (NULL on
   failure). With _SZ_ALLOC_DEBUG defined every allocation is logged. */
void *MyAlloc(size_t size)
{
  void *block = NULL;
  if (size != 0)
  {
    block = malloc(size);
    #ifdef _SZ_ALLOC_DEBUG
    PRINT_ALLOC("Alloc ", g_allocCount, size, block);
    #endif
  }
  return block;
}
/* Counterpart of MyAlloc: logs the release (debug builds only, and only for
   non-null pointers) and hands the pointer to free(). */
void MyFree(void *address)
{
  PRINT_FREE("Free ", g_allocCount, address);

  free(address);
}
#ifdef _WIN32
/* Windows-only allocator backed by VirtualAlloc (committed, read/write
   pages). A request for 0 bytes returns NULL without calling the OS. */
void *MidAlloc(size_t size)
{
  void *block = NULL;
  if (size != 0)
  {
    PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
    block = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
  }
  return block;
}
/* Releases a block obtained from MidAlloc. A null pointer is ignored. */
void MidFree(void *address)
{
  PRINT_FREE("Free-Mid", g_allocCountMid, address);
  if (address)
  {
    VirtualFree(address, 0, MEM_RELEASE);
  }
}
/* Large-page support is dropped when the Windows headers in use do not
   provide MEM_LARGE_PAGES. */
#ifndef MEM_LARGE_PAGES
#undef _7ZIP_LARGE_PAGES
#endif
#ifdef _7ZIP_LARGE_PAGES
/* Large-page size in bytes; 0 (the initial value) means large pages are not
   used. Set by SetLargePageSize(). */
SIZE_T g_LargePageSize = 0;
/* Signature under which GetLargePageMinimum is called after being looked up
   at run time. */
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
#endif
void SetLargePageSize()
{
#ifdef _7ZIP_LARGE_PAGES
SIZE_T size;
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
if (!largePageMinimum)
return;
size = largePageMinimum();
if (size == 0 || (size & (size - 1)) != 0)
return;
g_LargePageSize = size;
#endif
}
/* Windows-only allocator for big buffers.
   A request for 0 bytes returns NULL. When large pages are enabled
   (g_LargePageSize non-zero and at most 1 GiB) and the request is larger
   than half a large page, the size is rounded up to a whole number of large
   pages and a MEM_LARGE_PAGES allocation is tried first; if that fails or
   does not apply, an ordinary committed VirtualAlloc is used. */
void *BigAlloc(size_t size)
{
  if (size == 0)
    return NULL;

  PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);

  #ifdef _7ZIP_LARGE_PAGES
  {
    const SIZE_T pageSize = g_LargePageSize;
    if (pageSize != 0 && pageSize <= (1 << 30) && size > (pageSize / 2))
    {
      const SIZE_T mask = pageSize - 1;
      const size_t rounded = (size + mask) & ~mask;
      /* rounded < size would mean the addition wrapped around */
      if (rounded >= size)
      {
        void *large = VirtualAlloc(NULL, rounded, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
        if (large)
          return large;
      }
    }
  }
  #endif

  return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
}
/* Releases a block obtained from BigAlloc. A null pointer is ignored. */
void BigFree(void *address)
{
  PRINT_FREE("Free-Big", g_allocCountBig, address);
  if (address)
  {
    VirtualFree(address, 0, MEM_RELEASE);
  }
}
#endif
/* ISzAlloc adapter over MyAlloc / MyFree; the allocator argument is unused. */
static void *SzAlloc(ISzAllocPtr p, size_t size)
{
  UNUSED_VAR(p);
  return MyAlloc(size);
}

static void SzFree(ISzAllocPtr p, void *address)
{
  UNUSED_VAR(p);
  MyFree(address);
}

const ISzAlloc g_Alloc = { SzAlloc, SzFree };
/* ISzAlloc adapter over MidAlloc / MidFree; the allocator argument is unused. */
static void *SzMidAlloc(ISzAllocPtr p, size_t size)
{
  UNUSED_VAR(p);
  return MidAlloc(size);
}

static void SzMidFree(ISzAllocPtr p, void *address)
{
  UNUSED_VAR(p);
  MidFree(address);
}

const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
/*
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#ifdef _WIN32
typedef UINT_PTR UIntPtr;
#else
/*
typedef uintptr_t UIntPtr;
*/
typedef ptrdiff_t UIntPtr;
#endif
#define ADJUST_ALLOC_SIZE 0
/*
#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
*/
/*
Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
MyAlloc() can return address that is NOT multiple of sizeof(void *).
*/
/*
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
*/
/* Clears the bits of p selected by (align - 1); this rounds p down to a
   multiple of align when align is a power of two. */
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
/* Advances p by (align + ADJUST_ALLOC_SIZE) bytes and then rounds down, so the
   result lies strictly above p and at most (align + ADJUST_ALLOC_SIZE) bytes past it. */
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
#define USE_posix_memalign
#endif
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),
if this posix_memalign() is used.
*/
/*
static int posix_memalign(void **ptr, size_t align, size_t size)
{
size_t newSize = size + align;
void *p;
void *pAligned;
*ptr = NULL;
if (newSize < size)
return 12; // ENOMEM
p = MyAlloc(newSize);
if (!p)
return 12; // ENOMEM
pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
((void **)pAligned)[-1] = p;
*ptr = pAligned;
return 0;
}
*/
/*
ALLOC_ALIGN_SIZE >= sizeof(void *)
ALLOC_ALIGN_SIZE >= cache_line_size
*/
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
/* Allocates `size` bytes aligned to ALLOC_ALIGN_SIZE.
   Without posix_memalign the block is over-allocated through MyAlloc(), the
   aligned address inside it is returned, and the original pointer is stored
   in the pointer-sized slot just below that address for SzAlignedFree().
   Returns NULL on overflow of the padded size or on allocation failure. */
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
{
  #ifndef USE_posix_memalign

  void *raw;
  void *aligned;
  size_t paddedSize;

  UNUSED_VAR(pp);

  /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
     block to prevent cache line sharing with another allocated blocks */

  paddedSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
  if (paddedSize < size)
    return NULL;

  raw = MyAlloc(paddedSize);
  if (!raw)
    return NULL;

  aligned = MY_ALIGN_PTR_UP_PLUS(raw, ALLOC_ALIGN_SIZE);

  Print(" size="); PrintHex(size, 8);
  Print(" a_size="); PrintHex(paddedSize, 8);
  Print(" ptr="); PrintAddr(raw);
  Print(" a_ptr="); PrintAddr(aligned);
  PrintLn();

  /* remember the real block start right below the aligned address */
  ((void **)aligned)[-1] = raw;
  return aligned;

  #else

  void *block;

  UNUSED_VAR(pp);

  if (posix_memalign(&block, ALLOC_ALIGN_SIZE, size))
    return NULL;

  Print(" posix_memalign="); PrintAddr(block);
  PrintLn();

  return block;

  #endif
}
/* Frees a block returned by SzAlignedAlloc(). In the MyAlloc-based variant
   the real block pointer is read back from the slot below `address`;
   NULL is ignored there. */
static void SzAlignedFree(ISzAllocPtr pp, void *address)
{
  UNUSED_VAR(pp);

  #ifndef USE_posix_memalign
  if (!address)
    return;
  MyFree(((void **)address)[-1]);
  #else
  free(address);
  #endif
}
const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not a multiple of sizeof(void *) */
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
/*
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
*/
/*
AlignOffsetAlloc_Alloc -- ISzAlloc::Alloc implementation of CAlignOffsetAlloc.
Allocates (size) bytes from p->baseAlloc and returns an address (a) such that
(a - p->offset) is a multiple of the alignment (1 << p->numAlignBits, raised to
sizeof(void *) if smaller). The pointer returned by the base allocator is stored
via REAL_BLOCK_PTR_VAR so that AlignOffsetAlloc_Free can recover it.
Returns NULL if p->offset is not below the alignment, if the padded size
overflows, or if the base allocator fails.
*/
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
void *adr;
void *pAligned;
size_t newSize;
size_t extra;
size_t alignSize = (size_t)1 << p->numAlignBits;
/* the alignment must leave room for the back-pointer slot */
if (alignSize < sizeof(void *))
alignSize = sizeof(void *);
if (p->offset >= alignSize)
return NULL;
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */
/* extra = part of offset that is not pointer-aligned; reserved as additional padding */
extra = p->offset & (sizeof(void *) - 1);
newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
/* unsigned wrap-around check */
if (newSize < size)
return NULL;
adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
if (!adr)
return NULL;
/* round down to an aligned address inside the padded block, then add offset */
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
PrintLn();
Print("- Aligned: ");
Print(" size="); PrintHex(size, 8);
Print(" a_size="); PrintHex(newSize, 8);
Print(" ptr="); PrintAddr(adr);
Print(" a_ptr="); PrintAddr(pAligned);
PrintLn();
/* store the base allocator's pointer in the slot below the (pointer-aligned) result */
REAL_BLOCK_PTR_VAR(pAligned) = adr;
return pAligned;
}
/* ISzAlloc::Free implementation of CAlignOffsetAlloc: recovers the pointer
   saved by AlignOffsetAlloc_Alloc and returns it to the base allocator.
   NULL is ignored. */
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{
  CAlignOffsetAlloc *self;

  if (!address)
    return;

  self = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);

  PrintLn();
  Print("- Aligned Free: ");
  PrintLn();

  ISzAlloc_Free(self->baseAlloc, REAL_BLOCK_PTR_VAR(address));
}
/* Installs the aligned-offset Alloc/Free callbacks into p->vt.
   The other fields of *p are not touched here. */
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
{
  p->vt.Free = AlignOffsetAlloc_Free;
  p->vt.Alloc = AlignOffsetAlloc_Alloc;
}

51
bsnes/lzma/Alloc.h Normal file
View File

@ -0,0 +1,51 @@
/* Alloc.h -- Memory allocation functions
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
void *MyAlloc(size_t size);
void MyFree(void *address);
#ifdef _WIN32
void SetLargePageSize();
void *MidAlloc(size_t size);
void MidFree(void *address);
void *BigAlloc(size_t size);
void BigFree(void *address);
#else
#define MidAlloc(size) MyAlloc(size)
#define MidFree(address) MyFree(address)
#define BigAlloc(size) MyAlloc(size)
#define BigFree(address) MyFree(address)
#endif
extern const ISzAlloc g_Alloc;
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
extern const ISzAlloc g_AlignedAlloc;
/* Allocator wrapper: blocks come from baseAlloc and are returned at
   (aligned address + offset). Call AlignOffsetAlloc_CreateVTable() to fill vt. */
typedef struct
{
ISzAlloc vt; /* Alloc / Free entry points of this wrapper */
ISzAllocPtr baseAlloc; /* allocator that provides the underlying memory */
unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
size_t offset; /* (offset == (k * sizeof(void *))) && (offset < (1 << numAlignBits)) */
} CAlignOffsetAlloc;
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
EXTERN_C_END
#endif

257
bsnes/lzma/Bcj2.c Normal file
View File

@ -0,0 +1,257 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2018-04-28 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bcj2.h"
#include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5
#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
/* Resets the decoder to its initial state: all probabilities at 1/2
   (kBitModelTotal >> 1), range/code/ip cleared, previous byte (temp[3]) zero.
   Buffer pointers (bufs, lims, dest, destLim) are left for the caller to set. */
void Bcj2Dec_Init(CBcj2Dec *p)
{
  unsigned i;

  for (i = sizeof(p->probs) / sizeof(p->probs[0]); i != 0;)
    p->probs[--i] = kBitModelTotal >> 1;

  p->code = 0;
  p->range = 0;
  p->temp[3] = 0;
  p->ip = 0;
  p->state = BCJ2_DEC_STATE_OK;
}
/*
Bcj2Dec_Decode -- runs the BCJ2 decoder until an input stream is exhausted or
the output buffer is full. The call is resumable: all progress is kept in *p.

Input:  p->bufs[n] .. p->lims[n] for the four streams, p->dest .. p->destLim.
Output: p->state tells why the function stopped:
  BCJ2_STREAM_*                 - that input stream has no more data
  BCJ2_DEC_STATE_ORIG           - dest is full
  BCJ2_DEC_STATE_ORIG_0 + k     - dest is full and (4 - k) bytes of a converted
                                  address are still pending in p->temp[]
Returns SZ_ERROR_DATA if the 5-byte range coder header is invalid, else SZ_OK.
*/
SRes Bcj2Dec_Decode(CBcj2Dec *p)
{
/* Header phase: while range <= 5, p->range counts the header bytes read so far
(Bcj2Dec_Init sets it to 0). */
if (p->range <= 5)
{
p->state = BCJ2_DEC_STATE_OK;
for (; p->range != 5; p->range++)
{
/* the first header byte must be zero */
if (p->range == 1 && p->code != 0)
return SZ_ERROR_DATA;
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
return SZ_OK;
}
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
if (p->code == 0xFFFFFFFF)
return SZ_ERROR_DATA;
p->range = 0xFFFFFFFF;
}
/* Resume: flush address bytes that did not fit into dest on the previous call. */
else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
{
while (p->state <= BCJ2_DEC_STATE_ORIG_3)
{
Byte *dest = p->dest;
if (dest == p->destLim)
return SZ_OK;
*dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
p->state++;
p->dest = dest + 1;
}
}
/*
if (BCJ2_IS_32BIT_STREAM(p->state))
{
const Byte *cur = p->bufs[p->state];
if (cur == p->lims[p->state])
return SZ_OK;
p->bufs[p->state] = cur + 4;
{
UInt32 val;
Byte *dest;
SizeT rem;
p->ip += 4;
val = GetBe32(cur) - p->ip;
dest = p->dest;
rem = p->destLim - dest;
if (rem < 4)
{
SizeT i;
SetUi32(p->temp, val);
for (i = 0; i < rem; i++)
dest[i] = p->temp[i];
p->dest = dest + rem;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
return SZ_OK;
}
SetUi32(dest, val);
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4;
p->state = BCJ2_DEC_STATE_OK;
}
}
*/
for (;;)
{
/* If the previous call stopped while waiting for the CALL/JUMP stream, skip
straight to the address-reading code at the bottom of the loop. */
if (BCJ2_IS_32BIT_STREAM(p->state))
p->state = BCJ2_DEC_STATE_OK;
else
{
/* range coder normalization: pull one more byte when range drops below 2^24 */
if (p->range < kTopValue)
{
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
return SZ_OK;
}
p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
{
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
return SZ_OK;
}
dest = p->dest;
/* limit the copy to the free space in dest */
if (num > (SizeT)(p->destLim - dest))
{
num = p->destLim - dest;
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
return SZ_OK;
}
}
srcLim = src + num;
/* Copy MAIN bytes to dest until a candidate opcode is found:
E8 / E9, or a byte 8x that directly follows 0F (temp[3] holds the
last byte of the previous run). The candidate byte is stored to *dest
but src/dest are not advanced past it yet. */
if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)
*dest = src[0];
else for (;;)
{
Byte b = *src;
*dest = b;
if (b != 0x0F)
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break;
}
num = src - p->bufs[BCJ2_STREAM_MAIN];
/* no candidate in this run: record progress and report which side ran out */
if (src == srcLim)
{
p->temp[3] = src[-1];
p->bufs[BCJ2_STREAM_MAIN] = src;
p->ip += (UInt32)num;
p->dest += num;
p->state =
p->bufs[BCJ2_STREAM_MAIN] ==
p->lims[BCJ2_STREAM_MAIN] ?
(unsigned)BCJ2_STREAM_MAIN :
(unsigned)BCJ2_DEC_STATE_ORIG;
return SZ_OK;
}
{
UInt32 bound, ttt;
CProb *prob;
Byte b = src[0];
Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
p->temp[3] = b;
p->bufs[BCJ2_STREAM_MAIN] = src + 1;
num++;
p->ip += (UInt32)num;
p->dest += num;
/* probability slot: 2 + previous byte for E8, 1 for E9, 0 otherwise */
prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
/* decoded bit 0: the bytes after the opcode were not converted */
_IF_BIT_0
{
_UPDATE_0
continue;
}
_UPDATE_1
}
}
}
/* Decoded bit 1: take a 32-bit big-endian value from the CALL or JUMP stream,
subtract the current ip, and emit the result to dest low byte first. */
{
UInt32 val;
unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
const Byte *cur = p->bufs[cj];
Byte *dest;
SizeT rem;
if (cur == p->lims[cj])
{
p->state = cj;
break;
}
val = GetBe32(cur);
p->bufs[cj] = cur + 4;
p->ip += 4;
val -= p->ip;
dest = p->dest;
rem = p->destLim - dest;
/* dest cannot take all 4 bytes: write what fits, keep all 4 in temp[] and
encode the number already written in the state */
if (rem < 4)
{
p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;
p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;
p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;
p->temp[3] = (Byte)val;
p->dest = dest + rem;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
break;
}
SetUi32(dest, val);
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4;
}
}
/* reached only via break: normalize once more if an RC byte is available */
if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
{
p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
return SZ_OK;
}

146
bsnes/lzma/Bcj2.h Normal file
View File

@ -0,0 +1,146 @@
/* Bcj2.h -- BCJ2 Converter for x86 code
2014-11-10 : Igor Pavlov : Public domain */
#ifndef __BCJ2_H
#define __BCJ2_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define BCJ2_NUM_STREAMS 4
enum
{
BCJ2_STREAM_MAIN,
BCJ2_STREAM_CALL,
BCJ2_STREAM_JUMP,
BCJ2_STREAM_RC
};
enum
{
BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,
BCJ2_DEC_STATE_ORIG_1,
BCJ2_DEC_STATE_ORIG_2,
BCJ2_DEC_STATE_ORIG_3,
BCJ2_DEC_STATE_ORIG,
BCJ2_DEC_STATE_OK
};
enum
{
BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
BCJ2_ENC_STATE_OK
};
#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
/*
CBcj2Dec / CBcj2Enc
bufs sizes:
BUF_SIZE(n) = lims[n] - bufs[n]
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
*/
/*
CBcj2Dec:
dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
bufs[BCJ2_STREAM_MAIN] >= dest &&
bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
BUF_SIZE(BCJ2_STREAM_CALL) +
BUF_SIZE(BCJ2_STREAM_JUMP)
tempReserv = 0 : for first call of Bcj2Dec_Decode
tempReserv = 4 : for any other calls of Bcj2Dec_Decode
overlap with offset = 1 is not allowed
*/
/* BCJ2 decoder state. The caller sets bufs/lims/dest/destLim; the remaining
   fields are initialized by Bcj2Dec_Init() and updated by Bcj2Dec_Decode(). */
typedef struct
{
const Byte *bufs[BCJ2_NUM_STREAMS]; /* current read position of each input stream */
const Byte *lims[BCJ2_NUM_STREAMS]; /* end of available data of each input stream */
Byte *dest; /* current write position */
const Byte *destLim; /* end of the output buffer */
unsigned state; /* BCJ2_STREAM_MAIN has higher priority than BCJ2_DEC_STATE_ORIG */
UInt32 ip; /* running instruction pointer, subtracted from decoded absolute addresses */
Byte temp[4]; /* temp[3]: last byte processed; temp[0..3]: address bytes pending when dest was full */
UInt32 range; /* range coder range; values <= 5 count header bytes during startup */
UInt32 code; /* range coder code value */
UInt16 probs[2 + 256]; /* [0]: other opcodes, [1]: E9, [2 + prevByte]: E8 */
} CBcj2Dec;
void Bcj2Dec_Init(CBcj2Dec *p);
/* Returns: SZ_OK or SZ_ERROR_DATA */
SRes Bcj2Dec_Decode(CBcj2Dec *p);
#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
typedef enum
{
BCJ2_ENC_FINISH_MODE_CONTINUE,
BCJ2_ENC_FINISH_MODE_END_BLOCK,
BCJ2_ENC_FINISH_MODE_END_STREAM
} EBcj2Enc_FinishMode;
/* BCJ2 encoder state. The caller sets bufs/lims/src/srcLim and finishMode;
   Bcj2Enc_Init() sets the defaults of the remaining fields. */
typedef struct
{
Byte *bufs[BCJ2_NUM_STREAMS]; /* current write position of each output stream */
const Byte *lims[BCJ2_NUM_STREAMS]; /* end of each output buffer */
const Byte *src; /* current read position */
const Byte *srcLim; /* end of available input */
unsigned state;
EBcj2Enc_FinishMode finishMode;
Byte prevByte; /* last input byte processed */
Byte cache; /* range encoder: byte held back for carry propagation */
UInt32 range;
UInt64 low;
UInt64 cacheSize;
UInt32 ip;
/* 32-bit relative offset in JUMP/CALL commands is
- (mod 4 GB) in 32-bit mode
- signed Int32 in 64-bit mode
We use (mod 4 GB) check for fileSize.
Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
UInt32 fileIp;
UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means disable_conversion */
UInt32 tempTarget; /* absolute address waiting for space in the CALL/JUMP stream */
unsigned tempPos; /* number of input bytes currently buffered in temp[] */
Byte temp[4 * 2];
unsigned flushPos; /* number of final range-coder flush steps done (5 == finished) */
UInt16 probs[2 + 256]; /* [0]: other opcodes, [1]: E9, [2 + prevByte]: E8 */
} CBcj2Enc;
void Bcj2Enc_Init(CBcj2Enc *p);
void Bcj2Enc_Encode(CBcj2Enc *p);
#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
#define BCJ2_RELAT_LIMIT_NUM_BITS 26
#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
/* limit for CBcj2Enc::fileSize variable */
#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
EXTERN_C_END
#endif

311
bsnes/lzma/Bcj2Enc.c Normal file
View File

@ -0,0 +1,311 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
/* #define SHOW_STAT */
#ifdef SHOW_STAT
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif
#include <string.h>
#include "Bcj2.h"
#include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5
/* Puts the encoder into its initial state: full range, empty carry cache,
   all probabilities at 1/2, no file-size limit and the default relative
   limit (BCJ2_RELAT_LIMIT). Stream pointers are left for the caller to set. */
void Bcj2Enc_Init(CBcj2Enc *p)
{
  unsigned i;

  for (i = sizeof(p->probs) / sizeof(p->probs[0]); i != 0;)
    p->probs[--i] = kBitModelTotal >> 1;

  /* range coder */
  p->range = 0xFFFFFFFF;
  p->low = 0;
  p->cache = 0;
  p->cacheSize = 1;

  /* conversion parameters */
  p->ip = 0;
  p->fileIp = 0;
  p->fileSize = 0;
  p->relatLimit = BCJ2_RELAT_LIMIT;

  /* stream / buffering state */
  p->prevByte = 0;
  p->tempPos = 0;
  p->flushPos = 0;
  p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
  p->state = BCJ2_ENC_STATE_OK;
}
/*
RangeEnc_ShiftLow -- shifts the range encoder's 'low' value left by 8 bits,
writing finished bytes to the RC stream.
Returns True (with p->state = BCJ2_STREAM_RC) if the RC output buffer is full;
in that case 'low' has not been shifted and the call must be repeated later.
Returns False on success.
*/
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
{
/* Bytes can be emitted when the top byte of the low 32 bits is not 0xFF
(no carry can reach it later) or when a carry is already present in bit 32. */
if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
{
Byte *buf = p->bufs[BCJ2_STREAM_RC];
do
{
if (buf == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
p->bufs[BCJ2_STREAM_RC] = buf;
return True;
}
/* first the cached byte plus carry, then the pending 0xFF bytes plus carry */
*buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
p->cache = 0xFF;
}
while (--p->cacheSize);
p->bufs[BCJ2_STREAM_RC] = buf;
p->cache = (Byte)((UInt32)p->low >> 24);
}
p->cacheSize++;
/* drop the byte just accounted for (and any carry bit) and make room for 8 new bits */
p->low = (UInt32)p->low << 8;
return False;
}
/*
Bcj2Enc_Encode_2 -- core BCJ2 encoding loop over p->src .. p->srcLim.
Copies input to the MAIN stream; for each candidate opcode (E8, E9, or 8x after
0F) it range-encodes one bit telling whether the following 4 bytes were
converted and, if so, writes the absolute target (big-endian) to the CALL or
JUMP stream. Returns whenever an output stream is full (p->state = that stream)
or input is used up; p->state stays BCJ2_ENC_STATE_ORIG when more input is needed.
In BCJ2_ENC_FINISH_MODE_END_STREAM it finally flushes the range coder and sets
p->state = BCJ2_ENC_STATE_OK.
*/
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
/* Resume: the previous call stopped because the CALL/JUMP buffer was full;
write the saved target now, or return again if there is still no room. */
if (BCJ2_IS_32BIT_STREAM(p->state))
{
Byte *cur = p->bufs[p->state];
if (cur == p->lims[p->state])
return;
SetBe32(cur, p->tempTarget);
p->bufs[p->state] = cur + 4;
}
p->state = BCJ2_ENC_STATE_ORIG;
for (;;)
{
/* range coder normalization */
if (p->range < kTopValue)
{
if (RangeEnc_ShiftLow(p))
return;
p->range <<= 8;
}
{
{
const Byte *src = p->src;
const Byte *srcLim;
Byte *dest;
SizeT num = p->srcLim - src;
/* in CONTINUE mode the last 4 input bytes are left unprocessed,
so an opcode found in this run has its 4 operand bytes available */
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
if (num <= 4)
return;
num -= 4;
}
else if (num == 0)
break;
dest = p->bufs[BCJ2_STREAM_MAIN];
/* limit the copy to the free space in the MAIN stream */
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
{
num = p->lims[BCJ2_STREAM_MAIN] - dest;
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
return;
}
}
srcLim = src + num;
/* copy bytes until a candidate opcode: E8 / E9, or 8x directly after 0F */
if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
*dest = src[0];
else for (;;)
{
Byte b = *src;
*dest = b;
if (b != 0x0F)
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break;
}
num = src - p->src;
/* no candidate in this run: record progress and start over */
if (src == srcLim)
{
p->prevByte = src[-1];
p->bufs[BCJ2_STREAM_MAIN] = dest;
p->src = src;
p->ip += (UInt32)num;
continue;
}
{
Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
BoolInt needConvert;
p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
p->ip += (UInt32)num + 1;
src++;
needConvert = False;
/* Convert only if 4 operand bytes are available, the target lies inside
the file (when fileSize is set) and the relative value is within
+/- relatLimit. */
if ((SizeT)(p->srcLim - src) >= 4)
{
UInt32 relatVal = GetUi32(src);
if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
&& ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
needConvert = True;
}
{
UInt32 bound;
unsigned ttt;
Byte b = src[-1];
/* probability slot: 2 + previous byte for E8, 1 for E9, 0 otherwise */
CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
ttt = *prob;
bound = (p->range >> kNumModelBits) * ttt;
/* encode bit 0: operand stays in the MAIN stream unchanged */
if (!needConvert)
{
p->range = bound;
*prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
p->src = src;
p->prevByte = b;
continue;
}
/* encode bit 1: operand is replaced by an absolute target */
p->low += bound;
p->range -= bound;
*prob = (CProb)(ttt - (ttt >> kNumMoveBits));
{
UInt32 relatVal = GetUi32(src);
UInt32 absVal;
p->ip += 4;
absVal = p->ip + relatVal;
p->prevByte = src[3];
src += 4;
p->src = src;
{
unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
Byte *cur = p->bufs[cj];
/* no room: remember the target; it is written on the next call */
if (cur == p->lims[cj])
{
p->state = cj;
p->tempTarget = absVal;
return;
}
SetBe32(cur, absVal);
p->bufs[cj] = cur + 4;
}
}
}
}
}
}
}
/* all input consumed (finishMode is not CONTINUE) */
if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
return;
/* final flush: 5 ShiftLow steps; flushPos survives an interrupted flush */
for (; p->flushPos < 5; p->flushPos++)
if (RangeEnc_ShiftLow(p))
return;
p->state = BCJ2_ENC_STATE_OK;
}
/*
Bcj2Enc_Encode -- public entry point of the BCJ2 encoder.
Input bytes left over from the previous call are kept in p->temp[] (p->tempPos
bytes). They are processed first, with bytes from the new input appended one at
a time; then the main input is encoded directly. If the encoder stops in
BCJ2_ENC_STATE_ORIG, the unprocessed tail of the input is copied into p->temp[]
and p->src is advanced to p->srcLim.
*/
void Bcj2Enc_Encode(CBcj2Enc *p)
{
PRF(printf("\n"));
PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
if (p->tempPos != 0)
{
/* extra = number of bytes of the new input that were appended to temp[] */
unsigned extra = 0;
for (;;)
{
/* temporarily redirect the encoder's input to temp[] */
const Byte *src = p->src;
const Byte *srcLim = p->srcLim;
EBcj2Enc_FinishMode finishMode = p->finishMode;
p->src = p->temp;
p->srcLim = p->temp + p->tempPos;
/* more real input follows temp[], so temp[] must not be treated as the end */
if (src != srcLim)
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
Bcj2Enc_Encode_2(p);
{
unsigned num = (unsigned)(p->src - p->temp);
unsigned tempPos = p->tempPos - num;
unsigned i;
p->tempPos = tempPos;
/* move the unconsumed bytes to the start of temp[] */
for (i = 0; i < tempPos; i++)
p->temp[i] = p->temp[(size_t)i + num];
/* restore the caller's input window and finish mode */
p->src = src;
p->srcLim = srcLim;
p->finishMode = finishMode;
if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
return;
/* everything still in temp[] came from the new input: step src back over
those bytes and continue with the main input */
if (extra >= tempPos)
{
p->src = src - tempPos;
p->tempPos = 0;
break;
}
/* append one more byte of the new input to temp[] and try again */
p->temp[tempPos] = src[0];
p->tempPos = tempPos + 1;
p->src = src + 1;
extra++;
}
}
}
PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
Bcj2Enc_Encode_2(p);
/* encoder wants more input: keep the unprocessed tail for the next call */
if (p->state == BCJ2_ENC_STATE_ORIG)
{
const Byte *src = p->src;
unsigned rem = (unsigned)(p->srcLim - src);
unsigned i;
for (i = 0; i < rem; i++)
p->temp[i] = src[i];
p->tempPos = rem;
p->src = src + rem;
}
}

230
bsnes/lzma/Bra.c Normal file
View File

@ -0,0 +1,230 @@
/* Bra.c -- Converters for RISC code
2017-04-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h"
/*
ARM_Convert -- branch converter for 32-bit ARM code.
Processes the buffer in 4-byte words (size is rounded down to a multiple of 4).
Every word whose last byte is 0xEB has its low 24 bits treated as a word
offset: (ip + 4 + position after the word) is added when encoding and
subtracted when decoding.
Returns the number of bytes processed.
*/
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  Byte *cur = data;
  const Byte *end;

  size &= ~(size_t)3;
  end = data + size;
  ip += 4;

  for (; cur < end; cur += 4)
  {
    UInt32 v;
    UInt32 pc;

    if (cur[3] != 0xEB)
      continue;

    v = GetUi32(cur);
    /* address of the byte following this instruction, plus the adjusted ip */
    pc = ip + (UInt32)(cur + 4 - data);

    v <<= 2;
    if (encoding)
      v += pc;
    else
      v -= pc;
    v >>= 2;

    v &= 0x00FFFFFF;
    v |= 0xEB000000;
    SetUi32(cur, v);
  }

  return cur - data;
}
/*
ARMT_Convert -- branch converter for ARM Thumb code.
Scans the buffer in 2-byte steps (size is rounded down to a multiple of 2) for
a pair of halfwords whose high bytes are 0xF0..0xF7 and 0xF8..0xFF. The 22-bit
halfword offset stored in such a pair has (ip + position after the pair) / 2
added when encoding and subtracted when decoding.

  data     - buffer, converted in place
  size     - number of bytes in data
  ip       - virtual address of data[0]
  encoding - non-zero to encode, 0 to decode

Returns the number of bytes processed; 0 if fewer than 4 bytes are available.
*/
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  Byte *p;
  const Byte *lim;
  size &= ~(size_t)1;
  /* A candidate pair needs 4 bytes. Returning early also avoids computing
     (data + size - 4), which would point before the start of the buffer. */
  if (size < 4)
    return 0;
  p = data;
  lim = data + size - 4;

  if (encoding)

  for (;;)
  {
    UInt32 b1;
    for (;;)
    {
      UInt32 b3;
      if (p > lim)
        return p - data;
      b1 = p[1];
      b3 = p[3];
      p += 2;
      /* after the XOR both bytes must have their top 5 bits set */
      b1 ^= 8;
      if ((b3 & b1) >= 0xF8)
        break;
    }
    {
      /* assemble the offset; extra high bits from b1 are dropped by the
         Byte casts and the (& 0x7) mask when the value is stored back */
      UInt32 v =
             ((UInt32)b1 << 19)
          + (((UInt32)p[1] & 0x7) << 8)
          + (((UInt32)p[-2] << 11))
          + (p[0]);

      p += 2;
      {
        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
          v += cur;
      }

      p[-4] = (Byte)(v >> 11);
      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
      p[-2] = (Byte)v;
      p[-1] = (Byte)(0xF8 | (v >> 8));
    }
  }

  for (;;)
  {
    UInt32 b1;
    for (;;)
    {
      UInt32 b3;
      if (p > lim)
        return p - data;
      b1 = p[1];
      b3 = p[3];
      p += 2;
      b1 ^= 8;
      if ((b3 & b1) >= 0xF8)
        break;
    }
    {
      UInt32 v =
             ((UInt32)b1 << 19)
          + (((UInt32)p[1] & 0x7) << 8)
          + (((UInt32)p[-2] << 11))
          + (p[0]);

      p += 2;
      {
        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
          v -= cur;
      }

      /*
      SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
      SetUi16(p - 2, (UInt16)(v | 0xF800));
      */

      p[-4] = (Byte)(v >> 11);
      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
      p[-2] = (Byte)v;
      p[-1] = (Byte)(0xF8 | (v >> 8));
    }
  }
}
/*
PPC_Convert -- branch converter for big-endian PowerPC code.
Processes 4-byte words (size is rounded down to a multiple of 4). A word
matches when (word & 0xFC000003) == 0x48000001; its low 26 bits then have the
word's own address (ip + offset of the word) added when encoding and
subtracted when decoding.
Returns the number of bytes processed.
*/
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  Byte *cur = data;
  const Byte *end;

  size &= ~(size_t)3;
  end = data + size;
  ip -= 4;

  for (; cur < end; cur += 4)
  {
    UInt32 v;
    UInt32 pc;

    /* if ((v & 0xFC000003) == 0x48000001) */
    if ((cur[0] & 0xFC) != 0x48 || (cur[3] & 3) != 1)
      continue;

    v = GetBe32(cur);
    /* (ip - 4) + position after the word == ip + position of the word */
    pc = ip + (UInt32)(cur + 4 - data);
    if (encoding)
      v += pc;
    else
      v -= pc;

    v &= 0x03FFFFFF;
    v |= 0x48000000;
    SetBe32(cur, v);
  }

  return cur - data;
}
/*
SPARC_Convert -- branch converter for big-endian SPARC code.
Processes 4-byte words (size is rounded down to a multiple of 4). A word
matches when its first byte is 0x40 with the top two bits of the second byte
clear, or its first byte is 0x7F with the top two bits of the second byte set.
The word offset in such a word has the word's own address added (encoding)
or subtracted (decoding).
Returns the number of bytes processed.
*/
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  Byte *cur = data;
  const Byte *end;

  size &= ~(size_t)3;
  end = data + size;
  ip -= 4;

  for (; cur < end; cur += 4)
  {
    UInt32 v;
    UInt32 pc;

    if (!((cur[0] == 0x40 && (cur[1] & 0xC0) == 0) ||
          (cur[0] == 0x7F && (cur[1] >= 0xC0))))
      continue;

    v = GetBe32(cur);
    /* (ip - 4) + position after the word == ip + position of the word */
    pc = ip + (UInt32)(cur + 4 - data);

    v <<= 2;
    if (encoding)
      v += pc;
    else
      v -= pc;

    /* fold the result back into the two accepted opcode patterns */
    v &= 0x01FFFFFF;
    v -= (UInt32)1 << 24;
    v ^= 0xFF000000;
    v >>= 2;
    v |= 0x40000000;
    SetBe32(cur, v);
  }

  return cur - data;
}

64
bsnes/lzma/Bra.h Normal file
View File

@ -0,0 +1,64 @@
/* Bra.h -- Branch converters for executables
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __BRA_H
#define __BRA_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
These functions convert relative addresses to absolute addresses
in CALL instructions to increase the compression ratio.
In:
data - data buffer
size - size of data
ip - current virtual Instruction Pointer (IP) value
state - state variable for x86 converter
encoding - 0 (for decoding), 1 (for encoding)
Out:
state - state variable for x86 converter
Returns:
The number of processed bytes. If you call these functions with multiple calls,
you must start next call with first byte after block of processed bytes.
Type Endian Alignment LookAhead
x86 little 1 4
ARMT little 2 2
ARM little 4 0
PPC big 4 0
SPARC big 4 0
IA64 little 16 0
size must be >= Alignment + LookAhead, if it's not last block.
If (size < Alignment + LookAhead), converter returns 0.
Example:
UInt32 ip = 0;
for ()
{
; size must be >= Alignment + LookAhead, if it's not last block
SizeT processed = Convert(data, size, ip, 1);
data += processed;
size -= processed;
ip += processed;
}
*/
#define x86_Convert_Init(state) { state = 0; }
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
EXTERN_C_END
#endif

82
bsnes/lzma/Bra86.c Normal file
View File

@ -0,0 +1,82 @@
/* Bra86.c -- Converter for x86 code (BCJ)
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bra.h"
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
/*
x86_Convert -- BCJ converter for x86 code.
Scans data for E8 / E9 opcodes and, when the 4-byte operand that follows looks
like a relative address (its top byte is 0x00 or 0xFF, see Test86MSByte), adds
(encoding) or subtracts (decoding) the address of the next instruction.

  data     - buffer, converted in place
  size     - number of bytes in data
  ip       - virtual address of data[0]
  state    - in/out: low 3 bits carry the opcode-history mask between calls
  encoding - non-zero to encode, 0 to decode

Returns the number of bytes processed (0 if size < 5). The last 4 bytes of the
buffer are never used as an opcode position, so they stay available as operand.
*/
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
{
SizeT pos = 0;
UInt32 mask = *state & 7;
if (size < 5)
return 0;
/* an opcode needs 4 operand bytes after it */
size -= 4;
/* operands are relative to the end of the 5-byte instruction */
ip += 5;
for (;;)
{
Byte *p = data + pos;
const Byte *limit = data + size;
/* find the next E8 / E9 byte */
for (; p < limit; p++)
if ((*p & 0xFE) == 0xE8)
break;
{
/* d = number of bytes skipped since the previous position */
SizeT d = (SizeT)(p - data - pos);
pos = (SizeT)(p - data);
if (p >= limit)
{
/* end of buffer: save the shifted mask for the next call */
*state = (d > 2 ? 0 : mask >> (unsigned)d);
return pos;
}
if (d > 2)
mask = 0;
else
{
mask >>= (unsigned)d;
/* NOTE(review): mask appears to record recently rejected E8/E9 positions;
for some histories this opcode is skipped without conversion -- confirm
against the BCJ format description */
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
{
mask = (mask >> 1) | 4;
pos++;
continue;
}
}
}
if (Test86MSByte(p[4]))
{
/* little-endian operand */
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
UInt32 cur = ip + (UInt32)pos;
pos += 5;
if (encoding)
v += cur;
else
v -= cur;
if (mask != 0)
{
unsigned sh = (mask & 6) << 2;
/* if the byte of the result selected by sh is again 0x00 / 0xFF,
invert the bits below it and apply the conversion a second time */
if (Test86MSByte((Byte)(v >> sh)))
{
v ^= (((UInt32)0x100 << sh) - 1);
if (encoding)
v += cur;
else
v -= cur;
}
mask = 0;
}
p[1] = (Byte)v;
p[2] = (Byte)(v >> 8);
p[3] = (Byte)(v >> 16);
/* top byte becomes 0x00 or 0xFF according to bit 24 of the result */
p[4] = (Byte)(0 - ((v >> 24) & 1));
}
else
{
/* operand does not look like an address: remember this opcode in the mask */
mask = (mask >> 1) | 4;
pos++;
}
}
}

53
bsnes/lzma/BraIA64.c Normal file
View File

@ -0,0 +1,53 @@
/* BraIA64.c -- Converter for IA-64 code
2017-01-26 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h"
/*
IA64_Convert -- branch converter for IA-64 code.
Processes the buffer in 16-byte units. For each unit, a 2-bit entry selected by
bits 1..4 of the unit's first byte in the packed table 0x334B0000 decides which
of the 5-byte-spaced fields are examined; matching fields get (ip + unit offset)
added (encoding) or subtracted (decoding) in 16-byte granularity.

Returns the number of bytes processed (a multiple of 16); 0 if size < 16.
*/
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
SizeT i;
if (size < 16)
return 0;
/* from here on, (i <= size) means a full 16-byte unit is available at i */
size -= 16;
i = 0;
do
{
/* table lookup: 0 means nothing to convert in this unit */
unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
if (m)
{
m++;
/* m runs from the looked-up start value up to 4 */
do
{
Byte *p = data + (i + (size_t)m * 5 - 8);
/* field must carry the value 5 in bits (m .. m+3) of p[3] and zeros in the
3 bits tested in p[-1] / p[0] */
if (((p[3] >> m) & 15) == 5
&& (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
{
unsigned raw = GetUi32(p);
unsigned v = raw >> m;
/* gather the 21-bit value: 20 low bits plus bit 23 moved down to bit 20 */
v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
v <<= 4;
if (encoding)
v += ip + (UInt32)i;
else
v -= ip + (UInt32)i;
v >>= 4;
/* scatter the value back: bit 20 returns to bit 23 */
v &= 0x1FFFFF;
v += 0x700000;
v &= 0x8FFFFF;
raw &= ~((UInt32)0x8FFFFF << m);
raw |= (v << m);
SetUi32(p, raw);
}
}
while (++m <= 4);
}
i += 16;
}
while (i <= size);
return i;
}

33
bsnes/lzma/Compiler.h Normal file
View File

@ -0,0 +1,33 @@
/* Compiler.h
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
#ifdef _MSC_VER
#ifdef UNDER_CE
#define RPC_NO_WINDOWS_H
/* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif
#if _MSC_VER >= 1300
#pragma warning(disable : 4996) // This function or variable may be unsafe
#else
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#endif
#define UNUSED_VAR(x) (void)x;
/* #define UNUSED_VAR(x) x=x; */
#endif

218
bsnes/lzma/CpuArch.c Normal file
View File

@ -0,0 +1,218 @@
/* CpuArch.c -- CPU specific code
2018-02-18: Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
#define USE_ASM
#endif
#if !defined(USE_ASM) && _MSC_VER >= 1500
#include <intrin.h>
#endif
#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
/*
CheckFlag -- tests whether the given bit(s) of the flags register can be toggled.
The code reads the flags, writes them back with (flag) inverted, reads them
again, restores the original flags, and returns the subset of (flag) bits whose
value actually changed (0 if none could be changed).
*/
static UInt32 CheckFlag(UInt32 flag)
{
#ifdef _MSC_VER
/* EDX keeps the original flags; EAX holds the modified / re-read copy */
__asm pushfd;
__asm pop EAX;
__asm mov EDX, EAX;
__asm xor EAX, flag;
__asm push EAX;
__asm popfd;
__asm pushfd;
__asm pop EAX;
__asm xor EAX, EDX;
__asm push EDX;
__asm popfd;
__asm and flag, EAX;
#else
/* same sequence in GCC syntax; flag is passed in and returned through ECX */
__asm__ __volatile__ (
"pushf\n\t"
"pop %%EAX\n\t"
"movl %%EAX,%%EDX\n\t"
"xorl %0,%%EAX\n\t"
"push %%EAX\n\t"
"popf\n\t"
"pushf\n\t"
"pop %%EAX\n\t"
"xorl %%EDX,%%EAX\n\t"
"push %%EDX\n\t"
"popf\n\t"
"andl %%EAX, %0\n\t":
"=c" (flag) : "c" (flag) :
"%eax", "%edx");
#endif
return flag;
}
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
#else
#define CHECK_CPUID_IS_SUPPORTED
#endif
/*
MyCPUID -- executes the CPUID instruction with EAX = function and stores the
resulting EAX, EBX, ECX, EDX in *a, *b, *c, *d.
Three implementations are selected at compile time: MSVC inline asm,
GCC-style inline asm, or the __cpuid() intrinsic when USE_ASM is not defined.
*/
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
#ifdef _MSC_VER
UInt32 a2, b2, c2, d2;
/* the other registers are cleared before CPUID is issued */
__asm xor EBX, EBX;
__asm xor ECX, ECX;
__asm xor EDX, EDX;
__asm mov EAX, function;
__asm cpuid;
__asm mov a2, EAX;
__asm mov b2, EBX;
__asm mov c2, ECX;
__asm mov d2, EDX;
*a = a2;
*b = b2;
*c = c2;
*d = d2;
#else
__asm__ __volatile__ (
#if defined(MY_CPU_AMD64) && defined(__PIC__)
/* PIC builds: RBX is saved in RDI around CPUID and the EBX result is returned via RDI */
"mov %%rbx, %%rdi;"
"cpuid;"
"xchg %%rbx, %%rdi;"
: "=a" (*a) ,
"=D" (*b) ,
#elif defined(MY_CPU_X86) && defined(__PIC__)
/* PIC builds: EBX is saved in EDI around CPUID and the EBX result is returned via EDI */
"mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*a) ,
"=D" (*b) ,
#else
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function)) ;
#endif
#else
/* compiler intrinsic: CPUInfo[0..3] = EAX, EBX, ECX, EDX */
int CPUInfo[4];
__cpuid(CPUInfo, function);
*a = CPUInfo[0];
*b = CPUInfo[1];
*c = CPUInfo[2];
*d = CPUInfo[3];
#endif
}
/* Fills *p from CPUID functions 0 and 1.
   Function 0 yields maxFunc and the vendor words (EBX, EDX, ECX are stored as
   vendor[0], vendor[1], vendor[2]); function 1 yields ver, b, c, d.
   Returns False only if the CHECK_CPUID_IS_SUPPORTED test fails, else True. */
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
{
  CHECK_CPUID_IS_SUPPORTED

  MyCPUID(0,
      &p->maxFunc,
      &p->vendor[0],
      &p->vendor[2],
      &p->vendor[1]);

  MyCPUID(1,
      &p->ver,
      &p->b,
      &p->c,
      &p->d);

  return True;
}
/* Vendor identification words, compared against Cx86cpuid::vendor[0..2].
   Read as little-endian ASCII, the rows spell
   "GenuineIntel", "AuthenticAMD" and "CentaurHauls".
   The row index is the value returned by x86cpuid_GetFirm(). */
static const UInt32 kVendors[][3] =
{
{ 0x756E6547, 0x49656E69, 0x6C65746E},
{ 0x68747541, 0x69746E65, 0x444D4163},
{ 0x746E6543, 0x48727561, 0x736C7561}
};
/* Returns the index of the kVendors row that matches p->vendor[0..2],
   or -1 when the vendor is not in the table. */
int x86cpuid_GetFirm(const Cx86cpuid *p)
{
  unsigned row = 0;

  while (row < sizeof(kVendors) / sizeof(kVendors[0]))
  {
    const UInt32 *id = kVendors[row];
    if (p->vendor[0] == id[0]
        && p->vendor[1] == id[1]
        && p->vendor[2] == id[2])
      return (int)row;
    row++;
  }

  return -1;
}
/* Classifies the CPU by vendor, family and model.
   Returns True for the family/model combinations listed below, and also when
   CPUID cannot be read or the vendor is not recognized. */
BoolInt CPU_Is_InOrder()
{
  Cx86cpuid info;
  UInt32 family, model;
  int firm;

  if (!x86cpuid_CheckAndRead(&info))
    return True;

  family = x86cpuid_GetFamily(info.ver);
  model = x86cpuid_GetModel(info.ver);
  firm = x86cpuid_GetFirm(&info);

  if (firm == CPU_FIRM_INTEL)
  {
    return (family < 6 || (family == 6 && (
        /* In-Order Atom CPU */
           model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
        || model == 0x26  /* 45 nm, Z6xx */
        || model == 0x27  /* 32 nm, Z2460 */
        || model == 0x35  /* 32 nm, Z2760 */
        || model == 0x36  /* 32 nm, N2xxx, D2xxx */
        )));
  }

  if (firm == CPU_FIRM_AMD)
    return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));

  if (firm == CPU_FIRM_VIA)
    return (family < 6 || (family == 6 && model < 0xF));

  /* unknown vendor */
  return True;
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <windows.h>
/* Queries the Windows version with GetVersionEx() and reports whether the
   major version is at least 5. Returns False if the query fails. */
static BoolInt CPU_Sys_Is_SSE_Supported()
{
  OSVERSIONINFO osInfo;

  osInfo.dwOSVersionInfoSize = sizeof(osInfo);
  if (GetVersionEx(&osInfo))
    return (osInfo.dwMajorVersion >= 5);

  return False;
}
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else
#define CHECK_SYS_SSE_SUPPORT
#endif
BoolInt CPU_Is_Aes_Supported()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return False;
return (p.c >> 25) & 1;
}
/* Returns bit 26 of the EDX value reported by CPUID function 0x80000001.
   Returns False if CPUID cannot be read or if function 0x80000000 reports a
   maximum extended function below 0x80000001. */
BoolInt CPU_IsSupported_PageGB()
{
  Cx86cpuid info;
  UInt32 regA = 0, regB = 0, regC = 0, regD = 0;

  if (!x86cpuid_CheckAndRead(&info))
    return False;

  /* highest supported extended function comes back in EAX */
  MyCPUID(0x80000000, &regA, &regB, &regC, &regD);
  if (regA < 0x80000001)
    return False;

  regA = regB = regC = regD = 0;
  MyCPUID(0x80000001, &regA, &regB, &regC, &regD);
  return (regD >> 26) & 1;
}
#endif

336
bsnes/lzma/CpuArch.h Normal file
View File

@ -0,0 +1,336 @@
/* CpuArch.h -- CPU specific code
2018-02-18 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
MY_CPU_LE means that CPU is LITTLE ENDIAN.
MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
*/
#if defined(_M_X64) \
|| defined(_M_AMD64) \
|| defined(__x86_64__) \
|| defined(__AMD64__) \
|| defined(__amd64__)
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
#else
#define MY_CPU_NAME "x64"
#endif
#define MY_CPU_64BIT
#endif
#if defined(_M_IX86) \
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
#define MY_CPU_32BIT
#endif
#if defined(_M_ARM64) \
|| defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \
|| defined(__aarch64__)
#define MY_CPU_ARM64
#define MY_CPU_NAME "arm64"
#define MY_CPU_64BIT
#endif
#if defined(_M_ARM) \
|| defined(_M_ARM_NT) \
|| defined(_M_ARMT) \
|| defined(__arm__) \
|| defined(__thumb__) \
|| defined(__ARMEL__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
#define MY_CPU_NAME "arm"
#define MY_CPU_32BIT
#endif
#if defined(_M_IA64) \
|| defined(__ia64__)
#define MY_CPU_IA64
#define MY_CPU_NAME "ia64"
#define MY_CPU_64BIT
#endif
#if defined(__mips64) \
|| defined(__mips64__) \
|| (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
#define MY_CPU_NAME "mips64"
#define MY_CPU_64BIT
#elif defined(__mips__)
#define MY_CPU_NAME "mips"
/* #define MY_CPU_32BIT */
#endif
#if defined(__ppc64__) \
|| defined(__powerpc64__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
#else
#define MY_CPU_NAME "ppc64"
#endif
#define MY_CPU_64BIT
#elif defined(__ppc__) \
|| defined(__powerpc__)
#define MY_CPU_NAME "ppc"
#define MY_CPU_32BIT
#endif
#if defined(__sparc64__)
#define MY_CPU_NAME "sparc64"
#define MY_CPU_64BIT
#elif defined(__sparc__)
#define MY_CPU_NAME "sparc"
/* #define MY_CPU_32BIT */
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64
#endif
/* ---- Endianness detection ----
   On _WIN32 builds, ARM / ARM64 / IA64 targets are tagged as little-endian
   via the MY_CPU_*_LE helper macros. */
#ifdef _WIN32
#ifdef MY_CPU_ARM
#define MY_CPU_ARM_LE
#endif
#ifdef MY_CPU_ARM64
#define MY_CPU_ARM64_LE
#endif
#ifdef _M_IA64
#define MY_CPU_IA64_LE
#endif
#endif
/* MY_CPU_LE: x86/x64, the *_LE helpers above, explicit little-endian
   compiler macros, or __BYTE_ORDER__ reporting little-endian. */
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \
|| defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \
|| defined(__THUMBEL__) \
|| defined(__AARCH64EL__) \
|| defined(__MIPSEL__) \
|| defined(__MIPSEL) \
|| defined(_MIPSEL) \
|| defined(__BFIN__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define MY_CPU_LE
#endif
/* MY_CPU_BE: explicit big-endian compiler macros, or __BYTE_ORDER__
   reporting big-endian. */
#if defined(__BIG_ENDIAN__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEB__) \
|| defined(__AARCH64EB__) \
|| defined(__MIPSEB__) \
|| defined(__MIPSEB) \
|| defined(_MIPSEB) \
|| defined(__m68k__) \
|| defined(__s390__) \
|| defined(__s390x__) \
|| defined(__zarch__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#define MY_CPU_BE
#endif
/* Sanity checks: contradictory detection results abort compilation. */
#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
#error Stop_Compiling_Bad_Endian
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT
#endif
/* Fallback name when no architecture matched: "LE" or "BE" if endianness is
   known; otherwise MY_CPU_NAME stays undefined (the empty-string definition
   is intentionally commented out). */
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
#define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE)
#define MY_CPU_NAME "BE"
#else
/*
#define MY_CPU_NAME ""
*/
#endif
#endif
/* MY_CPU_LE_UNALIGN: little-endian target on which the accessors that follow
   this block use direct unaligned loads/stores (x86/x64, ARM64, or a compiler
   that defines __ARM_FEATURE_UNALIGNED). */
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) \
|| defined(__ARM_FEATURE_UNALIGNED)
#define MY_CPU_LE_UNALIGN
#endif
#endif
/* ---- Little-endian integer accessors ----
   GetUi16/32/64(p) read, and SetUi16/32/64(p, v) write, a little-endian
   value at address p.
   - With MY_CPU_LE_UNALIGN they are plain pointer-cast loads/stores.
   - Otherwise they assemble / split the value one byte at a time, lowest
     byte first, so they work for any alignment and host byte order.
   The Set* macros expand to a brace block, not an expression. */
#ifdef MY_CPU_LE_UNALIGN
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
#else
#define GetUi16(p) ( (UInt16) ( \
((const Byte *)(p))[0] | \
((UInt16)((const Byte *)(p))[1] << 8) ))
#define GetUi32(p) ( \
((const Byte *)(p))[0] | \
((UInt32)((const Byte *)(p))[1] << 8) | \
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); \
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
/* MY__has_builtin(x): wraps __has_builtin where the compiler provides it,
   and evaluates to 0 elsewhere so it is safe inside #if. */
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
#define MY__has_builtin(x) 0
#endif
/* ---- Big-endian integer accessors ----
   GetBe32/GetBe64 read and SetBe32 writes a big-endian value at address p.
   Three implementations are selected at compile time:
   1. MY_CPU_LE_UNALIGN with _MSC_VER >= 1300: unaligned load + _byteswap_* intrinsic.
   2. MY_CPU_LE_UNALIGN with GCC >= 4.3, or clang reporting __builtin_bswap16:
      unaligned load + __builtin_bswap32/64.
   3. Anything else: portable byte-by-byte shifts.
   Note that SetBe32 is an assignment expression in cases 1-2 and a brace
   block in case 3. */
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#else
#define GetBe32(p) ( \
((UInt32)((const Byte *)(p))[0] << 24) | \
((UInt32)((const Byte *)(p))[1] << 16) | \
((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \
_ppp_[2] = (Byte)(_vvv_ >> 8); \
_ppp_[3] = (Byte)_vvv_; }
#endif
/* GetBe16 always uses the portable form: the intrinsic variants above are commented out. */
#ifndef GetBe16
#define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] ))
#endif
#ifdef MY_CPU_X86_OR_AMD64
/* Snapshot of basic CPUID information, filled in by x86cpuid_CheckAndRead().
   NOTE(review): the exact CPUID leaves each field comes from are set in
   CpuArch.c, outside this header — the field notes below are presumptions
   to confirm there. */
typedef struct
{
UInt32 maxFunc; /* presumably the highest supported basic CPUID function */
UInt32 vendor[3]; /* presumably the vendor identification words */
UInt32 ver; /* version word decoded by the x86cpuid_GetFamily/GetModel/GetStepping macros */
UInt32 b;
UInt32 c; /* feature word; CPU_Is_Aes_Supported() tests bit 25 of it */
UInt32 d;
} Cx86cpuid;
/* CPU vendor identifiers (Intel = 0, AMD = 1, VIA = 2);
   presumably the values returned by x86cpuid_GetFirm() — confirm in CpuArch.c. */
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
/* Executes CPUID for `function` and stores the four result words through a, b, c, d. */
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
/* Fills *p with CPUID data; callers treat a zero result as "CPUID information not available". */
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
/* Classifies the CPU described by *p; NOTE(review): return value for unknown vendors is defined in CpuArch.c. */
int x86cpuid_GetFirm(const Cx86cpuid *p);
/* Decode fields of a CPUID version word `ver`:
   - Family:   bits 27..20 of ver moved to bits 11..4, OR-ed with bits 11..8 of ver in the low nibble.
   - Model:    bits 19..16 of ver moved to bits 7..4,  OR-ed with bits 7..4  of ver in the low nibble.
   - Stepping: bits 3..0 of ver.
   The argument is fully parenthesized so that expression arguments
   (e.g. `a | b`) are evaluated before the shifts and masks are applied. */
#define x86cpuid_GetFamily(ver) ((((ver) >> 16) & 0xFF0) | (((ver) >> 8) & 0xF))
#define x86cpuid_GetModel(ver) ((((ver) >> 12) & 0xF0) | (((ver) >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) ((ver) & 0xF)
/* CPU feature / classification queries implemented in CpuArch.c.
   Each returns a BoolInt (non-zero = true). */
BoolInt CPU_Is_InOrder();
/* Non-zero when bit 25 of the `c` feature word is set (and, on 32-bit Windows, the OS check passes). */
BoolInt CPU_Is_Aes_Supported();
/* Non-zero when bit 26 of the `d` output of CPUID function 0x80000001 is set. */
BoolInt CPU_IsSupported_PageGB();
#endif
EXTERN_C_END
#endif

64
bsnes/lzma/Delta.c Normal file
View File

@ -0,0 +1,64 @@
/* Delta.c -- Delta converter
2009-05-26 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Delta.h"
/* Clears the whole delta filter state: writes zero to all
   DELTA_STATE_SIZE bytes starting at `state`. */
void Delta_Init(Byte *state)
{
  Byte *cursor = state;
  Byte *const end = state + DELTA_STATE_SIZE;
  while (cursor != end)
    *cursor++ = 0;
}
/* Copies `size` bytes from src to dest one byte at a time, in ascending
   address order. */
static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
{
  while (size != 0)
  {
    *dest++ = *src++;
    size--;
  }
}
/* Delta_Encode -- in-place delta filter, encoder direction.

   state : history of the last `delta` input bytes (at least `delta` bytes;
           Delta_Init() clears DELTA_STATE_SIZE bytes). Updated on return so
           that a following call continues the same stream.
   delta : distance between the bytes being subtracted; must be in
           1..DELTA_STATE_SIZE.
   data  : buffer of `size` bytes; each byte is replaced by
           (byte - byte `delta` positions earlier in the stream).

   An out-of-range `delta` is rejected without touching state or data:
   delta == 0 would never advance the processing loop (endless loop for
   size > 0), and delta > DELTA_STATE_SIZE would overrun the local buffer. */
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
  Byte buf[DELTA_STATE_SIZE];
  unsigned j = 0;
  if (delta == 0 || delta > DELTA_STATE_SIZE)
    return;
  MyMemCpy(buf, state, delta);
  {
    SizeT i;
    for (i = 0; i < size;)
    {
      /* buf is used as a ring of `delta` bytes; j is the ring position */
      for (j = 0; j < delta && i < size; i++, j++)
      {
        Byte b = data[i];
        data[i] = (Byte)(b - buf[j]);
        buf[j] = b;
      }
    }
  }
  /* Rotate the ring so that the saved state starts with the oldest byte:
     state = buf[j .. delta) followed by buf[0 .. j). */
  if (j == delta)
    j = 0;
  MyMemCpy(state, buf + j, delta - j);
  MyMemCpy(state + delta - j, buf, j);
}
/* Delta_Decode -- in-place delta filter, decoder direction (inverse of
   Delta_Encode).

   state : history of the last `delta` output bytes (at least `delta` bytes;
           Delta_Init() clears DELTA_STATE_SIZE bytes). Updated on return so
           that a following call continues the same stream.
   delta : distance between the bytes being added; must be in
           1..DELTA_STATE_SIZE.
   data  : buffer of `size` bytes; each byte is replaced by
           (byte + decoded byte `delta` positions earlier in the stream).

   An out-of-range `delta` is rejected without touching state or data:
   delta == 0 would never advance the processing loop (endless loop for
   size > 0), and delta > DELTA_STATE_SIZE would overrun the local buffer. */
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
  Byte buf[DELTA_STATE_SIZE];
  unsigned j = 0;
  if (delta == 0 || delta > DELTA_STATE_SIZE)
    return;
  MyMemCpy(buf, state, delta);
  {
    SizeT i;
    for (i = 0; i < size;)
    {
      /* buf is used as a ring of `delta` bytes; j is the ring position */
      for (j = 0; j < delta && i < size; i++, j++)
      {
        buf[j] = data[i] = (Byte)(buf[j] + data[i]);
      }
    }
  }
  /* Rotate the ring so that the saved state starts with the oldest byte:
     state = buf[j .. delta) followed by buf[0 .. j). */
  if (j == delta)
    j = 0;
  MyMemCpy(state, buf + j, delta - j);
  MyMemCpy(state + delta - j, buf, j);
}

19
bsnes/lzma/Delta.h Normal file
View File

@ -0,0 +1,19 @@
/* Delta.h -- Delta converter
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __DELTA_H
#define __DELTA_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Size in bytes of the state buffer cleared by Delta_Init(); Delta_Encode()
   and Delta_Decode() keep their working copy in a local buffer of this size. */
#define DELTA_STATE_SIZE 256
/* Zeroes all DELTA_STATE_SIZE bytes of `state`. */
void Delta_Init(Byte *state);
/* Replaces each of the `size` bytes in `data` with its difference from the byte
   `delta` positions earlier in the stream; `state` carries history across calls. */
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);
/* Inverse of Delta_Encode: replaces each byte with its sum with the decoded byte
   `delta` positions earlier; `state` carries history across calls. */
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);
EXTERN_C_END
#endif

108
bsnes/lzma/DllSecur.c Normal file
View File

@ -0,0 +1,108 @@
/* DllSecur.c -- DLL loading security
2018-02-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#include <windows.h>
#include "DllSecur.h"
#ifndef UNDER_CE
/* Signature of kernel32's SetDefaultDllDirectories, resolved at run time via GetProcAddress. */
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
/* Local copies of the flag values passed to SetDefaultDllDirectories. */
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
#define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800
/* Packed list of DLL base names (without ".dll") that LoadSecurityDlls()
   loads from the system directory. Each name ends with '\0'; the list ends
   at the empty name formed by the literal's own terminating NUL.
   UXTHEME is included only when _CONSOLE is not defined. */
static const char * const g_Dlls =
#ifndef _CONSOLE
"UXTHEME\0"
#endif
"USERENV\0"
"SETUPAPI\0"
"APPHELP\0"
"PROPSYS\0"
"DWMAPI\0"
"CRYPTBASE\0"
"OLEACC\0"
"CLBCATQ\0"
"VERSION\0"
;
#endif
/* Restricts the process DLL search path to System32 plus user-added
   directories by calling kernel32!SetDefaultDllDirectories when that export
   exists. The call is skipped only when the OS version query succeeds and
   reports exactly version 6.0. Best effort: failures are ignored.
   No-op when UNDER_CE is defined. */
void My_SetDefaultDllDirectories()
{
  #ifndef UNDER_CE
  OSVERSIONINFO vi;
  vi.dwOSVersionInfoSize = sizeof(vi);
  /* GetVersionEx is queried once; its result is part of the condition. */
  if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
  {
    /* Resolve dynamically: the export is not present on every Windows version. */
    Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
        GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
    if (setDllDirs)
      setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS);
  }
  #endif
}
void LoadSecurityDlls()
{
#ifndef UNDER_CE
wchar_t buf[MAX_PATH + 100];
{
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
}
{
unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2);
if (len == 0 || len > MAX_PATH)
return;
}
{
const char *dll;
unsigned pos = (unsigned)lstrlenW(buf);
if (buf[pos - 1] != '\\')
buf[pos++] = '\\';
for (dll = g_Dlls; dll[0] != 0;)
{
unsigned k = 0;
for (;;)
{
char c = *dll++;
buf[pos + k] = (Byte)c;
k++;
if (c == 0)
break;
}
lstrcatW(buf, L".dll");
LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
}
}
#endif
}
#endif

20
bsnes/lzma/DllSecur.h Normal file
View File

@ -0,0 +1,20 @@
/* DllSecur.h -- DLL loading for security
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __DLL_SECUR_H
#define __DLL_SECUR_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Windows-only helpers implemented in DllSecur.c. */
#ifdef _WIN32
/* Calls SetDefaultDllDirectories (System32 + user dirs) when the export is available. */
void My_SetDefaultDllDirectories();
/* Same attempt; if it does not succeed, loads a fixed list of DLLs by full path from the system directory. */
void LoadSecurityDlls();
#endif
EXTERN_C_END
#endif

1127
bsnes/lzma/LzFind.c Normal file

File diff suppressed because it is too large Load Diff

121
bsnes/lzma/LzFind.h Normal file
View File

@ -0,0 +1,121 @@
/* LzFind.h -- Match finder for LZ algorithms
2017-06-10 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Element type of the hash and son tables. */
typedef UInt32 CLzRef;
/* State of the single-threaded LZ match finder. The notes below state only
   what this header and the multithreaded finder's use of the fields show;
   the full contract lives in LzFind.c. */
typedef struct _CMatchFinder
{
Byte *buffer; /* pointer to the current position (see Inline_MatchFinder_GetPointerToCurrentPos) */
UInt32 pos; /* current position; advanced together with `buffer` */
UInt32 posLimit;
UInt32 streamPos; /* streamPos - pos = number of available bytes */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
Byte streamEndWasReached; /* tested by Inline_MatchFinder_IsFinishedOK */
Byte btMode;
Byte bigHash; /* selects the GetHeads4b hash variant in the MT finder */
Byte directInput; /* when set, directInputRem must reach 0 for "finished OK" */
UInt32 matchMaxLen;
CLzRef *hash; /* hash tables; the main table starts at hash + fixedHashSize */
CLzRef *son;
UInt32 hashMask; /* mask applied to main-table hash values */
UInt32 cutValue;
Byte *bufferBase;
ISeqInStream *stream;
UInt32 blockSize;
UInt32 keepSizeBefore;
UInt32 keepSizeAfter;
UInt32 numHashBytes;
size_t directInputRem;
UInt32 historySize;
UInt32 fixedHashSize;
UInt32 hashSizeSum;
SRes result;
UInt32 crc[256]; /* per-byte table used by the hash calculations */
size_t numRefs;
UInt64 expectedDataSize;
} CMatchFinder;
/* Macro accessors on a CMatchFinder pointer p. */
/* Pointer to the byte at the current position. */
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
/* Number of bytes between the current position and the end of loaded data. */
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
/* True when the stream end was reached, all loaded data was consumed and,
   for direct input, no direct-input bytes remain. */
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
/* Buffer management entry points implemented in LzFind.c.
   The multithreaded finder calls MatchFinder_MoveBlock() when
   MatchFinder_NeedMove() returns non-zero, and MatchFinder_ReadIfRequired()
   before hashing each block. */
int MatchFinder_NeedMove(CMatchFinder *p);
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p);
/* Conditions:
historySize <= 3 GB
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
*/
/* Callers treat a zero return from MatchFinder_Create as an allocation failure. */
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
/* Position rebasing helpers: `subValue` is the amount the callers subtract from stored positions. */
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/* Single-position match search over the `son` table; returns the advanced `distances` pointer. */
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
Conditions:
Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
*/
/* Callback signatures of the match-finder interface. `object` is the
   concrete finder instance (for example a CMatchFinderMt). */
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
/* Writes results into `distances` and returns the number of UInt32 values written. */
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
/* Advances the finder by the given number of positions without reporting matches. */
typedef void (*Mf_Skip_Func)(void *object, UInt32);
/* Table of the callbacks above; filled in by MatchFinder_CreateVTable()
   or MatchFinderMt_CreateVTable(). */
typedef struct _IMatchFinder
{
Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
} IMatchFinder;
/* Fills vTable with the single-threaded implementations for finder p. */
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
/* Initialization pieces; the multithreaded finder calls Init_3(mf, False) and
   Init_LowHash from the caller thread and Init_HighHash from its hash thread. */
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_3(CMatchFinder *p, int readData);
void MatchFinder_Init(CMatchFinder *p);
/* 3-byte-hash finder variants (Bt3Zip / Hc3Zip) implemented in LzFind.c. */
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
EXTERN_C_END
#endif

853
bsnes/lzma/LzFindMt.c Normal file
View File

@ -0,0 +1,853 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2018-12-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "LzHash.h"
#include "LzFindMt.h"
/* Puts a CMtSync into its "nothing created yet" state: clears the three
   bookkeeping flags and constructs (without creating) the thread, event and
   semaphore handles. */
static void MtSync_Construct(CMtSync *p)
{
  /* bookkeeping flags */
  p->csWasEntered = False;
  p->csWasInitialized = False;
  p->wasCreated = False;

  /* handle objects */
  Thread_Construct(&p->thread);
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStarted);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}
/* Consumer side: releases the block currently held (if any) and waits for the
   next filled block. On return the caller holds p->cs and csWasEntered is True.
   - First call after needStart was set: resets the control flags, signals
     canStart to wake the worker thread and waits until it reports wasStarted.
   - Later calls: leave the critical section of the previous block, count it as
     processed and hand one free slot back to the worker. */
static void MtSync_GetNextBlock(CMtSync *p)
{
if (p->needStart)
{
/* the first block counts as number 1 */
p->numProcessedBlocks = 1;
p->needStart = False;
p->stopWriting = False;
p->exit = False;
Event_Reset(&p->wasStarted);
Event_Reset(&p->wasStopped);
Event_Set(&p->canStart);
Event_Wait(&p->wasStarted);
// if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
}
else
{
CriticalSection_Leave(&p->cs);
p->csWasEntered = False;
p->numProcessedBlocks++;
Semaphore_Release1(&p->freeSemaphore);
}
/* block until the worker has published a filled block, then lock it */
Semaphore_Wait(&p->filledSemaphore);
CriticalSection_Enter(&p->cs);
p->csWasEntered = True;
}
/* MtSync_StopWriting must be called if Writing was started */
/* Asks the worker thread to stop producing blocks and waits until it has.
   Does nothing if the thread was never created or is not currently started.
   Afterwards needStart is True, so the next MtSync_GetNextBlock() restarts
   the worker. */
static void MtSync_StopWriting(CMtSync *p)
{
/* consumer-side count of blocks taken so far, captured before stopping */
UInt32 myNumBlocks = p->numProcessedBlocks;
if (!Thread_WasCreated(&p->thread) || p->needStart)
return;
p->stopWriting = True;
if (p->csWasEntered)
{
CriticalSection_Leave(&p->cs);
p->csWasEntered = False;
}
/* free one slot so a worker blocked on freeSemaphore can see stopWriting */
Semaphore_Release1(&p->freeSemaphore);
Event_Wait(&p->wasStopped);
/* the worker stored its own produced-block count in numProcessedBlocks before
   signalling; take each block produced but not yet consumed and give its slot
   back, so both semaphores return to their initial counts */
while (myNumBlocks++ != p->numProcessedBlocks)
{
Semaphore_Wait(&p->filledSemaphore);
Semaphore_Release1(&p->freeSemaphore);
}
p->needStart = True;
}
/* Shuts the worker thread down (if it exists) and releases every
   synchronization object. Also used to clean up after a partially failed
   MtSync_Create2(). */
static void MtSync_Destruct(CMtSync *p)
{
if (Thread_WasCreated(&p->thread))
{
MtSync_StopWriting(p);
p->exit = True;
/* a stopped worker is waiting on canStart: wake it so it can observe exit */
if (p->needStart)
Event_Set(&p->canStart);
Thread_Wait(&p->thread);
Thread_Close(&p->thread);
}
if (p->csWasInitialized)
{
CriticalSection_Delete(&p->cs);
p->csWasInitialized = False;
}
Event_Close(&p->canStart);
Event_Close(&p->wasStarted);
Event_Close(&p->wasStopped);
Semaphore_Close(&p->freeSemaphore);
Semaphore_Close(&p->filledSemaphore);
p->wasCreated = False;
}
/* Returns SZ_ERROR_THREAD from the enclosing function when x is non-zero. */
#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
/* Creates the critical section, the three auto-reset events, the two
   semaphores and finally the worker thread running startAddress(obj).
   freeSemaphore starts full (numBlocks) and filledSemaphore starts empty.
   Idempotent: returns SZ_OK at once if already created.
   On failure it returns SZ_ERROR_THREAD and leaves the objects created so
   far in place; MtSync_Create() cleans them up. */
static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
{
if (p->wasCreated)
return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs));
p->csWasInitialized = True;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
/* must be set before the thread exists: the worker waits on canStart */
p->needStart = True;
RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
p->wasCreated = True;
return SZ_OK;
}
/* Wrapper around MtSync_Create2() that tears everything down again with
   MtSync_Destruct() when creation fails. Returns the result of MtSync_Create2(). */
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
{
  const SRes status = MtSync_Create2(p, startAddress, obj, numBlocks);
  if (status == SZ_OK)
    return SZ_OK;
  MtSync_Destruct(p);
  return status;
}
/* Marks the sync object as "not started": the next MtSync_GetNextBlock()
   will wake the worker thread. */
void MtSync_Init(CMtSync *p)
{
  p->needStart = True;
}
/* Positions are rebased (normalized) before they can pass this value. */
#define kMtMaxValForNormalize 0xFFFFFFFF
/* DEF_GetHeads2(name, v, action) generates
     static void GetHeads<name>(p, pos, hash, hashMask, heads, numHeads, crc)
   which, for numHeads consecutive positions, evaluates the hash expression v
   at p, stores (pos - previous position recorded for that hash value) into
   *heads++, records pos as the new entry for that hash value and advances
   p and pos by one. `action` runs once first (used to silence unused-parameter
   warnings). */
#define DEF_GetHeads2(name, v, action) \
static void GetHeads ## name(const Byte *p, UInt32 pos, \
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
{ action; for (; numHeads != 0; numHeads--) { \
const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } }
/* Same, with an empty `action`. */
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
/* 2-byte hash: the two bytes themselves, no mask and no crc table. */
DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
/* 3-byte and 4-byte hashes built from the crc table; 4b mixes the fourth byte
   in directly (shifted by 16) instead of through crc. */
DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
/* Body of the hash worker thread. The outer loop waits to be started; the
   inner loop then fills hash blocks in mt->hashBuf until asked to stop (back
   to the outer loop) or to exit (return).
   Block layout: heads[0] = number of UInt32 values used in the block
   (2 + number of heads), heads[1] = bytes available when the block was
   produced, heads[2..] = per-position deltas written by GetHeadsFunc. */
static void HashThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->hashSync;
for (;;)
{
UInt32 numProcessedBlocks = 0;
Event_Wait(&p->canStart);
Event_Set(&p->wasStarted);
MatchFinder_Init_HighHash(mt->MatchFinder);
for (;;)
{
if (p->exit)
return;
if (p->stopWriting)
{
/* publish how many blocks were produced so the stopper can drain them */
p->numProcessedBlocks = numProcessedBlocks;
Event_Set(&p->wasStopped);
break;
}
{
CMatchFinder *mf = mt->MatchFinder;
if (MatchFinder_NeedMove(mf))
{
/* the input buffer is about to be relocated: take both critical sections
   so no consumer reads through the old pointers meanwhile */
CriticalSection_Enter(&mt->btSync.cs);
CriticalSection_Enter(&mt->hashSync.cs);
{
const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
ptrdiff_t offset;
MatchFinder_MoveBlock(mf);
/* shift the cached pointers by the same distance the data moved */
offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
mt->pointerToCurPos -= offset;
mt->buffer -= offset;
}
CriticalSection_Leave(&mt->btSync.cs);
CriticalSection_Leave(&mt->hashSync.cs);
continue;
}
/* wait for a free slot in the hash block ring */
Semaphore_Wait(&p->freeSemaphore);
MatchFinder_ReadIfRequired(mf);
if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
{
/* rebase positions before they can overflow 32 bits */
UInt32 subValue = (mf->pos - mf->historySize - 1);
MatchFinder_ReduceOffsets(mf, subValue);
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
}
{
UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
UInt32 num = mf->streamPos - mf->pos;
/* default header: no heads, `num` bytes available */
heads[0] = 2;
heads[1] = num;
if (num >= mf->numHashBytes)
{
/* number of positions that still have numHashBytes bytes to hash, capped by block capacity */
num = num - mf->numHashBytes + 1;
if (num > kMtHashBlockSize - 2)
num = kMtHashBlockSize - 2;
mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
heads[0] = 2 + num;
}
/* note: when fewer than numHashBytes bytes were available, `num` still holds
   the available byte count here, so those bytes are consumed without heads */
mf->pos += num;
mf->buffer += num;
}
}
Semaphore_Release1(&p->filledSemaphore);
}
}
}
/* Acquires the next filled hash block and decodes its two-word header:
   word 0 is added to the block start to form hashBufPosLimit, word 1 becomes
   hashNumAvail, and hashBufPos is left pointing at the first head. */
static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
{
  MtSync_GetNextBlock(&p->hashSync);

  /* start of the block inside the ring of hash blocks */
  p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
  p->hashBufPosLimit = p->hashBufPos;

  /* header word 0: number of values used in the block */
  p->hashBufPosLimit += p->hashBuf[p->hashBufPos];
  p->hashBufPos++;

  /* header word 1: bytes available */
  p->hashNumAvail = p->hashBuf[p->hashBufPos];
  p->hashBufPos++;
}
/* Value stored into `son` links to mark "no further candidate". */
#define kEmptyHashValue 0
/* Always defined here, which selects the inlined multi-position search
   (GetMatchesSpecN) in BtGetMatches instead of per-position GetMatchesSpec1 calls. */
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
/*
we use size_t for _cyclicBufferPos instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
/* Multi-position match search used by the Bt thread.
   For up to `size` consecutive positions (or until `distances` reaches
   `limit`) it reads one head delta from `hash`, walks the candidate chain
   stored in `son` (two CLzRef links per cyclic-buffer slot) and, per position,
   writes into `distances`:
     [count] followed by (length, distance - 1) pairs with strictly increasing length,
   where count is the number of UInt32 values in the pairs.
   While walking, the two links of the current slot are rebuilt (ptr0 / ptr1).
   *posRes receives the position after the last processed one; the return
   value is the advanced `distances` pointer. */
MY_NO_INLINE
static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
{
do
{
/* reserve one slot for this position's count; pairs start at _distances */
UInt32 *_distances = ++distances;
UInt32 delta = *hash++;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
unsigned maxLen = (unsigned)_maxLen;
/*
if (size > 1)
{
UInt32 delta = *hash;
if (delta < _cyclicBufferSize)
{
UInt32 cyc1 = _cyclicBufferPos + 1;
CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
Byte b = *(cur + 1 - delta);
_distances[0] = pair[0];
_distances[1] = b;
}
}
*/
/* no search budget, or the candidate lies outside the cyclic window */
if (cutValue == 0 || delta >= _cyclicBufferSize)
{
*ptr0 = *ptr1 = kEmptyHashValue;
}
else
for(;;)
{
{
/* slot of the candidate, wrapping around the cyclic buffer */
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
/* bytes below min(len0, len1) are already known to match */
unsigned len = (len0 < len1 ? len0 : len1);
UInt32 pair0 = *pair;
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
while (++len != lenLimit)
if (pb[len] != cur[len])
break;
if (maxLen < len)
{
maxLen = len;
*distances++ = (UInt32)len;
*distances++ = delta - 1;
if (len == lenLimit)
{
/* full-length match: adopt the candidate's links and stop */
UInt32 pair1 = pair[1];
*ptr1 = pair0;
*ptr0 = pair1;
break;
}
}
}
{
UInt32 curMatch = pos - delta;
// delta = pos - *pair;
// delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
/* descend according to the first differing byte, linking the candidate
   onto the matching side */
if (pb[len] < cur[len])
{
delta = pos - pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = pos - *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
}
}
if (--cutValue == 0 || delta >= _cyclicBufferSize)
{
*ptr0 = *ptr1 = kEmptyHashValue;
break;
}
}
pos++;
_cyclicBufferPos++;
cur++;
{
/* fill in the count slot reserved at the top of the iteration */
UInt32 num = (UInt32)(distances - _distances);
_distances[-1] = num;
}
}
while (distances < limit && --size != 0);
*posRes = pos;
return distances;
}
#endif
/* Fills one Bt block at `distances`.
   Layout: distances[0] = number of UInt32 values used in the block,
   distances[1] = bytes available (as seen when the block was started or when
   the last hash block was fetched), followed by per-position records
   [count, pairs...]. Consumes hash blocks as needed. */
static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
/* stop early enough that one more position's worst-case output still fits */
UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
distances[1] = p->hashNumAvail;
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
/* current hash block exhausted: fetch the next one */
MatchFinderMt_GetNextBlock_Hash(p);
distances[1] = numProcessed + p->hashNumAvail;
if (p->hashNumAvail >= p->numHashBytes)
continue;
/* fewer bytes left than the hash needs: emit one empty record (count 0)
   per remaining byte and finish the block */
distances[0] = curPos + p->hashNumAvail;
distances += curPos;
for (; p->hashNumAvail != 0; p->hashNumAvail--)
*distances++ = 0;
return;
}
{
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
UInt32 lenLimit = p->matchMaxLen;
UInt32 pos = p->pos;
UInt32 cyclicBufferPos = p->cyclicBufferPos;
if (lenLimit >= p->hashNumAvail)
lenLimit = p->hashNumAvail;
{
/* limit the batch so that lenLimit stays valid for every position in it
   and the cyclic buffer index does not wrap inside the batch */
UInt32 size2 = p->hashNumAvail - lenLimit + 1;
if (size2 < size)
size = size2;
size2 = p->cyclicBufferSize - cyclicBufferPos;
if (size2 < size)
size = size2;
}
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
UInt32 *startDistances = distances + curPos;
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
startDistances + 1, p->numHashBytes - 1) - startDistances);
*startDistances = num - 1;
curPos += num;
cyclicBufferPos++;
pos++;
p->buffer++;
}
#else
{
UInt32 posRes;
curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
distances + limit,
size, &posRes) - distances);
/* advance every cursor by the number of positions actually processed */
p->hashBufPos += posRes - pos;
cyclicBufferPos += posRes - pos;
p->buffer += posRes - pos;
pos = posRes;
}
#endif
numProcessed += pos - p->pos;
p->hashNumAvail -= pos - p->pos;
p->pos = pos;
if (cyclicBufferPos == p->cyclicBufferSize)
cyclicBufferPos = 0;
p->cyclicBufferPos = cyclicBufferPos;
}
}
distances[0] = curPos;
}
/* Produces Bt block number globalBlockIndex (stored in the btBuf ring) while
   holding the hash sync's critical section, then rebases positions if p->pos
   is close to the 32-bit limit. */
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
CMtSync *sync = &p->hashSync;
/* re-take the hash critical section that the previous call released */
if (!sync->needStart)
{
CriticalSection_Enter(&sync->cs);
sync->csWasEntered = True;
}
BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
{
UInt32 subValue = p->pos - p->cyclicBufferSize;
MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
p->pos -= subValue;
}
/* needStart is read again on purpose: BtGetMatches may have started the hash
   thread (MtSync_GetNextBlock clears needStart and enters the section) */
if (!sync->needStart)
{
CriticalSection_Leave(&sync->cs);
sync->csWasEntered = False;
}
}
/* Body of the Bt worker thread. The outer loop waits to be started; the inner
   loop fills Bt blocks until asked to stop (back to the outer loop) or to
   exit (return). Stopping this thread also stops the hash thread. */
void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
for (;;)
{
UInt32 blockIndex = 0;
Event_Wait(&p->canStart);
Event_Set(&p->wasStarted);
for (;;)
{
if (p->exit)
return;
if (p->stopWriting)
{
/* publish the produced-block count, stop the upstream hash thread,
   then acknowledge the stop request */
p->numProcessedBlocks = blockIndex;
MtSync_StopWriting(&mt->hashSync);
Event_Set(&p->wasStopped);
break;
}
/* wait for a free slot, fill it, hand it to the consumer */
Semaphore_Wait(&p->freeSemaphore);
BtFillBlock(mt, blockIndex++);
Semaphore_Release1(&p->filledSemaphore);
}
}
}
/* Puts a CMatchFinderMt into its empty state: no block buffer allocated and
   both sync objects constructed but not created. */
void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  /* hashBuf == NULL means "buffers not allocated yet" for MatchFinderMt_Create */
  p->hashBuf = NULL;

  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}
/* Releases the block buffer through `alloc` and resets the pointer so a
   later MatchFinderMt_Create() allocates it again. */
static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);

  p->hashBuf = NULL;
}
/* Stops and destroys both worker threads (hash first, then Bt) and only
   afterwards frees the block buffer they were writing into. */
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /* threads first ... */
  MtSync_Destruct(&p->hashSync);
  MtSync_Destruct(&p->btSync);

  /* ... then the memory they used */
  MatchFinderMt_FreeMem(p, alloc);
}
/* Sizes, in UInt32 elements, of the hash-block ring and the Bt-block ring.
   MatchFinderMt_Create() allocates both in one buffer (hash ring first). */
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
/* Thread entry point for the hash worker: adapts the generic thread signature
   to HashThreadFunc() and returns 0 when it finishes. */
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p)
{
  CMatchFinderMt *finder = (CMatchFinderMt *)p;
  HashThreadFunc(finder);
  return 0;
}
/* Thread entry point for the Bt worker: adapts the generic thread signature
   to BtThreadFunc() and returns 0 when it finishes. */
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
{
/* 0x180-byte local that is touched (first 16 bytes zeroed) and then tested, so
   it cannot be dropped; the test below is always true after the loop.
   NOTE(review): presumably this only exists to shift this thread's stack
   frame relative to the hash thread's — confirm the intent upstream. */
Byte allocaDummy[0x180];
unsigned i = 0;
for (i = 0; i < 16; i++)
allocaDummy[i] = (Byte)0;
if (allocaDummy[0] == 0)
BtThreadFunc((CMatchFinderMt *)p);
return 0;
}
/* Allocates the block buffers, creates the underlying CMatchFinder and starts
   both worker threads (which then wait to be started).

   historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter:
       forwarded to MatchFinder_Create() after the MT-specific extra buffer
       space has been added to the two keepAddBuffer values.
   alloc: allocator used for the block buffer and by MatchFinder_Create().

   Returns SZ_OK on success,
           SZ_ERROR_PARAM  when matchMaxLen * 4 would reach kMtBtBlockSize,
           SZ_ERROR_MEM    when an allocation fails,
           or the error returned by MtSync_Create(). */
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = p->MatchFinder;
  p->historySize = historySize;
  /* Same test as (kMtBtBlockSize <= matchMaxLen * 4), written without the
     multiplication so that a huge matchMaxLen cannot wrap around in 32 bits
     and slip past the check. */
  if (matchMaxLen >= (kMtBtBlockSize + 3) / 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    /* one allocation holds the hash-block ring followed by the Bt-block ring */
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  /* the worker threads run ahead of the consumer, so extra input must be kept */
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;
  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
  return SZ_OK;
}
/* Call it after ReleaseStream / SetStream */
/* Resets the consumer-side cursors, initializes the underlying finder without
   reading any input, and caches the finder fields the worker threads and the
   consumer use. */
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *finder = p->MatchFinder;

  /* both block rings start out empty */
  p->btBufPosLimit = 0;
  p->btBufPos = 0;
  p->hashBufPosLimit = 0;
  p->hashBufPos = 0;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_3(finder, False);
  MatchFinder_Init_LowHash(finder);

  /* consumer-side state */
  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(finder);
  p->btNumAvailBytes = 0;
  p->lzPos = p->historySize + 1;

  /* tables shared with the finder */
  p->hash = finder->hash;
  p->fixedHashSize = finder->fixedHashSize;
  p->crc = finder->crc;
  p->son = finder->son;

  /* parameters and cursors copied from the finder */
  p->matchMaxLen = finder->matchMaxLen;
  p->numHashBytes = finder->numHashBytes;
  p->pos = finder->pos;
  p->buffer = finder->buffer;
  p->cyclicBufferPos = finder->cyclicBufferPos;
  p->cyclicBufferSize = finder->cyclicBufferSize;
  p->cutValue = finder->cutValue;
}
/* ReleaseStream is required to finish multithreading */
/* Stops the Bt worker; the Bt thread in turn stops the hash worker as part
   of handling the stop request. */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  CMtSync *bt = &p->btSync;
  MtSync_StopWriting(bt);
  /* p->MatchFinder->ReleaseStream(); */
}
/* Rebases the consumer-side position: subtracts (lzPos - historySize - 1)
   from the first fixedHashSize entries of p->hash via MatchFinder_Normalize3
   and resets lzPos to historySize + 1. */
static void MatchFinderMt_Normalize(CMatchFinderMt *p)
{
  const UInt32 subValue = p->lzPos - p->historySize - 1;
  MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
  p->lzPos = p->historySize + 1;
}
/* Acquires the next filled Bt block and decodes its two-word header:
   word 0 is added to the block start to form btBufPosLimit, word 1 becomes
   btNumAvailBytes, and btBufPos is left at the first per-position record.
   Also rebases lzPos when it gets close to the 32-bit limit. */
static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  UInt32 slot;

  MtSync_GetNextBlock(&p->btSync);

  /* index of the block inside the ring of Bt blocks */
  slot = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
  p->btBufPos = slot * kMtBtBlockSize;
  p->btBufPosLimit = p->btBufPos;

  /* header word 0: number of values used in the block */
  p->btBufPosLimit += p->btBuf[p->btBufPos];
  p->btBufPos++;

  /* header word 1: bytes available */
  p->btNumAvailBytes = p->btBuf[p->btBufPos];
  p->btBufPos++;

  if (!(p->lzPos < kMtMaxValForNormalize - kMtBtBlockSize))
    MatchFinderMt_Normalize(p);
}
/* IMatchFinder callback: returns the consumer-side pointer to the current
   input position. */
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
  const Byte *current = p->pointerToCurPos;
  return current;
}
/* Fetches the next Bt block when the current one has been fully consumed.
   Expects a CMatchFinderMt pointer named `p` in scope. */
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
/* IMatchFinder callback: returns the number of available bytes, first
   fetching a new Bt block if the current one is exhausted. */
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
  if (p->btBufPos == p->btBufPosLimit)
    MatchFinderMt_GetNextBlock_Bt(p);
  return p->btNumAvailBytes;
}
/* Adds a possible length-2 match found through the 2-byte hash table.
   matchMinPos is the oldest position still accepted. Writes at most one
   (length, distance - 1) pair and returns the advanced `distances` pointer.
   Always records lzPos as the newest position for the computed hash slot. */
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
{
UInt32 h2, curMatch2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
/* macro defined outside this view; presumably sets h2 from the bytes at cur — confirm */
MT_HASH2_CALC
curMatch2 = hash[h2];
hash[h2] = lzPos;
if (curMatch2 >= matchMinPos)
/* confirm the first byte: the hash alone does not prove a match */
if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
{
*distances++ = 2;
*distances++ = lzPos - curMatch2 - 1;
}
return distances;
}
/* Adds possible length-2 and length-3 matches found through the 2-byte and
   3-byte hash tables (the latter at hash + kFix3HashSize).
   matchMinPos is the oldest position still accepted. Writes up to two
   (length, distance - 1) pairs and returns the advanced `distances` pointer.
   Always records lzPos as the newest position in both hash slots. */
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
{
UInt32 h2, h3, curMatch2, curMatch3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
/* macro defined outside this view; presumably sets h2 and h3 from the bytes at cur — confirm */
MT_HASH3_CALC
curMatch2 = hash[ h2];
curMatch3 = (hash + kFix3HashSize)[h3];
hash[ h2] = lzPos;
(hash + kFix3HashSize)[h3] = lzPos;
if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
{
distances[1] = lzPos - curMatch2 - 1;
/* the 2-byte candidate also agrees on the third byte: report length 3 and stop */
if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
{
distances[0] = 3;
return distances + 2;
}
distances[0] = 2;
distances += 2;
}
if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
{
*distances++ = 3;
*distances++ = lzPos - curMatch3 - 1;
}
return distances;
}
/*
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
{
UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
MT_HASH4_CALC
curMatch2 = hash[ h2];
curMatch3 = (hash + kFix3HashSize)[h3];
curMatch4 = (hash + kFix4HashSize)[h4];
hash[ h2] = lzPos;
(hash + kFix3HashSize)[h3] = lzPos;
(hash + kFix4HashSize)[h4] = lzPos;
if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
{
distances[1] = lzPos - curMatch2 - 1;
if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
{
distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
return distances + 2;
}
distances[0] = 2;
distances += 2;
}
if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
{
distances[1] = lzPos - curMatch3 - 1;
if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
{
distances[0] = 4;
return distances + 2;
}
distances[0] = 3;
distances += 2;
}
if (curMatch4 >= matchMinPos)
if (
cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
)
{
*distances++ = 4;
*distances++ = lzPos - curMatch4 - 1;
}
return distances;
}
*/
/* Advances the consumer by one input byte (position counter and pointer).
   Expects a CMatchFinderMt pointer named `p` in scope. */
#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
/* GetMatches callback used when numHashBytes == 2: copies the current
   position's record from the Bt block to `distances` unchanged, two values at
   a time, advances by one input byte and returns the record's value count. */
static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
{
  const UInt32 *src = p->btBuf + p->btBufPos;
  const UInt32 count = *src++;
  UInt32 done = 0;

  /* consume the record (count word + values) and one available byte */
  p->btBufPos += 1 + count;
  p->btNumAvailBytes--;

  while (done < count)
  {
    const UInt32 first = src[0];
    const UInt32 second = src[1];
    src += 2;
    distances[0] = first;
    distances[1] = second;
    distances += 2;
    done += 2;
  }

  INCREASE_LZ_POS
  return count;
}
/* GetMatches callback used when numHashBytes > 2. Combines the short matches
   found by p->MixMatchesFunc (consumer thread) with the longer matches from
   the current position's Bt record, advances by one input byte and returns
   the number of UInt32 values written to `distances`. */
static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
{
const UInt32 *btBuf = p->btBuf + p->btBufPos;
UInt32 len = *btBuf++;
p->btBufPos += 1 + len;
if (len == 0)
{
/* no Bt matches: only short matches, bounded by the history window,
   and only if at least 4 bytes were available */
/* change for bt5 ! */
if (p->btNumAvailBytes-- >= 4)
len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
}
else
{
/* Condition: there are matches in btBuf with length < p->numHashBytes */
UInt32 *distances2;
p->btNumAvailBytes--;
/* short matches are limited by a minimum position derived from the first Bt pair (btBuf[1]) */
distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
/* append the Bt pairs after the short matches */
do
{
UInt32 v0 = btBuf[0];
UInt32 v1 = btBuf[1];
btBuf += 2;
distances2[0] = v0;
distances2[1] = v1;
distances2 += 2;
}
while ((len -= 2) != 0);
len = (UInt32)(distances2 - (distances));
}
INCREASE_LZ_POS
return len;
}
/*
  Building blocks for the MatchFinderMt*_Skip() functions. They open and close
  braces across macro boundaries, so they only make sense in the exact
  combination HEADER ... FOOTER, inside a function that has `p`
  (CMatchFinderMt *) and `num` (UInt32) in scope.

  SKIP_HEADER2_MT   - opens `do {` and expands GET_NEXT_BLOCK_IF_REQUIRED
                      (defined outside this view).
  SKIP_HEADER_MT(n) - SKIP_HEADER2_MT, then opens
                      `if (p->btNumAvailBytes-- >= (n)) {` and declares the
                      locals `cur` and `hash` for the hash-update code that
                      follows. The counter is decremented even when the test fails.
  SKIP_FOOTER_MT    - closes the inner block, advances the LZ position, steps
                      p->btBufPos over the current btBuf record (count word +
                      payload) and closes the loop with `while (--num != 0);`.

  NOTE(review): because the loop is do/while with a pre-decrement, num == 0 on
  entry would wrap around; callers presumably always pass num >= 1 - confirm.
*/
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
/* Skip implementation installed for the 2-byte-hash configuration (see
   MatchFinderMt_CreateVTable). No local hash table is updated: each of the
   `num` iterations only consumes one available byte; SKIP_FOOTER_MT then
   advances the position and steps over the current btBuf record. */
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER2_MT { p->btNumAvailBytes--;
SKIP_FOOTER_MT
}
/* Skip implementation installed for the 3-byte-hash configuration (see
   MatchFinderMt_CreateVTable). For each of the `num` skipped positions that had
   at least 2 bytes available, records the current p->lzPos in the 2-byte hash
   table so later positions can still find short matches here. */
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(2)
UInt32 h2;
MT_HASH2_CALC
hash[h2] = p->lzPos;
SKIP_FOOTER_MT
}
/* Skip implementation installed for the default (4-byte-hash) configuration
   (see MatchFinderMt_CreateVTable). For each of the `num` skipped positions that
   had at least 3 bytes available, records the current p->lzPos in both the
   2-byte table (hash[h2]) and the 3-byte table (starting at hash + kFix3HashSize). */
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(3)
UInt32 h2, h3;
MT_HASH3_CALC
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
SKIP_FOOTER_MT
}
/*
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
UInt32 h2, h3, h4;
MT_HASH4_CALC
(hash + kFix4HashSize)[h4] =
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
SKIP_FOOTER_MT
}
*/
/*
  Fills `vTable` with the multithreaded match finder's entry points and selects
  the hash-dependent helpers on `p` according to p->MatchFinder->numHashBytes:

    2       : GetHeads2, no mix function, MatchFinderMt0_Skip,
              MatchFinderMt2_GetMatches
    3       : GetHeads3, MixMatches2, MatchFinderMt2_Skip
    other   : GetHeads4 / GetHeads4b (when bigHash is set), MixMatches3,
              MatchFinderMt3_Skip

  A 5-byte variant (GetHeads5 / MixMatches4 / MatchFinderMt4_Skip) is not wired up.
*/
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
{
  /* entries shared by every configuration */
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;

  if (p->MatchFinder->numHashBytes == 2)
  {
    p->GetHeadsFunc = GetHeads2;
    p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
    vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
    vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
    return;
  }

  /* every other configuration mixes short matches in via MixMatchesFunc */
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  if (p->MatchFinder->numHashBytes == 3)
  {
    p->GetHeadsFunc = GetHeads3;
    p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
    vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
  }
  else
  {
    /* case 4 (and anything else) */
    if (p->MatchFinder->bigHash)
      p->GetHeadsFunc = GetHeads4b;
    else
      p->GetHeadsFunc = GetHeads4;
    p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
    vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
  }
}

101
bsnes/lzma/LzFindMt.h Normal file
View File

@ -0,0 +1,101 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
#include "LzFind.h"
#include "Threads.h"
EXTERN_C_BEGIN
/* Block geometry of the two inter-thread buffers. Both block counts are powers
   of two, so the *Mask constants can be used to wrap a block index.
   NOTE(review): the "hash" constants presumably describe the buffer filled by
   the hash thread and the "bt" constants the buffer filled by the binary-tree
   thread - the code using them is outside this view; confirm in LzFindMt.c. */
#define kMtHashBlockSize (1 << 13)
#define kMtHashNumBlocks (1 << 3)
#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
#define kMtBtBlockSize (1 << 14)
#define kMtBtNumBlocks (1 << 6)
#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
/*
  Synchronization state for one background thread: the thread handle plus the
  flags, events, semaphores and critical section that accompany it.
  NOTE(review): the code that drives these fields is not part of this header;
  the grouping comments below are inferred from the field names - confirm
  against LzFindMt.c before relying on them.
*/
typedef struct _CMtSync
{
/* lifecycle / control flags */
BoolInt wasCreated;
BoolInt needStart;
BoolInt exit;
BoolInt stopWriting;
/* the thread and its start/stop handshake events */
CThread thread;
CAutoResetEvent canStart;
CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
/* presumably count free / filled blocks in the shared buffer */
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
/* critical section and flags tracking its state */
BoolInt csWasInitialized;
BoolInt csWasEntered;
CCriticalSection cs;
UInt32 numProcessedBlocks;
} CMtSync;
/* Callback that writes additional matches to `distances` and returns the
   pointer one past the last value written (callers compute the count as
   result - distances). `matchMinPos` is the lowest position a match may refer to. */
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
#define kMtCacheLineDummy 128
/* Callback that computes hash heads for `numHeads` positions of `buffer`
   starting at `pos`; stored in CMatchFinderMt::GetHeadsFunc.
   NOTE(review): exact semantics of hash/heads are defined by the GetHeads*
   implementations, which are outside this view. */
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
/*
  State of the multithreaded match finder. Fields are grouped by which stage
  touches them (LZ consumer, BT thread, hash thread), as marked by the section
  comments; the dummy byte array separates groups by at least one cache line.
*/
typedef struct _CMatchFinderMt
{
/* LZ */
const Byte *pointerToCurPos; /* current input byte; advanced together with lzPos */
UInt32 *btBuf; /* sequence of records: one count word followed by that many UInt32 values */
UInt32 btBufPos; /* index in btBuf of the next record to consume */
UInt32 btBufPosLimit;
UInt32 lzPos; /* current position counter; this value is stored into the hash tables */
UInt32 btNumAvailBytes; /* decremented once per consumed/skipped position */
UInt32 *hash; /* small hash tables: 2-byte table at [0], 3-byte table at [kFix3HashSize] */
UInt32 fixedHashSize;
UInt32 historySize; /* lzPos - historySize is passed to MixMatchesFunc as the minimum match position */
const UInt32 *crc; /* table read by the MT_HASH*_CALC macros */
Mf_Mix_Matches MixMatchesFunc; /* chosen by MatchFinderMt_CreateVTable; NULL for 2-byte hashing */
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
/* BT */
UInt32 *hashBuf;
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
CLzRef *son;
UInt32 matchMaxLen;
UInt32 numHashBytes;
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be historySize + 1 */
UInt32 cutValue;
/* BT + Hash */
CMtSync hashSync;
/* Byte hashDummy[kMtCacheLineDummy]; */
/* Hash */
Mf_GetHeads GetHeadsFunc; /* chosen by MatchFinderMt_CreateVTable from numHashBytes / bigHash */
CMatchFinder *MatchFinder; /* underlying single-threaded match finder */
} CMatchFinderMt;
/* Public interface of the multithreaded match finder.
   NOTE(review): only MatchFinderMt_CreateVTable is described from its visible
   definition; the other notes follow the usual Construct/Create/Destruct naming
   of this library and should be confirmed against LzFindMt.c. */
/* presumably puts *p into a safe "nothing allocated" state */
void MatchFinderMt_Construct(CMatchFinderMt *p);
/* presumably stops the threads and releases buffers obtained from `alloc` */
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
/* presumably allocates buffers and creates the threads; returns an SRes code */
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
/* Fills vTable and selects the GetHeads/MixMatches/Skip helpers from
   p->MatchFinder->numHashBytes. */
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
#endif

57
bsnes/lzma/LzHash.h Normal file
View File

@ -0,0 +1,57 @@
/* LzHash.h -- HASH functions for LZ algorithms
2015-04-12 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
/* Entry counts of the fixed-size 2-, 3- and 4-byte hash tables. */
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
#define kHash4Size (1 << 20)
/* Offsets inside one combined hash array: the 3-byte table starts right after
   the 2-byte table, the 4-byte table after the 3-byte table, and kFix5HashSize
   is the offset just past all three fixed tables. */
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
/*
  Hash macros for the regular match finder. They are statement macros that
  assign to variables the caller must already have declared (hv, and h2/h3/h4
  where used) and read `cur` (pointer to the current bytes). All except
  HASH2_CALC also read p->crc, and HASH3/4/5_CALC mask the main hash with
  p->hashMask.
*/
/* hv = first two bytes combined as a little-endian 16-bit value. */
#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
/* h2 from bytes 0-1, hv from bytes 0-2. */
#define HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
/* h2 from bytes 0-1, h3 from bytes 0-2, hv from bytes 0-3. */
#define HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
/* h2, h3, h4 as above from bytes 0-3, hv from bytes 0-4. */
#define HASH5_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
temp ^= (p->crc[cur[3]] << 5); \
h4 = temp & (kHash4Size - 1); \
hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
/* 16-bit hash over three bytes (active variant; the alternative above is disabled). */
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
/*
  Hash macros for the multithreaded match finder's small local tables. Same
  mixing as the HASH*_CALC family, but every result is masked to a fixed table
  size and no `hv` is produced. They read `cur` and p->crc and assign to
  caller-declared h2 / h3 / h4.
*/
/* h2 from bytes 0-1. */
#define MT_HASH2_CALC \
h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
/* h2 from bytes 0-1, h3 from bytes 0-2. */
#define MT_HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
/* h2 from bytes 0-1, h3 from bytes 0-2, h4 from bytes 0-3. */
#define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
#endif

488
bsnes/lzma/Lzma2Dec.c Normal file
View File

@ -0,0 +1,488 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2019-02-02 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
#include "Precomp.h"
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
#include <string.h>
#include "Lzma2Dec.h"
/*
00000000 - End of data
00000001 U U - Uncompressed, reset dic, need reset state and set new prop
00000010 U U - Uncompressed, no reset
100uuuuu U U P P - LZMA, no reset
101uuuuu U U P P - LZMA, reset state
110uuuuu U U P P S - LZMA, reset state + set new prop
111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic
u, U - Unpack Size
P - Pack Size
S - Props
*/
/* Control byte of an uncompressed chunk that also resets the dictionary. */
#define LZMA2_CONTROL_COPY_RESET_DIC 1
/* True when the stored control byte has bit 7 clear, i.e. the current chunk is
   an uncompressed (copy) chunk rather than an LZMA chunk. */
#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0)
/* Largest allowed value of lc + lp in LZMA2. */
#define LZMA2_LCLP_MAX 4
/* Dictionary size encoded by LZMA2 property byte p: a mantissa of 2 or 3
   (low bit of p) shifted left by p / 2 + 11, so p = 0 gives 4 KiB and p = 1
   gives 6 KiB. */
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
/* PRF(x) keeps debug-print statements in the code only when SHOW_DEBUG_INFO is defined. */
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
/* Parser states of the LZMA2 decoder. Each state before DATA names the header
   byte that is expected next; Lzma2Dec_UpdateState() performs the transitions. */
typedef enum
{
LZMA2_STATE_CONTROL, /* expecting the control byte of a chunk */
LZMA2_STATE_UNPACK0, /* expecting bits 8..15 of (unpack size - 1) */
LZMA2_STATE_UNPACK1, /* expecting bits 0..7 of (unpack size - 1) */
LZMA2_STATE_PACK0, /* expecting bits 8..15 of (pack size - 1); LZMA chunks only */
LZMA2_STATE_PACK1, /* expecting bits 0..7 of (pack size - 1) */
LZMA2_STATE_PROP, /* expecting the lc/lp/pb properties byte (control bit 0x40 set) */
LZMA2_STATE_DATA, /* header complete, chunk payload not started yet */
LZMA2_STATE_DATA_CONT, /* inside the chunk payload */
LZMA2_STATE_FINISHED, /* control byte 0 (end of data) was read */
LZMA2_STATE_ERROR /* invalid stream; decoding cannot continue */
} ELzma2State;
/*
  Expands the one-byte LZMA2 dictionary property into the 5-byte LZMA
  properties layout: props[0] is set to LZMA2_LCLP_MAX and props[1..4] hold the
  dictionary size in little-endian order.

  prop == 40 selects the maximum dictionary size 0xFFFFFFFF; any larger value
  is rejected with SZ_ERROR_UNSUPPORTED. Returns SZ_OK otherwise.
*/
static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
{
  UInt32 dictBytes;
  unsigned k;

  if (prop > 40)
    return SZ_ERROR_UNSUPPORTED;

  if (prop == 40)
    dictBytes = 0xFFFFFFFF;
  else
    dictBytes = LZMA2_DIC_SIZE_FROM_PROP(prop);

  props[0] = (Byte)LZMA2_LCLP_MAX;
  /* little-endian 32-bit dictionary size */
  for (k = 0; k < 4; k++)
    props[1 + k] = (Byte)(dictBytes >> (8 * k));

  return SZ_OK;
}
/*
  Converts the LZMA2 property byte to LZMA properties and forwards them to
  LzmaDec_AllocateProbs() for the embedded decoder.
  Returns the conversion error (e.g. SZ_ERROR_UNSUPPORTED) if `prop` is invalid,
  otherwise whatever LzmaDec_AllocateProbs() returns.
*/
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{
  Byte lzmaProps[LZMA_PROPS_SIZE];
  const SRes convRes = Lzma2Dec_GetOldProps(prop, lzmaProps);

  if (convRes != 0)
    return convRes;
  return LzmaDec_AllocateProbs(&p->decoder, lzmaProps, LZMA_PROPS_SIZE, alloc);
}
/*
  Converts the LZMA2 property byte to LZMA properties and forwards them to
  LzmaDec_Allocate() for the embedded decoder.
  Returns the conversion error (e.g. SZ_ERROR_UNSUPPORTED) if `prop` is invalid,
  otherwise whatever LzmaDec_Allocate() returns.
*/
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{
  Byte lzmaProps[LZMA_PROPS_SIZE];
  const SRes convRes = Lzma2Dec_GetOldProps(prop, lzmaProps);

  if (convRes != 0)
    return convRes;
  return LzmaDec_Allocate(&p->decoder, lzmaProps, LZMA_PROPS_SIZE, alloc);
}
/*
  Resets the LZMA2 parser to the start of a stream: the next byte is a control
  byte, the first chunk must carry a full reset (needInitLevel 0xE0), and no
  chunk data is pending. The embedded LZMA decoder is fully re-initialised.
*/
void Lzma2Dec_Init(CLzma2Dec *p)
{
  p->unpackSize = 0;
  p->isExtraMode = False;
  p->needInitLevel = 0xE0;
  p->state = LZMA2_STATE_CONTROL;

  // p->decoder.dicPos = 0; // we can use it instead of full init
  LzmaDec_Init(&p->decoder);
}
/*
  Consumes one chunk-header byte `b` according to the current parser state and
  returns the state to enter next. Updates p->control, p->needInitLevel,
  p->unpackSize, p->packSize and the lc/lp/pb properties as the corresponding
  header fields arrive.

  Returns LZMA2_STATE_ERROR for an invalid header byte or when called in a
  state that does not expect header bytes.
*/
static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
  switch (p->state)
  {
    case LZMA2_STATE_CONTROL:
    {
      p->control = b;
      p->isExtraMode = False;
      PRF(printf("\n %8X", (unsigned)p->decoder.dicPos));
      PRF(printf(" %02X", (unsigned)b));

      if (b == 0)
        return LZMA2_STATE_FINISHED;

      if (!LZMA2_IS_UNCOMPRESSED_STATE(p))
      {
        /* LZMA chunk: the control byte must provide at least the pending reset level */
        if (b < p->needInitLevel)
          return LZMA2_STATE_ERROR;
        p->needInitLevel = 0;
        p->unpackSize = (UInt32)(b & 0x1F) << 16;
        return LZMA2_STATE_UNPACK0;
      }

      /* uncompressed chunk */
      if (b == LZMA2_CONTROL_COPY_RESET_DIC)
      {
        /* dictionary is reset; state + props reset is still required later */
        p->needInitLevel = 0xC0;
        return LZMA2_STATE_UNPACK0;
      }
      if (b > 2 || p->needInitLevel == 0xE0)
        return LZMA2_STATE_ERROR;
      return LZMA2_STATE_UNPACK0;
    }

    case LZMA2_STATE_UNPACK0:
      p->unpackSize |= (UInt32)b << 8;
      return LZMA2_STATE_UNPACK1;

    case LZMA2_STATE_UNPACK1:
      p->unpackSize |= (UInt32)b;
      p->unpackSize++; /* the stream stores size - 1 */
      PRF(printf(" %7u", (unsigned)p->unpackSize));
      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
        return LZMA2_STATE_DATA;
      return LZMA2_STATE_PACK0;

    case LZMA2_STATE_PACK0:
      p->packSize = (UInt32)b << 8;
      return LZMA2_STATE_PACK1;

    case LZMA2_STATE_PACK1:
      p->packSize |= (UInt32)b;
      p->packSize++; /* the stream stores size - 1 */
      // if (p->packSize < 5) return LZMA2_STATE_ERROR;
      PRF(printf(" %5u", (unsigned)p->packSize));
      if (p->control & 0x40)
        return LZMA2_STATE_PROP;
      return LZMA2_STATE_DATA;

    case LZMA2_STATE_PROP:
    {
      /* b = (pb * 5 + lp) * 9 + lc */
      const unsigned packed = b;
      const unsigned lc = packed % 9;
      const unsigned rest = packed / 9;
      const unsigned lp = rest % 5;

      if (packed >= (9 * 5 * 5))
        return LZMA2_STATE_ERROR;
      p->decoder.prop.pb = (Byte)(rest / 5);
      if (lc + lp > LZMA2_LCLP_MAX)
        return LZMA2_STATE_ERROR;
      p->decoder.prop.lc = (Byte)lc;
      p->decoder.prop.lp = (Byte)lp;
      return LZMA2_STATE_DATA;
    }
  }
  return LZMA2_STATE_ERROR;
}
/*
  Appends `size` raw bytes from `src` to the decoder dictionary at the current
  dicPos and updates the position counters. If checkDicSize is still 0 and the
  bytes remaining until processedPos reaches prop.dicSize are <= size,
  checkDicSize is set to prop.dicSize.
*/
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
{
  Byte *dest = p->dic + p->dicPos;

  memcpy(dest, src, size);
  p->dicPos += size;

  if (p->checkDicSize == 0)
  {
    /* evaluated with processedPos as it was before this copy */
    if (p->prop.dicSize - p->processedPos <= size)
      p->checkDicSize = p->prop.dicSize;
  }

  p->processedPos += (UInt32)size;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
/*
  Decodes LZMA2 data from `src` into the decoder dictionary until
  p->decoder.dicPos reaches `dicLimit`, the input is exhausted, the end marker
  is read, or an error is found.

  srcLen     - in: number of bytes available in src; out: number consumed.
  finishMode - LZMA_FINISH_ANY lets the call return as soon as dicLimit is
               reached; otherwise header bytes are still parsed at the limit so
               that the end marker can be detected.
  status     - out: LZMA_STATUS_FINISHED_WITH_MARK, LZMA_STATUS_NOT_FINISHED or
               LZMA_STATUS_NEEDS_MORE_INPUT when SZ_OK is returned;
               LZMA_STATUS_NOT_SPECIFIED on error.

  Returns SZ_OK, or SZ_ERROR_DATA after putting the decoder into
  LZMA2_STATE_ERROR (every later call then fails the same way).
*/
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
*srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
while (p->state != LZMA2_STATE_ERROR)
{
SizeT dicPos;
if (p->state == LZMA2_STATE_FINISHED)
{
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
}
dicPos = p->decoder.dicPos;
if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_OK;
}
/* Header states: feed exactly one byte to the header parser per iteration. */
if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
{
if (*srcLen == inSize)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
(*srcLen)++;
p->state = Lzma2Dec_UpdateState(p, *src++);
/* Output is already full: only the end marker is acceptable here. */
if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
break;
continue;
}
{
SizeT inCur = inSize - *srcLen;
SizeT outCur = dicLimit - dicPos;
ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
/* The rest of the chunk fits in the output window: require the chunk to finish. */
if (outCur >= p->unpackSize)
{
outCur = (SizeT)p->unpackSize;
curFinishMode = LZMA_FINISH_END;
}
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
/* Uncompressed chunk: copy bytes straight into the dictionary. */
if (inCur == 0)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (p->state == LZMA2_STATE_DATA)
{
BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
LzmaDec_InitDicAndState(&p->decoder, initDic, False);
}
if (inCur > outCur)
inCur = outCur;
/* No room left in the output although FINISH_ANY was not requested: error. */
if (inCur == 0)
break;
LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur);
src += inCur;
*srcLen += inCur;
p->unpackSize -= (UInt32)inCur;
p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
}
else
{
/* LZMA chunk: run the LZMA decoder on at most packSize input bytes. */
SRes res;
if (p->state == LZMA2_STATE_DATA)
{
/* control >= 0xE0 resets the dictionary, control >= 0xA0 resets the coder state */
BoolInt initDic = (p->control >= 0xE0);
BoolInt initState = (p->control >= 0xA0);
LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
p->state = LZMA2_STATE_DATA_CONT;
}
if (inCur > p->packSize)
inCur = (SizeT)p->packSize;
res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status);
src += inCur;
*srcLen += inCur;
p->packSize -= (UInt32)inCur;
outCur = p->decoder.dicPos - dicPos;
p->unpackSize -= (UInt32)outCur;
if (res != 0)
break;
if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
{
/* The chunk's packed bytes are all consumed but the decoder wants more: corrupt chunk. */
if (p->packSize == 0)
break;
return SZ_OK;
}
/* No progress at all: valid only when the chunk is exactly complete. */
if (inCur == 0 && outCur == 0)
{
if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|| p->unpackSize != 0
|| p->packSize != 0)
break;
p->state = LZMA2_STATE_CONTROL;
}
*status = LZMA_STATUS_NOT_SPECIFIED;
}
}
}
/* Reached by `break` or when already in the error state. */
*status = LZMA_STATUS_NOT_SPECIFIED;
p->state = LZMA2_STATE_ERROR;
return SZ_ERROR_DATA;
}
/*
  Walks over LZMA2 chunk headers and payloads WITHOUT decoding them, advancing
  p->decoder.dicPos by the unpack sizes so the caller can locate block and
  chunk boundaries.

  outSize          - number of output bytes the caller is willing to account for.
  srcLen           - in: bytes available in src; out: bytes consumed.
  checkFinishBlock - when non-zero, header bytes are still read with
                     outSize == 0 so that the end marker can be seen.

  Returns an LZMA_STATUS_* value cast to ELzma2ParseStatus, or
  LZMA2_PARSE_STATUS_NEW_BLOCK (a control byte that resets the dictionary was
  read) or LZMA2_PARSE_STATUS_NEW_CHUNK (a complete chunk header was read).
  LZMA_STATUS_NOT_SPECIFIED signals a data error and leaves the parser in
  LZMA2_STATE_ERROR.
*/
ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
SizeT outSize,
const Byte *src, SizeT *srcLen,
int checkFinishBlock)
{
SizeT inSize = *srcLen;
*srcLen = 0;
while (p->state != LZMA2_STATE_ERROR)
{
if (p->state == LZMA2_STATE_FINISHED)
return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
if (outSize == 0 && !checkFinishBlock)
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
/* Header states: consume one header byte per iteration. */
if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
{
if (*srcLen == inSize)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
(*srcLen)++;
p->state = Lzma2Dec_UpdateState(p, *src++);
/* UNPACK0 is entered right after a control byte was accepted. */
if (p->state == LZMA2_STATE_UNPACK0)
{
// if (p->decoder.dicPos != 0)
if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0)
return LZMA2_PARSE_STATUS_NEW_BLOCK;
// if (outSize == 0) return LZMA_STATUS_NOT_FINISHED;
}
// The following code can be commented.
// It's not big problem, if we read additional input bytes.
// It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state.
if (outSize == 0 && p->state != LZMA2_STATE_FINISHED)
{
// checkFinishBlock is true. So we expect that block must be finished,
// We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
// break;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
}
if (p->state == LZMA2_STATE_DATA)
return LZMA2_PARSE_STATUS_NEW_CHUNK;
continue;
}
if (outSize == 0)
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
{
SizeT inCur = inSize - *srcLen;
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
/* Uncompressed chunk: input and output advance by the same number of bytes. */
if (inCur == 0)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
if (inCur > p->unpackSize)
inCur = p->unpackSize;
if (inCur > outSize)
inCur = outSize;
p->decoder.dicPos += inCur;
src += inCur;
*srcLen += inCur;
outSize -= inCur;
p->unpackSize -= (UInt32)inCur;
p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
}
else
{
/* LZMA chunk: packed bytes are skipped; output is only accounted for once
   the whole packed chunk has been consumed. */
p->isExtraMode = True;
if (inCur == 0)
{
if (p->packSize != 0)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
}
else if (p->state == LZMA2_STATE_DATA)
{
p->state = LZMA2_STATE_DATA_CONT;
if (*src != 0)
{
// first byte of lzma chunk must be Zero
*srcLen += 1;
p->packSize--;
break;
}
}
if (inCur > p->packSize)
inCur = (SizeT)p->packSize;
src += inCur;
*srcLen += inCur;
p->packSize -= (UInt32)inCur;
if (p->packSize == 0)
{
/* Account for as much of the chunk's output as outSize allows. */
SizeT rem = outSize;
if (rem > p->unpackSize)
rem = p->unpackSize;
p->decoder.dicPos += rem;
p->unpackSize -= (UInt32)rem;
outSize -= rem;
if (p->unpackSize == 0)
p->state = LZMA2_STATE_CONTROL;
}
}
}
}
/* Reached by `break` or when already in the error state. */
p->state = LZMA2_STATE_ERROR;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
}
/*
  Decodes LZMA2 data into a caller-supplied buffer, using the decoder's own
  dictionary as a circular window: each pass decodes into the dictionary with
  Lzma2Dec_DecodeToDic() and then copies the freshly produced bytes to `dest`.

  destLen - in: capacity of dest; out: bytes written.
  srcLen  - in: bytes available in src; out: bytes consumed.
  finishMode is only applied on a pass whose window can hold all remaining
  requested output; earlier passes use LZMA_FINISH_ANY.

  Returns the first non-zero result of Lzma2Dec_DecodeToDic(), or SZ_OK once
  the output is full or a pass produces no output.
*/
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
  SizeT destRemaining = *destLen;
  SizeT srcRemaining = *srcLen;

  *srcLen = *destLen = 0;

  for (;;)
  {
    SizeT consumed = srcRemaining;
    SizeT windowStart, room, produced;
    ELzmaFinishMode mode = LZMA_FINISH_ANY;
    SRes res;

    /* wrap the circular dictionary */
    if (p->decoder.dicPos == p->decoder.dicBufSize)
      p->decoder.dicPos = 0;
    windowStart = p->decoder.dicPos;

    room = p->decoder.dicBufSize - windowStart;
    if (room >= destRemaining)
    {
      room = destRemaining;
      mode = finishMode;
    }

    res = Lzma2Dec_DecodeToDic(p, windowStart + room, src, &consumed, mode, status);

    src += consumed;
    srcRemaining -= consumed;
    *srcLen += consumed;

    produced = p->decoder.dicPos - windowStart;
    memcpy(dest, p->decoder.dic + windowStart, produced);
    dest += produced;
    destRemaining -= produced;
    *destLen += produced;

    if (res != 0)
      return res;
    if (produced == 0 || destRemaining == 0)
      return SZ_OK;
  }
}
/*
  One-call LZMA2 decoder: decodes `src` directly into `dest`, which doubles as
  the dictionary, so no dictionary buffer is allocated (only the probability
  tables are).

  destLen - in: capacity of dest; out: bytes produced.
  srcLen  - in: bytes available in src; out: bytes consumed.

  Returns the allocation/property error from Lzma2Dec_AllocateProbs(), the
  result of Lzma2Dec_DecodeToDic(), or SZ_ERROR_INPUT_EOF when decoding
  succeeded so far but stopped because more input was needed.
*/
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc)
{
  CLzma2Dec dec;
  SRes res;
  const SizeT destCapacity = *destLen;
  const SizeT srcAvail = *srcLen;

  *destLen = *srcLen = 0;
  *status = LZMA_STATUS_NOT_SPECIFIED;

  Lzma2Dec_Construct(&dec);
  res = Lzma2Dec_AllocateProbs(&dec, prop, alloc);
  if (res != 0)
    return res;

  /* decode in place: the output buffer is the dictionary */
  dec.decoder.dic = dest;
  dec.decoder.dicBufSize = destCapacity;
  Lzma2Dec_Init(&dec);

  *srcLen = srcAvail;
  res = Lzma2Dec_DecodeToDic(&dec, destCapacity, src, srcLen, finishMode, status);
  *destLen = dec.decoder.dicPos;

  if (res == SZ_OK)
  {
    if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
      res = SZ_ERROR_INPUT_EOF;
  }

  Lzma2Dec_FreeProbs(&dec, alloc);
  return res;
}

120
bsnes/lzma/Lzma2Dec.h Normal file
View File

@ -0,0 +1,120 @@
/* Lzma2Dec.h -- LZMA2 Decoder
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_H
#define __LZMA2_DEC_H
#include "LzmaDec.h"
EXTERN_C_BEGIN
/* ---------- State Interface ---------- */
/* LZMA2 decoder state: the chunk-header parser plus the embedded LZMA decoder. */
typedef struct
{
unsigned state; /* current parser state (an ELzma2State value, defined in Lzma2Dec.c) */
Byte control; /* control byte of the current chunk */
Byte needInitLevel; /* LZMA chunks with a control byte below this value are rejected;
                       0xE0 after Lzma2Dec_Init, 0xC0 after a dictionary-reset copy chunk,
                       0 once an LZMA chunk has been accepted */
Byte isExtraMode; /* set by Lzma2Dec_Parse while inside an LZMA chunk; cleared on each control byte */
Byte _pad_;
UInt32 packSize; /* compressed bytes of the current LZMA chunk still to be consumed */
UInt32 unpackSize; /* uncompressed bytes of the current chunk still to be produced */
CLzmaDec decoder; /* underlying LZMA decoder (dictionary, probabilities, positions) */
} CLzma2Dec;
/* Construct / free operations simply forward to the embedded LZMA decoder. */
#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
/* `prop` is the one-byte LZMA2 dictionary-size property (0..40); larger values
   yield SZ_ERROR_UNSUPPORTED. AllocateProbs forwards to LzmaDec_AllocateProbs,
   Allocate forwards to LzmaDec_Allocate. */
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
/* Resets the parser to the start of a stream and re-initialises the LZMA decoder. */
void Lzma2Dec_Init(CLzma2Dec *p);
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
SZ_ERROR_DATA - Data error
*/
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- LZMA2 block and chunk parsing ---------- */
/*
Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data.
It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code:
- LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input.
- LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read.
CLzma2Dec::unpackSize contains unpack size of that chunk
*/
/* Return type of Lzma2Dec_Parse(). It extends ELzmaStatus: the LZMA_STATUS_*
   values listed in the comment below are returned unchanged (cast to this
   type), and the two parse-specific codes are numbered right after the last
   ELzmaStatus value so the ranges never collide. */
typedef enum
{
/*
LZMA_STATUS_NOT_SPECIFIED // data error
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED //
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused
*/
LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1,
LZMA2_PARSE_STATUS_NEW_CHUNK
} ELzma2ParseStatus;
ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
SizeT outSize, // output size
const Byte *src, SizeT *srcLen,
int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position.
);
/*
LZMA2 parser doesn't decode LZMA chunks, so we must read
full input LZMA chunk to decode some part of LZMA chunk.
Lzma2Dec_GetUnpackExtra() returns the value that shows
max possible number of output bytes that can be output by decoder
at current input position.

The macro expands to a plain expression (no trailing semicolon), so it can be
used inside larger expressions, conditions and argument lists as well as in
ordinary `x = Lzma2Dec_GetUnpackExtra(p);` statements.
`p` is evaluated more than once; pass an expression without side effects.
*/
#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0)
/* ---------- One Call Interface ---------- */
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
*/
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc);
EXTERN_C_END
#endif

1082
bsnes/lzma/Lzma2DecMt.c Normal file

File diff suppressed because it is too large Load Diff

79
bsnes/lzma/Lzma2DecMt.h Normal file
View File

@ -0,0 +1,79 @@
/* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread
2018-02-17 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_MT_H
#define __LZMA2_DEC_MT_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Tuning parameters for Lzma2DecMt_Decode() / Lzma2DecMt_Init().
   The *_ST fields exist in every build; the remaining fields are compiled in
   only when multithreading is enabled (_7ZIP_ST not defined).
   NOTE(review): per-field meanings below are inferred from the names only -
   the implementation (Lzma2DecMt.c) is not visible here; confirm there. */
typedef struct
{
size_t inBufSize_ST; /* presumably input buffer size for single-thread decoding */
size_t outStep_ST; /* presumably output step size for single-thread decoding */
#ifndef _7ZIP_ST
unsigned numThreads;
size_t inBufSize_MT; /* presumably input buffer size for multi-thread decoding */
size_t outBlockMax;
size_t inBlockMax;
#endif
} CLzma2DecMtProps;
/* init to single-thread mode */
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p);
/* ---------- CLzma2DecMtHandle Interface ---------- */
/* Lzma2DecMt_ * functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
// SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzma2DecMtHandle;
CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
Byte prop,
const CLzma2DecMtProps *props,
ISeqOutStream *outStream,
const UInt64 *outDataSize, // NULL means undefined
int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished
// Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
// const Byte *inData, size_t inDataSize,
// out variables:
UInt64 *inProcessed,
int *isMT, /* out: (*isMT == 0), if single thread decoding was used */
// UInt64 *outProcessed,
ICompressProgress *progress);
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */
SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
Byte prop,
const CLzma2DecMtProps *props,
const UInt64 *outDataSize, int finishMode,
ISeqInStream *inStream);
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
Byte *data, size_t *outSize,
UInt64 *inStreamProcessed);
EXTERN_C_END
#endif

803
bsnes/lzma/Lzma2Enc.c Normal file
View File

@ -0,0 +1,803 @@
/* Lzma2Enc.c -- LZMA2 Encoder
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
/* #define _7ZIP_ST */
#include "Lzma2Enc.h"
#ifndef _7ZIP_ST
#include "MtCoder.h"
#else
#define MTCODER__THREADS_MAX 1
#endif
/* Control-byte values written at the start of each chunk. */
#define LZMA2_CONTROL_LZMA (1 << 7) /* bit 7 set: LZMA-compressed chunk */
#define LZMA2_CONTROL_COPY_NO_RESET 2 /* uncompressed chunk, dictionary kept */
#define LZMA2_CONTROL_COPY_RESET_DIC 1 /* uncompressed chunk, dictionary reset */
#define LZMA2_CONTROL_EOF 0 /* end-of-stream marker */
/* Largest allowed value of lc + lp in LZMA2. */
#define LZMA2_LCLP_MAX 4
/* Dictionary size encoded by property byte p: mantissa 2 or 3 (low bit of p)
   shifted left by p / 2 + 11. */
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
/* Chunk size limits: 64 KiB of packed data (also the size of one copy chunk)
   and 2 MiB of unpacked data per chunk. */
#define LZMA2_PACK_SIZE_MAX (1 << 16)
#define LZMA2_COPY_CHUNK_SIZE LZMA2_PACK_SIZE_MAX
#define LZMA2_UNPACK_SIZE_MAX (1 << 21)
#define LZMA2_KEEP_WINDOW_SIZE LZMA2_UNPACK_SIZE_MAX
/* Size of the temporary buffer for one compressed chunk: the packed maximum plus 16 spare bytes. */
#define LZMA2_CHUNK_SIZE_COMPRESSED_MAX ((1 << 16) + 16)
/* Debug-print hook; disabled (expands to nothing). */
#define PRF(x) /* x */
/* ---------- CLimitedSeqInStream ---------- */
/* ISeqInStream wrapper that forwards reads to `realStream` but never delivers
   more than `limit` bytes in total. */
typedef struct
{
ISeqInStream vt; /* interface seen by readers; Read is set to LimitedSeqInStream_Read */
ISeqInStream *realStream; /* underlying stream that supplies the data */
UInt64 limit; /* maximum total bytes to deliver; (UInt64)(Int64)-1 means unlimited */
UInt64 processed; /* bytes delivered so far */
int finished; /* 1 when the most recent read from realStream returned 0 bytes */
} CLimitedSeqInStream;
/* Resets the wrapper's counters: nothing read yet, not finished, and no limit
   ((UInt64)(Int64)-1 is the "unlimited" sentinel). The vt and realStream
   members are left untouched. */
static void LimitedSeqInStream_Init(CLimitedSeqInStream *p)
{
  p->finished = 0;
  p->processed = 0;
  p->limit = (UInt64)(Int64)-1;
}
static SRes LimitedSeqInStream_Read(const ISeqInStream *pp, void *data, size_t *size)
{
CLimitedSeqInStream *p = CONTAINER_FROM_VTBL(pp, CLimitedSeqInStream, vt);
size_t size2 = *size;
SRes res = SZ_OK;
if (p->limit != (UInt64)(Int64)-1)
{
UInt64 rem = p->limit - p->processed;
if (size2 > rem)
size2 = (size_t)rem;
}
if (size2 != 0)
{
res = ISeqInStream_Read(p->realStream, data, &size2);
p->finished = (size2 == 0 ? 1 : 0);
p->processed += size2;
}
*size = size2;
return res;
}
/* ---------- CLzma2EncInt ---------- */
/* Per-coder state of the LZMA2 encoder: one LZMA encoder plus the flags that
   decide what the next chunk header must contain. */
typedef struct
{
CLzmaEncHandle enc; /* underlying LZMA encoder; NULL until created */
Byte propsAreSet; /* non-zero once props were applied to enc and propsByte was captured */
Byte propsByte; /* first byte of the encoded LZMA properties, written into chunk headers */
Byte needInitState; /* next LZMA chunk must signal a state reset */
Byte needInitProp; /* next LZMA chunk must also carry the properties byte */
UInt64 srcPos; /* uncompressed bytes emitted in the current block; 0 selects the dictionary-reset control values */
} CLzma2EncInt;
/*
  Applies the LZMA properties from `props` to the coder's encoder the first
  time it is called, and caches the first encoded property byte for later
  chunk headers. Later calls (propsAreSet already set) do nothing.

  Returns SZ_OK, or the error from LzmaEnc_SetProps / LzmaEnc_WriteProperties.
*/
static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props)
{
  Byte encoded[LZMA_PROPS_SIZE];
  SizeT encodedSize = LZMA_PROPS_SIZE;
  SRes res;

  if (p->propsAreSet)
    return SZ_OK;

  res = LzmaEnc_SetProps(p->enc, &props->lzmaProps);
  if (res != 0)
    return res;

  res = LzmaEnc_WriteProperties(p->enc, encoded, &encodedSize);
  if (res != 0)
    return res;

  p->propsByte = encoded[0];
  p->propsAreSet = True;
  return SZ_OK;
}
/* Prepares the coder for a new independent block: the first chunk must reset
   the coder state and carry the properties byte, and the block's source
   position restarts at 0. */
static void Lzma2EncInt_InitBlock(CLzma2EncInt *p)
{
  p->needInitProp = True;
  p->needInitState = True;
  p->srcPos = 0;
}
/* LZMA encoder entry points used by the LZMA2 encoder. They are declared here
   directly; their definitions are not in this file (presumably LzmaEnc.c -
   confirm). */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* Encodes one chunk into dest; *destLen is in/out and *unpackSize is in (limit)
   / out (bytes actually consumed), as used by Lzma2EncInt_EncodeSubblock(). */
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
/* Pointer just past the source bytes consumed so far (the copy path reads
   backwards from it by the chunk's unpack size). */
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
/* SaveState / RestoreState bracket a chunk so its coder-state changes can be
   undone when the chunk is emitted uncompressed instead. */
void LzmaEnc_SaveState(CLzmaEncHandle pp);
void LzmaEnc_RestoreState(CLzmaEncHandle pp);
/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp);
*/
/*
  Encodes the next LZMA2 chunk from the coder's current input position.

  The data is first compressed with LZMA into outBuf, leaving room for the
  chunk header. If compression does not pay off (packed size + 2 >= unpacked
  size, packed size above 64 KiB, or the output did not fit), the same input
  is emitted as one or more uncompressed "copy" chunks instead and the LZMA
  coder state saved before the attempt is restored.

  outBuf      - work/output buffer.
  packSizeRes - in: capacity of outBuf; out: number of bytes produced
                (with outStream in the copy path: total bytes written).
  outStream   - optional; when non-NULL each finished chunk is written to it.

  Returns SZ_OK (with *packSizeRes == 0 when no input was consumed),
  SZ_ERROR_OUTPUT_EOF if outBuf is too small, SZ_ERROR_WRITE if the stream
  write is short, or an error from the LZMA encoder.
*/
static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
size_t *packSizeRes, ISeqOutStream *outStream)
{
size_t packSizeLimit = *packSizeRes;
size_t packSize = packSizeLimit;
UInt32 unpackSize = LZMA2_UNPACK_SIZE_MAX;
/* LZMA chunk header: control + 2 size bytes + 2 pack-size bytes (+ props byte) */
unsigned lzHeaderSize = 5 + (p->needInitProp ? 1 : 0);
BoolInt useCopyBlock;
SRes res;
*packSizeRes = 0;
if (packSize < lzHeaderSize)
return SZ_ERROR_OUTPUT_EOF;
packSize -= lzHeaderSize;
/* Remember the coder state so the attempt can be undone if we fall back to copy chunks. */
LzmaEnc_SaveState(p->enc);
res = LzmaEnc_CodeOneMemBlock(p->enc, p->needInitState,
outBuf + lzHeaderSize, &packSize, LZMA2_PACK_SIZE_MAX, &unpackSize);
PRF(printf("\npackSize = %7d unpackSize = %7d ", packSize, unpackSize));
if (unpackSize == 0)
return res;
if (res == SZ_OK)
useCopyBlock = (packSize + 2 >= unpackSize || packSize > (1 << 16));
else
{
/* Running out of output space is not fatal: store the data uncompressed. */
if (res != SZ_ERROR_OUTPUT_EOF)
return res;
res = SZ_OK;
useCopyBlock = True;
}
if (useCopyBlock)
{
size_t destPos = 0;
PRF(printf("################# COPY "));
while (unpackSize > 0)
{
UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE;
/* copy chunk = 3 header bytes + u data bytes */
if (packSizeLimit - destPos < u + 3)
return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET);
outBuf[destPos++] = (Byte)((u - 1) >> 8);
outBuf[destPos++] = (Byte)(u - 1);
/* The source bytes of this chunk end at the encoder's current buffer position. */
memcpy(outBuf + destPos, LzmaEnc_GetCurBuf(p->enc) - unpackSize, u);
unpackSize -= u;
destPos += u;
p->srcPos += u;
if (outStream)
{
/* Streaming: flush each copy chunk and reuse outBuf from the start. */
*packSizeRes += destPos;
if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos)
return SZ_ERROR_WRITE;
destPos = 0;
}
else
*packSizeRes = destPos;
/* needInitState = True; */
}
LzmaEnc_RestoreState(p->enc);
return SZ_OK;
}
{
/* LZMA chunk: fill in the header in front of the already-encoded payload. */
size_t destPos = 0;
UInt32 u = unpackSize - 1;
UInt32 pm = (UInt32)(packSize - 1);
/* 3: reset dic + state + props, 2: reset state + props, 1: reset state, 0: no reset */
unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0);
PRF(printf(" "));
outBuf[destPos++] = (Byte)(LZMA2_CONTROL_LZMA | (mode << 5) | ((u >> 16) & 0x1F));
outBuf[destPos++] = (Byte)(u >> 8);
outBuf[destPos++] = (Byte)u;
outBuf[destPos++] = (Byte)(pm >> 8);
outBuf[destPos++] = (Byte)pm;
if (p->needInitProp)
outBuf[destPos++] = p->propsByte;
p->needInitProp = False;
p->needInitState = False;
destPos += packSize;
p->srcPos += unpackSize;
if (outStream)
if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos)
return SZ_ERROR_WRITE;
*packSizeRes = destPos;
return SZ_OK;
}
}
/* ---------- Lzma2 Props ---------- */
/* Fills *p with "choose automatically" defaults: default LZMA properties,
   automatic block size, and -1 (unspecified) for every thread count. */
void Lzma2EncProps_Init(CLzma2EncProps *p)
{
  LzmaEncProps_Init(&p->lzmaProps);

  p->numTotalThreads = -1;
  p->numBlockThreads_Max = -1;
  p->numBlockThreads_Reduced = -1;
  p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO;
}
/*
  Replaces every "automatic"/unspecified value in *p with a concrete one.

  Local names:
    t1  - threads per LZMA encoder (lzmaProps.numThreads)
    t1n - t1 as LzmaEncProps_Normalize() would resolve it (computed on a copy)
    t2  - number of block threads (numBlockThreads_Max)
    t2r - t2 reduced to the number of blocks actually needed
    t3  - total number of threads (numTotalThreads)

  On return lzmaProps is normalized (with its original reduceSize restored),
  blockSize is either SOLID or a concrete size, and the three thread-count
  fields are filled in.
*/
void Lzma2EncProps_Normalize(CLzma2EncProps *p)
{
UInt64 fileSize;
int t1, t1n, t2, t2r, t3;
{
/* Normalize a copy only to learn the resolved per-encoder thread count. */
CLzmaEncProps lzmaProps = p->lzmaProps;
LzmaEncProps_Normalize(&lzmaProps);
t1n = lzmaProps.numThreads;
}
t1 = p->lzmaProps.numThreads;
t2 = p->numBlockThreads_Max;
t3 = p->numTotalThreads;
if (t2 > MTCODER__THREADS_MAX)
t2 = MTCODER__THREADS_MAX;
/* Derive whichever of t1 / t2 / t3 was left unspecified from the others. */
if (t3 <= 0)
{
if (t2 <= 0)
t2 = 1;
t3 = t1n * t2;
}
else if (t2 <= 0)
{
t2 = t3 / t1n;
if (t2 == 0)
{
/* Fewer total threads than one encoder would use: single-threaded encoders. */
t1 = 1;
t2 = t3;
}
if (t2 > MTCODER__THREADS_MAX)
t2 = MTCODER__THREADS_MAX;
}
else if (t1 <= 0)
{
t1 = t3 / t2;
if (t1 == 0)
t1 = 1;
}
else
t3 = t1n * t2;
p->lzmaProps.numThreads = t1;
t2r = t2;
fileSize = p->lzmaProps.reduceSize;
/* With an explicit block size smaller than the input (or unknown input size),
   let the LZMA normalization size its dictionary for one block. */
if ( p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO
&& (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1))
p->lzmaProps.reduceSize = p->blockSize;
LzmaEncProps_Normalize(&p->lzmaProps);
p->lzmaProps.reduceSize = fileSize;
t1 = p->lzmaProps.numThreads;
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID)
{
t2r = t2 = 1;
t3 = t1;
}
else if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && t2 <= 1)
{
/* if there is no block multi-threading, we use SOLID block */
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID;
}
else
{
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO)
{
/* Automatic block size: 4 * dictSize, clamped to [1 MiB, 256 MiB], but never
   below dictSize, then rounded up to a multiple of 1 MiB. */
const UInt32 kMinSize = (UInt32)1 << 20;
const UInt32 kMaxSize = (UInt32)1 << 28;
const UInt32 dictSize = p->lzmaProps.dictSize;
UInt64 blockSize = (UInt64)dictSize << 2;
if (blockSize < kMinSize) blockSize = kMinSize;
if (blockSize > kMaxSize) blockSize = kMaxSize;
if (blockSize < dictSize) blockSize = dictSize;
blockSize += (kMinSize - 1);
blockSize &= ~(UInt64)(kMinSize - 1);
p->blockSize = blockSize;
}
if (t2 > 1 && fileSize != (UInt64)(Int64)-1)
{
/* Do not plan for more block threads than there are blocks (rounded up). */
UInt64 numBlocks = fileSize / p->blockSize;
if (numBlocks * p->blockSize != fileSize)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
t2r = (unsigned)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
}
}
}
p->numBlockThreads_Max = t2;
p->numBlockThreads_Reduced = t2r;
p->numTotalThreads = t3;
}
/* Reports progress through the optional callback `p`.
   Returns SZ_OK when there is no callback or the callback returns SZ_OK, and
   SZ_ERROR_PROGRESS when the callback returns anything else. */
static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize)
{
  if (!p)
    return SZ_OK;
  if (ICompressProgress_Progress(p, inSize, outSize) != SZ_OK)
    return SZ_ERROR_PROGRESS;
  return SZ_OK;
}
/* ---------- Lzma2 ---------- */
/* Complete LZMA2 encoder object behind CLzma2EncHandle. */
typedef struct
{
Byte propEncoded;
CLzma2EncProps props; /* normalized encoder settings */
UInt64 expectedDataSize; /* set by Lzma2Enc_SetDataSize; (UInt64)(Int64)-1 when unknown */
Byte *tempBufLzma; /* lazily allocated chunk buffer (LZMA2_CHUNK_SIZE_COMPRESSED_MAX bytes) for stream output */
ISzAllocPtr alloc; /* allocator for this object, tempBufLzma and outBufs */
ISzAllocPtr allocBig; /* passed to LzmaEnc_Destroy together with alloc */
CLzma2EncInt coders[MTCODER__THREADS_MAX]; /* one chunk coder per possible block thread */
#ifndef _7ZIP_ST
/* Multithreaded-only state.
   NOTE(review): the code using most of these fields is outside this view;
   the field comments below come from the original source. */
ISeqOutStream *outStream;
Byte *outBuf;
size_t outBuf_Rem; /* remainder in outBuf */
size_t outBufSize; /* size of allocated outBufs[i] */
size_t outBufsDataSizes[MTCODER__BLOCKS_MAX];
BoolInt mtCoder_WasConstructed; /* True once mtCoder needs MtCoder_Destruct */
CMtCoder mtCoder;
Byte *outBufs[MTCODER__BLOCKS_MAX]; /* per-block output buffers; NULL when not allocated */
#endif
} CLzma2Enc;
/*
  Allocates an LZMA2 encoder object with `alloc` and puts it into its initial
  state: default (normalized) properties, unknown data size, no LZMA encoders
  and no buffers allocated yet.

  alloc    - allocator for the object and its small buffers.
  allocBig - allocator remembered for the LZMA encoders' large buffers.

  Returns the new handle, or NULL if the allocation fails.
*/
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
  unsigned k;
  CLzma2Enc *enc = (CLzma2Enc *)ISzAlloc_Alloc(alloc, sizeof(CLzma2Enc));

  if (enc == NULL)
    return NULL;

  Lzma2EncProps_Init(&enc->props);
  Lzma2EncProps_Normalize(&enc->props);

  enc->alloc = alloc;
  enc->allocBig = allocBig;
  enc->tempBufLzma = NULL;
  enc->expectedDataSize = (UInt64)(Int64)-1;

  /* LZMA encoders are created on demand */
  for (k = 0; k < MTCODER__THREADS_MAX; k++)
    enc->coders[k].enc = NULL;

  #ifndef _7ZIP_ST
  enc->mtCoder_WasConstructed = False;
  for (k = 0; k < MTCODER__BLOCKS_MAX; k++)
    enc->outBufs[k] = NULL;
  enc->outBufSize = 0;
  #endif

  return enc;
}
#ifndef _7ZIP_ST
/* Releases every allocated per-block output buffer with p->alloc, clears the
   slots and resets the recorded buffer size to 0. */
static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
{
  Byte **slot = p->outBufs;
  Byte **const slotsEnd = slot + MTCODER__BLOCKS_MAX;

  for (; slot != slotsEnd; slot++)
  {
    if (!*slot)
      continue;
    ISzAlloc_Free(p->alloc, *slot);
    *slot = NULL;
  }
  p->outBufSize = 0;
}
#endif
/*
  Destroys an encoder created by Lzma2Enc_Create(): tears down every LZMA
  encoder that was created, the multithreading coder and its output buffers
  (multithreaded builds only), the temporary chunk buffer, and finally the
  object itself. `pp` must be a valid handle; it is dereferenced unconditionally.
*/
void Lzma2Enc_Destroy(CLzma2EncHandle pp)
{
  CLzma2Enc *enc = (CLzma2Enc *)pp;
  CLzma2EncInt *coder = enc->coders;
  CLzma2EncInt *const codersEnd = coder + MTCODER__THREADS_MAX;

  for (; coder != codersEnd; coder++)
  {
    if (!coder->enc)
      continue;
    LzmaEnc_Destroy(coder->enc, enc->alloc, enc->allocBig);
    coder->enc = NULL;
  }

  #ifndef _7ZIP_ST
  if (enc->mtCoder_WasConstructed)
  {
    MtCoder_Destruct(&enc->mtCoder);
    enc->mtCoder_WasConstructed = False;
  }
  Lzma2Enc_FreeOutBufs(enc);
  #endif

  ISzAlloc_Free(enc->alloc, enc->tempBufLzma);
  enc->tempBufLzma = NULL;

  /* the object itself goes last */
  ISzAlloc_Free(enc->alloc, pp);
}
/* Installs new encoder properties.
   The LZMA part is first normalized on a scratch copy and checked against
   the LZMA2 limit on lc + lp; on violation SZ_ERROR_PARAM is returned and
   the stored properties are left untouched. Otherwise *props is copied
   into the encoder and normalized there. */
SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props)
{
  CLzma2Enc *enc = (CLzma2Enc *)pp;
  CLzmaEncProps probe = props->lzmaProps;

  LzmaEncProps_Normalize(&probe);
  if (probe.lc + probe.lp <= LZMA2_LCLP_MAX)
  {
    enc->props = *props;
    Lzma2EncProps_Normalize(&enc->props);
    return SZ_OK;
  }
  return SZ_ERROR_PARAM;
}
void Lzma2Enc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
p->expectedDataSize = expectedDataSiize;
}
/* Returns the one-byte dictionary-size property for the current settings:
   the smallest index i in [0, 40) for which LZMA2_DIC_SIZE_FROM_PROP(i)
   is not smaller than the configured dictionary size, or 40 when no such
   index exists. */
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp)
{
  const CLzma2Enc *enc = (const CLzma2Enc *)pp;
  const UInt32 dictSize = LzmaEncProps_GetDictSize(&enc->props.lzmaProps);
  unsigned prop = 0;

  while (prop < 40 && dictSize > LZMA2_DIC_SIZE_FROM_PROP(prop))
    prop++;
  return (Byte)prop;
}
/* Encodes one input (a stream or a memory buffer) with a single coder.

   me         - owning encoder object (properties, allocators, tempBufLzma)
   p          - coder state to use; its LZMA encoder is created here if needed
   outStream  - destination stream; used only when outBuf is NULL
   outBuf     - destination buffer, or NULL to write to outStream
   outBufSize - with outBuf: capacity on entry, number of bytes written on
                return (also updated as data is produced); not touched
                when outBuf is NULL
   inStream   - source stream, or NULL to read from inData
   inData / inDataSize - source buffer, used when inStream is NULL
   finished   - if nonzero, a single zero byte is appended after the last
                block (presumably the LZMA2 end-of-stream marker)
   progress   - optional progress callback

   The input is processed in blocks of at most me->props.blockSize bytes
   (no limit for LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID); each block is written
   as a sequence of sub-blocks produced by Lzma2EncInt_EncodeSubblock().

   Returns SZ_OK, SZ_ERROR_MEM, SZ_ERROR_FAIL (consumed size mismatch),
   SZ_ERROR_OUTPUT_EOF, SZ_ERROR_WRITE, SZ_ERROR_PROGRESS, or an error
   propagated from the helper functions. */
static SRes Lzma2Enc_EncodeMt1(
CLzma2Enc *me,
CLzma2EncInt *p,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
int finished,
ICompressProgress *progress)
{
UInt64 unpackTotal = 0;
UInt64 packTotal = 0;
size_t outLim = 0;
CLimitedSeqInStream limitedInStream;
if (outBuf)
{
/* remember the capacity; *outBufSize now counts bytes written */
outLim = *outBufSize;
*outBufSize = 0;
}
if (!p->enc)
{
/* first use of this coder: create its LZMA encoder */
p->propsAreSet = False;
p->enc = LzmaEnc_Create(me->alloc);
if (!p->enc)
return SZ_ERROR_MEM;
}
limitedInStream.realStream = inStream;
if (inStream)
{
limitedInStream.vt.Read = LimitedSeqInStream_Read;
}
if (!outBuf)
{
// outStream version works only in one thread. So we use CLzma2Enc::tempBufLzma
if (!me->tempBufLzma)
{
me->tempBufLzma = (Byte *)ISzAlloc_Alloc(me->alloc, LZMA2_CHUNK_SIZE_COMPRESSED_MAX);
if (!me->tempBufLzma)
return SZ_ERROR_MEM;
}
}
RINOK(Lzma2EncInt_InitStream(p, &me->props));
/* one iteration per block */
for (;;)
{
SRes res = SZ_OK;
size_t inSizeCur = 0;
Lzma2EncInt_InitBlock(p);
LimitedSeqInStream_Init(&limitedInStream);
limitedInStream.limit = me->props.blockSize;
if (inStream)
{
/* size hint for this block: remaining expected data, capped at blockSize */
UInt64 expected = (UInt64)(Int64)-1;
// inStream version works only in one thread. So we use CLzma2Enc::expectedDataSize
if (me->expectedDataSize != (UInt64)(Int64)-1
&& me->expectedDataSize >= unpackTotal)
expected = me->expectedDataSize - unpackTotal;
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& expected > me->props.blockSize)
expected = (size_t)me->props.blockSize;
LzmaEnc_SetDataSize(p->enc, expected);
RINOK(LzmaEnc_PrepareForLzma2(p->enc,
&limitedInStream.vt,
LZMA2_KEEP_WINDOW_SIZE,
me->alloc,
me->allocBig));
}
else
{
/* memory input: this block is the next slice of inData, capped at blockSize */
inSizeCur = inDataSize - (size_t)unpackTotal;
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& inSizeCur > me->props.blockSize)
inSizeCur = (size_t)me->props.blockSize;
// LzmaEnc_SetDataSize(p->enc, inSizeCur);
RINOK(LzmaEnc_MemPrepare(p->enc,
inData + (size_t)unpackTotal, inSizeCur,
LZMA2_KEEP_WINDOW_SIZE,
me->alloc,
me->allocBig));
}
/* emit sub-blocks until one comes back empty (packSize == 0) or an error occurs */
for (;;)
{
size_t packSize = LZMA2_CHUNK_SIZE_COMPRESSED_MAX;
if (outBuf)
packSize = outLim - (size_t)packTotal;
res = Lzma2EncInt_EncodeSubblock(p,
outBuf ? outBuf + (size_t)packTotal : me->tempBufLzma, &packSize,
outBuf ? NULL : outStream);
if (res != SZ_OK)
break;
packTotal += packSize;
if (outBuf)
*outBufSize = (size_t)packTotal;
res = Progress(progress, unpackTotal + p->srcPos, packTotal);
if (res != SZ_OK)
break;
/*
if (LzmaEnc_GetNumAvailableBytes(p->enc) == 0)
break;
*/
if (packSize == 0)
break;
}
/* LzmaEnc_Finish() is called even on error, before the error is returned */
LzmaEnc_Finish(p->enc);
unpackTotal += p->srcPos;
RINOK(res);
/* the encoder must have consumed exactly what was supplied for this block */
if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur))
return SZ_ERROR_FAIL;
if (inStream ? limitedInStream.finished : (unpackTotal == inDataSize))
{
/* all input consumed */
if (finished)
{
/* append the single terminating zero byte */
if (outBuf)
{
size_t destPos = *outBufSize;
if (destPos >= outLim)
return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos] = 0;
*outBufSize = destPos + 1;
}
else
{
Byte b = 0;
if (ISeqOutStream_Write(outStream, &b, 1) != 1)
return SZ_ERROR_WRITE;
}
}
return SZ_OK;
}
}
}
#ifndef _7ZIP_ST
/* MtCoder "Code" callback: encodes one input block (src, srcSize) with the
   coder selected by coderIndex into the output buffer selected by
   outBufIndex, allocating that buffer on first use. The number of bytes
   produced is stored in outBufsDataSizes[outBufIndex] (0 if the buffer
   cannot be allocated). Returns SZ_ERROR_MEM or the result of
   Lzma2Enc_EncodeMt1(). */
static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex,
    const Byte *src, size_t srcSize, int finished)
{
  CLzma2Enc *enc = (CLzma2Enc *)pp;
  CMtProgressThunk thunk;
  size_t produced = enc->outBufSize;  /* in: capacity, out: bytes written */
  Byte *block = enc->outBufs[outBufIndex];
  SRes status;

  enc->outBufsDataSizes[outBufIndex] = 0;

  if (block == NULL)
  {
    block = (Byte *)ISzAlloc_Alloc(enc->alloc, enc->outBufSize);
    if (block == NULL)
      return SZ_ERROR_MEM;
    enc->outBufs[outBufIndex] = block;
  }

  /* per-call progress adapter that feeds the shared mtProgress object */
  MtProgressThunk_CreateVTable(&thunk);
  thunk.mtProgress = &enc->mtCoder.mtProgress;
  thunk.inSize = 0;
  thunk.outSize = 0;

  status = Lzma2Enc_EncodeMt1(enc,
      &enc->coders[coderIndex],
      NULL, block, &produced,
      NULL, src, srcSize,
      finished,
      &thunk.vt);

  enc->outBufsDataSizes[outBufIndex] = produced;
  return status;
}
/* MtCoder "Write" callback: delivers the encoded data held in
   outBufs[outBufIndex] to the output target of the current call.
   With an output stream the data is written to it (SZ_ERROR_WRITE on a
   short write); otherwise it is copied to the caller's buffer, failing
   with SZ_ERROR_OUTPUT_EOF when the remaining space is too small. */
static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex)
{
  CLzma2Enc *enc = (CLzma2Enc *)pp;
  const Byte *chunk = enc->outBufs[outBufIndex];
  const size_t chunkSize = enc->outBufsDataSizes[outBufIndex];

  if (enc->outStream)
  {
    if (ISeqOutStream_Write(enc->outStream, chunk, chunkSize) != chunkSize)
      return SZ_ERROR_WRITE;
    return SZ_OK;
  }

  if (enc->outBuf_Rem < chunkSize)
    return SZ_ERROR_OUTPUT_EOF;

  memcpy(enc->outBuf, chunk, chunkSize);
  enc->outBuf += chunkSize;
  enc->outBuf_Rem -= chunkSize;
  return SZ_OK;
}
#endif
/* Encodes a complete input into LZMA2.

   Input is either inStream or (inData, inDataSize); output is either
   outStream or (outBuf, *outBufSize). Passing both members of a pair
   yields SZ_ERROR_PARAM. With outBuf, *outBufSize is the buffer capacity
   on entry and the number of bytes written on return.

   In multi-thread builds, when props.numBlockThreads_Reduced > 1, the work
   is handed to MtCoder with Lzma2Enc_MtCallback_Code / _Write as callbacks;
   otherwise everything is encoded in the calling thread by
   Lzma2Enc_EncodeMt1() using coders[0]. */
SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
ICompressProgress *progress)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
if (inStream && inData)
return SZ_ERROR_PARAM;
if (outStream && outBuf)
return SZ_ERROR_PARAM;
{
/* clear propsAreSet on every coder before this run */
unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++)
p->coders[i].propsAreSet = False;
}
#ifndef _7ZIP_ST
if (p->props.numBlockThreads_Reduced > 1)
{
/* vt lives on this stack frame; mtCoder.mtCallback points to it only for the duration of MtCoder_Code() below */
IMtCoderCallback2 vt;
if (!p->mtCoder_WasConstructed)
{
p->mtCoder_WasConstructed = True;
MtCoder_Construct(&p->mtCoder);
}
vt.Code = Lzma2Enc_MtCallback_Code;
vt.Write = Lzma2Enc_MtCallback_Write;
p->outStream = outStream;
p->outBuf = NULL;
p->outBuf_Rem = 0;
if (!outStream)
{
p->outBuf = outBuf;
p->outBuf_Rem = *outBufSize;
*outBufSize = 0;
}
p->mtCoder.allocBig = p->allocBig;
p->mtCoder.progress = progress;
p->mtCoder.inStream = inStream;
p->mtCoder.inData = inData;
p->mtCoder.inDataSize = inDataSize;
p->mtCoder.mtCallback = &vt;
p->mtCoder.mtCallbackObject = p;
/* blockSize is UInt64 in props; reject values that do not fit in size_t */
p->mtCoder.blockSize = (size_t)p->props.blockSize;
if (p->mtCoder.blockSize != p->props.blockSize)
return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */
{
/* per-block output buffer size: blockSize + blockSize/1024 + 16, with a wrap-around check */
size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16;
if (destBlockSize < p->mtCoder.blockSize)
return SZ_ERROR_PARAM;
/* buffers kept from an earlier call are reused only if their size still matches */
if (p->outBufSize != destBlockSize)
Lzma2Enc_FreeOutBufs(p);
p->outBufSize = destBlockSize;
}
p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{
SRes res = MtCoder_Code(&p->mtCoder);
/* p->outBuf was advanced by the Write callback; the distance is the output size */
if (!outStream)
*outBufSize = p->outBuf - outBuf;
return res;
}
}
#endif
return Lzma2Enc_EncodeMt1(p,
&p->coders[0],
outStream, outBuf, outBufSize,
inStream, inData, inDataSize,
True, /* finished */
progress);
}

55
bsnes/lzma/Lzma2Enc.h Normal file
View File

@ -0,0 +1,55 @@
/* Lzma2Enc.h -- LZMA2 Encoder
2017-07-27 : Igor Pavlov : Public domain */
#ifndef __LZMA2_ENC_H
#define __LZMA2_ENC_H
#include "LzmaEnc.h"
EXTERN_C_BEGIN
#define LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO 0
#define LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID ((UInt64)(Int64)-1)
/* LZMA2 encoder properties: the LZMA properties plus block and thread settings.
   Initialize with Lzma2EncProps_Init() and pass to Lzma2Enc_SetProps(). */
typedef struct
{
CLzmaEncProps lzmaProps;
UInt64 blockSize; /* LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO, LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID, or a block size in bytes */
int numBlockThreads_Reduced; /* Lzma2Enc_Encode2() takes its multi-thread path when this is > 1 */
int numBlockThreads_Max; /* passed to the multi-thread coder as its thread limit */
int numTotalThreads;
} CLzma2EncProps;
void Lzma2EncProps_Init(CLzma2EncProps *p);
void Lzma2EncProps_Normalize(CLzma2EncProps *p);
/* ---------- CLzmaEnc2Handle Interface ---------- */
/* Lzma2Enc_* functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
/* Opaque handle to an LZMA2 encoder object. */
typedef void * CLzma2EncHandle;
/* Allocates an encoder (from alloc) with default, normalized properties.
   Returns NULL if the allocation fails. */
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* Frees the encoder and everything it owns. p must be a valid, non-NULL handle. */
void Lzma2Enc_Destroy(CLzma2EncHandle p);
/* Copies and normalizes *props. Returns SZ_ERROR_PARAM (leaving the current
   properties unchanged) if lc + lp exceeds the limit allowed for LZMA2. */
SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props);
/* Sets the expected total input size; (UInt64)(Int64)-1 (the default) means unknown. */
void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize);
/* Returns the one-byte dictionary-size property derived from the current properties. */
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p);
/* Encodes the whole input.
   Input: either inStream or (inData, inDataSize), not both.
   Output: either outStream or (outBuf, outBufSize), not both.
   With outBuf, *outBufSize is the buffer capacity on entry and the number
   of bytes written on return. */
SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
ICompressProgress *progress);
EXTERN_C_END
#endif

111
bsnes/lzma/Lzma86.h Normal file
View File

@ -0,0 +1,111 @@
/* Lzma86.h -- LZMA + x86 (BCJ) Filter
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __LZMA86_H
#define __LZMA86_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define LZMA86_SIZE_OFFSET (1 + 5)
#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8)
/*
It's an example for LZMA + x86 Filter use.
You can use .lzma86 extension, if you write that stream to file.
.lzma86 header adds one additional byte to standard .lzma header.
.lzma86 header (14 bytes):
Offset Size Description
0 1 = 0 - no filter, pure LZMA
= 1 - x86 filter + LZMA
1 1 lc, lp and pb in encoded form
2 4 dictSize (little endian)
6 8 uncompressed size (little endian)
Lzma86_Encode
-------------
level - compression level: 0 <= level <= 9, the default value for "level" is 5.
dictSize - The dictionary size in bytes. The maximum value is
128 MB = (1 << 27) bytes for 32-bit version
1 GB = (1 << 30) bytes for 64-bit version
The default value is 16 MB = (1 << 24) bytes, for level = 5.
It's recommended to use the dictionary that is larger than 4 KB and
that can be calculated as (1 << N) or (3 << N) sizes.
For better compression ratio dictSize must be >= inSize.
filterMode:
SZ_FILTER_NO - no Filter
SZ_FILTER_YES - x86 Filter
SZ_FILTER_AUTO - it tries both alternatives to select best.
Encoder will use 2 or 3 passes:
2 passes when FILTER_NO provides better compression.
3 passes when FILTER_YES provides better compression.
Lzma86Encode allocates Data with MyAlloc functions.
RAM Requirements for compressing:
RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize
filterMode FilterBlockSize
SZ_FILTER_NO 0
SZ_FILTER_YES inSize
SZ_FILTER_AUTO inSize
Return code:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
/* Values for the filterMode argument of Lzma86_Encode(). */
enum ESzFilterMode
{
SZ_FILTER_NO, /* no filter, pure LZMA */
SZ_FILTER_YES, /* x86 filter + LZMA */
SZ_FILTER_AUTO /* tries both alternatives and keeps the better one */
};
SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode);
/*
Lzma86_GetUnpackSize:
In:
src - input data
srcLen - input data size
Out:
unpackSize - size of uncompressed stream
Return code:
SZ_OK - OK
SZ_ERROR_INPUT_EOF - Error in headers
*/
SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize);
/*
Lzma86_Decode:
In:
dest - output data
destLen - output data size
src - input data
srcLen - input data size
Out:
destLen - processed output size
srcLen - processed input size
Return code:
SZ_OK - OK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - unsupported file
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer
*/
SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen);
EXTERN_C_END
#endif

54
bsnes/lzma/Lzma86Dec.c Normal file
View File

@ -0,0 +1,54 @@
/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
2016-05-16 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Lzma86.h"
#include "Alloc.h"
#include "Bra.h"
#include "LzmaDec.h"
/* Reads the uncompressed size from a .lzma86 header.
   The size is the 8-byte little-endian field at LZMA86_SIZE_OFFSET.
   Returns SZ_ERROR_INPUT_EOF (without touching *unpackSize) when fewer
   than LZMA86_HEADER_SIZE bytes are available. */
SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize)
{
  const Byte *sizeField;
  unsigned shift;

  if (srcLen < LZMA86_HEADER_SIZE)
    return SZ_ERROR_INPUT_EOF;

  sizeField = src + LZMA86_SIZE_OFFSET;
  *unpackSize = 0;
  /* least significant byte first */
  for (shift = 0; shift < 8 * sizeof(UInt64); shift += 8)
    *unpackSize += ((UInt64)sizeField[shift / 8]) << shift;
  return SZ_OK;
}
/* Decodes a .lzma86 stream held entirely in memory.
   Byte 0 of the header selects the filter (0 = none, 1 = x86; anything
   else gives SZ_ERROR_UNSUPPORTED with *destLen set to 0), bytes 1..5 are
   the LZMA properties, and the payload starts at LZMA86_HEADER_SIZE.
   On return *srcLen is the number of input bytes consumed (header
   included) and *destLen the number of bytes produced, as reported by
   LzmaDecode(). When the filter flag is 1 the decoded data is converted
   back in place with the x86 filter. */
SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)
{
  ELzmaStatus lzmaStatus;
  SizeT payloadLen;
  SRes rc;
  int filterFlag;

  if (*srcLen < LZMA86_HEADER_SIZE)
    return SZ_ERROR_INPUT_EOF;

  filterFlag = src[0];
  if (filterFlag > 1)
  {
    *destLen = 0;
    return SZ_ERROR_UNSUPPORTED;
  }

  payloadLen = *srcLen - LZMA86_HEADER_SIZE;
  rc = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &payloadLen,
      src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &lzmaStatus, &g_Alloc);
  *srcLen = payloadLen + LZMA86_HEADER_SIZE;

  if (rc == SZ_OK && filterFlag == 1)
  {
    UInt32 x86State;
    x86_Convert_Init(x86State);
    x86_Convert(dest, *destLen, 0, &x86State, 0);
  }
  return rc;
}

106
bsnes/lzma/Lzma86Enc.c Normal file
View File

@ -0,0 +1,106 @@
/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "Lzma86.h"
#include "Alloc.h"
#include "Bra.h"
#include "LzmaEnc.h"
#define SZE_OUT_OVERFLOW SZE_DATA_ERROR
int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode)
{
size_t outSize2 = *destLen;
Byte *filteredStream;
BoolInt useFilter;
int mainResult = SZ_ERROR_OUTPUT_EOF;
CLzmaEncProps props;
LzmaEncProps_Init(&props);
props.level = level;
props.dictSize = dictSize;
*destLen = 0;
if (outSize2 < LZMA86_HEADER_SIZE)
return SZ_ERROR_OUTPUT_EOF;
{
int i;
UInt64 t = srcLen;
for (i = 0; i < 8; i++, t >>= 8)
dest[LZMA86_SIZE_OFFSET + i] = (Byte)t;
}
filteredStream = 0;
useFilter = (filterMode != SZ_FILTER_NO);
if (useFilter)
{
if (srcLen != 0)
{
filteredStream = (Byte *)MyAlloc(srcLen);
if (filteredStream == 0)
return SZ_ERROR_MEM;
memcpy(filteredStream, src, srcLen);
}
{
UInt32 x86State;
x86_Convert_Init(x86State);
x86_Convert(filteredStream, srcLen, 0, &x86State, 1);
}
}
{
size_t minSize = 0;
BoolInt bestIsFiltered = False;
/* passes for SZ_FILTER_AUTO:
0 - BCJ + LZMA
1 - LZMA
2 - BCJ + LZMA agaian, if pass 0 (BCJ + LZMA) is better.
*/
int numPasses = (filterMode == SZ_FILTER_AUTO) ? 3 : 1;
int i;
for (i = 0; i < numPasses; i++)
{
size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE;
size_t outPropsSize = 5;
SRes curRes;
BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1);
if (curModeIsFiltered && !bestIsFiltered)
break;
if (useFilter && i == 0)
curModeIsFiltered = True;
curRes = LzmaEncode(dest + LZMA86_HEADER_SIZE, &outSizeProcessed,
curModeIsFiltered ? filteredStream : src, srcLen,
&props, dest + 1, &outPropsSize, 0,
NULL, &g_Alloc, &g_Alloc);
if (curRes != SZ_ERROR_OUTPUT_EOF)
{
if (curRes != SZ_OK)
{
mainResult = curRes;
break;
}
if (outSizeProcessed <= minSize || mainResult != SZ_OK)
{
minSize = outSizeProcessed;
bestIsFiltered = curModeIsFiltered;
mainResult = SZ_OK;
}
}
}
dest[0] = (Byte)(bestIsFiltered ? 1 : 0);
*destLen = LZMA86_HEADER_SIZE + minSize;
}
if (useFilter)
MyFree(filteredStream);
return mainResult;
}

1185
bsnes/lzma/LzmaDec.c Normal file

File diff suppressed because it is too large Load Diff

234
bsnes/lzma/LzmaDec.h Normal file
View File

@ -0,0 +1,234 @@
/* LzmaDec.h -- LZMA Decoder
2018-04-21 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */
typedef
#ifdef _LZMA_PROB32
UInt32
#else
UInt16
#endif
CLzmaProb;
/* ---------- LZMA Properties ---------- */
#define LZMA_PROPS_SIZE 5
/* Decoded LZMA properties; filled in by LzmaProps_Decode() from the
   LZMA_PROPS_SIZE-byte encoded form. */
typedef struct _CLzmaProps
{
Byte lc;
Byte lp;
Byte pb;
Byte _pad_;
UInt32 dicSize;
} CLzmaProps;
/* LzmaProps_Decode - decodes properties
Returns:
SZ_OK
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
/* ---------- LZMA Decoder state ---------- */
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
#define LZMA_REQUIRED_INPUT_MAX 20
/* Decoder state used by the Dictionary and Buffer interfaces declared below.
   Call LzmaDec_Construct() on it before any LzmaDec_Allocate* function. */
typedef struct
{
/* Don't change this structure. ASM code can use it. */
CLzmaProps prop;
CLzmaProb *probs; /* set to NULL by LzmaDec_Construct() */
CLzmaProb *probs_1664;
Byte *dic; /* dictionary buffer; set to NULL by LzmaDec_Construct() */
SizeT dicBufSize;
SizeT dicPos; /* with the Dictionary Interface the caller updates this (see LzmaDec_DecodeToDic) */
const Byte *buf;
UInt32 range;
UInt32 code;
UInt32 processedPos;
UInt32 checkDicSize;
UInt32 reps[4];
UInt32 state;
UInt32 remainLen;
UInt32 numProbs;
unsigned tempBufSize;
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
void LzmaDec_Init(CLzmaDec *p);
/* There are two types of LZMA streams:
- Stream with end mark. That end mark adds about 6 bytes to compressed size.
- Stream without end mark. You must know exact uncompressed size to decompress such stream. */
typedef enum
{
LZMA_FINISH_ANY, /* finish at any point */
LZMA_FINISH_END /* block must be finished at the end */
} ELzmaFinishMode;
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
You must use LZMA_FINISH_END, when you know that current output buffer
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
and output value of destLen will be less than output buffer size limit.
You can check status result also.
You can use multiple checks to test data integrity after full decompression:
1) Check Result and "status" variable.
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
You must use correct finish mode in that case. */
typedef enum
{
LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
} ELzmaStatus;
/* ELzmaStatus is used only as output value for function call */
/* ---------- Interfaces ---------- */
/* There are 3 levels of interfaces:
1) Dictionary Interface
2) Buffer Interface
3) One Call Interface
You can select any of these interfaces, but don't mix functions from different
groups for same object. */
/* There are two variants to allocate state for Dictionary Interface:
1) LzmaDec_Allocate / LzmaDec_Free
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
You can use variant 2, if you set dictionary buffer manually.
For Buffer Interface you must always use variant 1.
LzmaDec_Allocate* can return:
SZ_OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
/* ---------- Dictionary Interface ---------- */
/* You can use it, if you want to eliminate the overhead for data copying from
dictionary to some other external buffer.
You must work with CLzmaDec variables directly in this interface.
STEPS:
LzmaDec_Construct()
LzmaDec_Allocate()
for (each new stream)
{
LzmaDec_Init()
while (it needs more decompression)
{
LzmaDec_DecodeToDic()
use data from CLzmaDec::dic and update CLzmaDec::dicPos
}
}
LzmaDec_Free()
*/
/* LzmaDec_DecodeToDic
The decoding to internal dictionary buffer (CLzmaDec::dic).
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
finishMode:
It has meaning only if the decoding reaches output limit (dicLimit).
LZMA_FINISH_ANY - Decode just dicLimit bytes.
LZMA_FINISH_END - Stream must be finished after dicLimit.
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- Buffer Interface ---------- */
/* It's zlib-like interface.
See LzmaDec_DecodeToDic description for information about STEPS and return results,
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
to work with CLzmaDec variables manually.
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
*/
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- One Call Interface ---------- */
/* LzmaDecode
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAllocPtr alloc);
EXTERN_C_END
#endif

2976
bsnes/lzma/LzmaEnc.c Normal file

File diff suppressed because it is too large Load Diff

76
bsnes/lzma/LzmaEnc.h Normal file
View File

@ -0,0 +1,76 @@
/* LzmaEnc.h -- LZMA Encoder
2017-07-27 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps
{
int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
(1 << 12) <= dictSize <= (3 << 29) for 64-bit version
default = (1 << 24) */
int lc; /* 0 <= lc <= 8, default = 3 */
int lp; /* 0 <= lp <= 4, default = 0 */
int pb; /* 0 <= pb <= 4, default = 2 */
int algo; /* 0 - fast, 1 - normal, default = 1 */
int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);
void LzmaEncProps_Normalize(CLzmaEncProps *p);
UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
/* ---------- CLzmaEncHandle Interface ---------- */
/* LzmaEnc* functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzmaEncHandle;
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END
#endif

40
bsnes/lzma/LzmaLib.c Normal file
View File

@ -0,0 +1,40 @@
/* LzmaLib.c -- LZMA library wrapper
2015-06-13 : Igor Pavlov : Public domain */
#include "Alloc.h"
#include "LzmaDec.h"
#include "LzmaEnc.h"
#include "LzmaLib.h"
/* One-call compression wrapper: builds the encoder properties from the
   individual arguments (on top of the library defaults) and runs
   LzmaEncode() with the global allocator, without an end marker and
   without a progress callback. */
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
  unsigned char *outProps, size_t *outPropsSize,
  int level, /* 0 <= level <= 9, default = 5 */
  unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
  int lc, /* 0 <= lc <= 8, default = 3 */
  int lp, /* 0 <= lp <= 4, default = 0 */
  int pb, /* 0 <= pb <= 4, default = 2 */
  int fb,  /* 5 <= fb <= 273, default = 32 */
  int numThreads /* 1 or 2, default = 2 */
)
{
  CLzmaEncProps encProps;

  /* start from the defaults, then apply the caller's settings */
  LzmaEncProps_Init(&encProps);
  encProps.numThreads = numThreads;
  encProps.fb = fb;
  encProps.pb = pb;
  encProps.lp = lp;
  encProps.lc = lc;
  encProps.dictSize = dictSize;
  encProps.level = level;

  return LzmaEncode(dest, destLen, src, srcLen,
      &encProps, outProps, outPropsSize,
      0,     /* writeEndMark */
      NULL,  /* progress */
      &g_Alloc, &g_Alloc);
}
/* One-call decompression wrapper around LzmaDecode() using the global
   allocator and LZMA_FINISH_ANY; the decoder status is not reported. */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
  const unsigned char *props, size_t propsSize)
{
  ELzmaStatus ignoredStatus;
  const unsigned propLen = (unsigned)propsSize;

  return LzmaDecode(dest, destLen, src, srcLen,
      props, propLen,
      LZMA_FINISH_ANY, &ignoredStatus, &g_Alloc);
}

131
bsnes/lzma/LzmaLib.h Normal file
View File

@ -0,0 +1,131 @@
/* LzmaLib.h -- LZMA library interface
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define MY_STDAPI int MY_STD_CALL
#define LZMA_PROPS_SIZE 5
/*
RAM requirements for LZMA:
for compression: (dictSize * 11.5 + 6 MB) + state_size
for decompression: dictSize + state_size
state_size = (4 + (1.5 << (lc + lp))) KB
by default (lc=3, lp=0), state_size = 16 KB.
LZMA properties (5 bytes) format
Offset Size Description
0 1 lc, lp and pb in encoded form.
1 4 dictSize (little endian).
*/
/*
LzmaCompress
------------
outPropsSize -
In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
LZMA Encoder will use default values for any parameter, if it is
-1 for any from: level, lc, lp, pb, fb, numThreads
0 for dictSize
level - compression level: 0 <= level <= 9;
level dictSize algo fb
0: 16 KB 0 32
1: 64 KB 0 32
2: 256 KB 0 32
3: 1 MB 0 32
4: 4 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
7+: 64 MB 1 64
The default value for "level" is 5.
algo = 0 means fast method
algo = 1 means normal method
dictSize - The dictionary size in bytes. The maximum value is
128 MB = (1 << 27) bytes for 32-bit version
1 GB = (1 << 30) bytes for 64-bit version
The default value is 16 MB = (1 << 24) bytes.
It's recommended to use the dictionary that is larger than 4 KB and
that can be calculated as (1 << N) or (3 << N) sizes.
lc - The number of literal context bits (high bits of previous literal).
It can be in the range from 0 to 8. The default value is 3.
Sometimes lc=4 gives the gain for big files.
lp - The number of literal pos bits (low bits of current position for literals).
It can be in the range from 0 to 4. The default value is 0.
The lp switch is intended for periodical data when the period is equal to 2^lp.
For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
better to set lc=0, if you change lp switch.
pb - The number of pos bits (low bits of current position).
It can be in the range from 0 to 4. The default value is 2.
The pb switch is intended for periodical data when the period is equal 2^pb.
fb - Word size (the number of fast bytes).
It can be in the range from 5 to 273. The default value is 32.
Usually, a big number gives a little bit better compression ratio and
slower compression process.
numThreads - The number of threads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
Out:
destLen - processed output size
Returns:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* default = (1 << 24) */
int lc, /* 0 <= lc <= 8, default = 3 */
int lp, /* 0 <= lp <= 4, default = 0 */
int pb, /* 0 <= pb <= 4, default = 2 */
int fb, /* 5 <= fb <= 273, default = 32 */
int numThreads /* 1 or 2, default = 2 */
);
/*
LzmaUncompress
--------------
In:
dest - output data
destLen - output data size
src - input data
srcLen - input data size
Out:
destLen - processed output size
srcLen - processed input size
Returns:
SZ_OK - OK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
*/
/* srcLen is declared as size_t *, exactly as in the definition in LzmaLib.c
   and like every other size in this interface. On entry *srcLen is the input
   size; on return it is the processed input size. */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
  const unsigned char *props, size_t propsSize);
EXTERN_C_END
#endif

601
bsnes/lzma/MtCoder.c Normal file
View File

@ -0,0 +1,601 @@
/* MtCoder.c -- Multi-thread Coder
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "MtCoder.h"
#ifndef _7ZIP_ST
/* ICompressProgress implementation for CMtProgressThunk.
   Callers report absolute totals; this converts each total into the
   increment since the previous report (remembering the new total in the
   thunk) and adds the increments to the shared MtProgress object.
   A value of (UInt64)(Int64)-1 means "not reported" and contributes 0. */
SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
{
  const UInt64 kNotReported = (UInt64)(Int64)-1;
  CMtProgressThunk *self = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
  UInt64 inDelta = 0;
  UInt64 outDelta = 0;

  if (inSize != kNotReported)
  {
    inDelta = inSize - self->inSize;
    self->inSize = inSize;
  }
  if (outSize != kNotReported)
  {
    outDelta = outSize - self->outSize;
    self->outSize = outSize;
  }
  return MtProgress_ProgressAdd(self->mtProgress, inDelta, outDelta);
}
/* Installs MtProgressThunk_Progress as the Progress callback of the thunk.
   The other fields (mtProgress, inSize, outSize) are not touched here and
   must be set by the caller. */
void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
{
p->vt.Progress = MtProgressThunk_Progress;
}
#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
/* Leaves *p as a non-signaled event: an already created event is reset,
   otherwise a new auto-reset event is created in the non-signaled state. */
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
{
  return Event_IsCreated(p)
      ? Event_Reset(p)
      : AutoResetEvent_CreateNotSignaled(p);
}
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp);
/* Prepares the start event of worker t, clears its stop flag, creates the
   thread if it does not exist yet, and signals the start event.
   Returns SZ_OK, or the first system error converted with
   MY_SRes_HRESULT_FROM_WRes(). */
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
{
  WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent);
  if (wres != 0)
    return MY_SRes_HRESULT_FROM_WRes(wres);

  t->stop = False;

  if (!Thread_WasCreated(&t->thread))
  {
    wres = Thread_Create(&t->thread, ThreadFunc, t);
    if (wres != 0)
      return MY_SRes_HRESULT_FROM_WRes(wres);
  }

  wres = Event_Set(&t->startEvent);
  if (wres != 0)
    return MY_SRes_HRESULT_FROM_WRes(wres);
  return SZ_OK;
}
/* Tears down worker t: if its thread exists, the stop flag is raised, the
   start event is signaled and the thread is waited for and closed; then
   the start event is closed and the input buffer (if any) is returned to
   the big allocator. */
static void MtCoderThread_Destruct(CMtCoderThread *t)
{
  if (Thread_WasCreated(&t->thread))
  {
    /* the stop flag is set before the start event is signaled */
    t->stop = 1;
    Event_Set(&t->startEvent);
    Thread_Wait(&t->thread);
    Thread_Close(&t->thread);
  }

  Event_Close(&t->startEvent);

  if (!t->inBuf)
    return;
  ISzAlloc_Free(t->mtCoder->allocBig, t->inBuf);
  t->inBuf = NULL;
}
/* Reads from stream until the requested number of bytes has been read,
   the stream returns 0 bytes, or a read fails.
   *processedSize is the requested size on entry and the number of bytes
   actually stored in data on return (also valid when an error is
   returned). */
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
{
  size_t remaining = *processedSize;
  *processedSize = 0;

  while (remaining != 0)
  {
    size_t got = remaining;
    const SRes res = ISeqInStream_Read(stream, data, &got);

    /* account for whatever was delivered before looking at the result */
    *processedSize += got;
    data += got;
    remaining -= got;

    if (res != SZ_OK)
      return res;
    if (got == 0)
      break;  /* end of stream */
  }
  return SZ_OK;
}
/*
ThreadFunc2() is the work loop of one coder thread. Each iteration:
1) waits for readEvent, which serializes the read stage between threads,
2) obtains one input block (from inStream into t->inBuf, or as a slice of inData),
3) takes a block slot from blocksSemaphore and may start one more coder thread,
4) sets readEvent so another thread can read,
5) encodes the block through mtCallback->Code(),
6) hands the result over for writing (write thread build), or writes ready
blocks itself in block order (default build).
ThreadFunc2() returns:
SZ_OK - in all normal cases (even for stream error or memory allocation error)
SZ_ERROR_THREAD - in case of failure in system synch function
*/
static SRes ThreadFunc2(CMtCoderThread *t)
{
CMtCoder *mtc = t->mtCoder;
for (;;)
{
/* bi: slot in mtc->blocks[] used by this iteration */
unsigned bi;
/* res: status of this block (read / code / write) */
SRes res;
/* res2: set to SZ_ERROR_THREAD only when Semaphore_Wait() fails */
SRes res2;
BoolInt finished;
/* bufIndex: output buffer taken from the free list, (unsigned)-1 if none */
unsigned bufIndex;
size_t size;
const Byte *inData;
UInt64 readProcessed = 0;
RINOK_THREAD(Event_Wait(&mtc->readEvent))
/* after Event_Wait(&mtc->readEvent) we must call Event_Set(&mtc->readEvent) in any case to unlock another threads */
if (mtc->stopReading)
{
return Event_Set(&mtc->readEvent) == 0 ? SZ_OK : SZ_ERROR_THREAD;
}
res = MtProgress_GetError(&mtc->mtProgress);
size = 0;
inData = NULL;
finished = True;
/* read stage: skipped entirely if an error was already recorded */
if (res == SZ_OK)
{
size = mtc->blockSize;
if (mtc->inStream)
{
/* stream input: the per-thread buffer is allocated on first use */
if (!t->inBuf)
{
t->inBuf = (Byte *)ISzAlloc_Alloc(mtc->allocBig, mtc->blockSize);
if (!t->inBuf)
res = SZ_ERROR_MEM;
}
if (res == SZ_OK)
{
res = FullRead(mtc->inStream, t->inBuf, &size);
readProcessed = mtc->readProcessed + size;
mtc->readProcessed = readProcessed;
}
if (res != SZ_OK)
{
mtc->readRes = res;
/* after reading error - we can stop encoding of previous blocks */
MtProgress_SetError(&mtc->mtProgress, res);
}
else
/* a short block means the stream is exhausted */
finished = (size != mtc->blockSize);
}
else
{
/* memory input: take the next slice of at most blockSize bytes from inData */
size_t rem;
readProcessed = mtc->readProcessed;
rem = mtc->inDataSize - (size_t)readProcessed;
if (size > rem)
size = rem;
inData = mtc->inData + (size_t)readProcessed;
readProcessed += size;
mtc->readProcessed = readProcessed;
finished = (mtc->inDataSize == (size_t)readProcessed);
}
}
/* we must get some block from blocksSemaphore before Event_Set(&mtc->readEvent) */
res2 = SZ_OK;
if (Semaphore_Wait(&mtc->blocksSemaphore) != 0)
{
res2 = SZ_ERROR_THREAD;
if (res == SZ_OK)
{
res = res2;
// MtProgress_SetError(&mtc->mtProgress, res);
}
}
/* claim the next slot of the blocks[] ring */
bi = mtc->blockIndex;
if (++mtc->blockIndex >= mtc->numBlocksMax)
mtc->blockIndex = 0;
bufIndex = (unsigned)(int)-1;
if (res == SZ_OK)
res = MtProgress_GetError(&mtc->mtProgress);
if (res != SZ_OK)
finished = True;
if (!finished)
{
/* more data is expected: start one more coder thread, if the limit allows */
if (mtc->numStartedThreads < mtc->numStartedThreadsLimit
&& mtc->expectedDataSize != readProcessed)
{
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);
if (res == SZ_OK)
mtc->numStartedThreads++;
else
{
MtProgress_SetError(&mtc->mtProgress, res);
finished = True;
}
}
}
if (finished)
mtc->stopReading = True;
/* end of the read stage: let the next thread read */
RINOK_THREAD(Event_Set(&mtc->readEvent))
if (res2 != SZ_OK)
return res2;
if (res == SZ_OK)
{
/* pop an output buffer index from the free list (guarded by mtc->cs) */
CriticalSection_Enter(&mtc->cs);
bufIndex = mtc->freeBlockHead;
mtc->freeBlockHead = mtc->freeBlockList[bufIndex];
CriticalSection_Leave(&mtc->cs);
res = mtc->mtCallback->Code(mtc->mtCallbackObject, t->index, bufIndex,
mtc->inStream ? t->inBuf : inData, size, finished);
// MtProgress_Reinit(&mtc->mtProgress, t->index);
if (res != SZ_OK)
MtProgress_SetError(&mtc->mtProgress, res);
}
{
/* publish the outcome of this block in its slot */
CMtCoderBlock *block = &mtc->blocks[bi];
block->res = res;
block->bufIndex = bufIndex;
block->finished = finished;
}
#ifdef MTCODER__USE_WRITE_THREAD
/* write thread build: signal the slot's event; MtCoder_Code() waits on it and writes */
RINOK_THREAD(Event_Set(&mtc->writeEvents[bi]))
#else
{
/* default build: the thread whose block is next in order (writeIndex) writes it */
unsigned wi;
{
CriticalSection_Enter(&mtc->cs);
wi = mtc->writeIndex;
if (wi == bi)
/* our block is next: mark "writer active" so no other thread matches writeIndex */
mtc->writeIndex = (unsigned)(int)-1;
else
/* not our turn: flag the block as ready for the active writer */
mtc->ReadyBlocks[bi] = True;
CriticalSection_Leave(&mtc->cs);
}
if (wi != bi)
{
if (res != SZ_OK || finished)
return 0;
continue;
}
if (mtc->writeRes != SZ_OK)
res = mtc->writeRes;
/* write our block, then every following block that is already flagged ready */
for (;;)
{
if (res == SZ_OK && bufIndex != (unsigned)(int)-1)
{
res = mtc->mtCallback->Write(mtc->mtCallbackObject, bufIndex);
if (res != SZ_OK)
{
mtc->writeRes = res;
MtProgress_SetError(&mtc->mtProgress, res);
}
}
if (++wi >= mtc->numBlocksMax)
wi = 0;
{
BoolInt isReady;
CriticalSection_Enter(&mtc->cs);
/* push the output buffer back onto the free list */
if (bufIndex != (unsigned)(int)-1)
{
mtc->freeBlockList[bufIndex] = mtc->freeBlockHead;
mtc->freeBlockHead = bufIndex;
}
isReady = mtc->ReadyBlocks[wi];
if (isReady)
mtc->ReadyBlocks[wi] = False;
else
/* next block is not coded yet: its coder thread becomes the writer */
mtc->writeIndex = wi;
CriticalSection_Leave(&mtc->cs);
/* the block slot is free again */
RINOK_THREAD(Semaphore_Release1(&mtc->blocksSemaphore))
if (!isReady)
break;
}
{
/* load the state of the next ready block; the first error seen is kept in res */
CMtCoderBlock *block = &mtc->blocks[wi];
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;
bufIndex = block->bufIndex;
finished = block->finished;
}
}
}
#endif
if (finished || res != SZ_OK)
return 0;
}
}
/* OS thread entry point of a coder thread (pp is its CMtCoderThread).
   The thread sleeps on startEvent; every wake-up either ends the thread (stop flag
   set) or runs one ThreadFunc2() session. A failing ThreadFunc2() is recorded in
   mtProgress. In the build without a write thread, the thread that raises
   numFinishedThreads to numStartedThreads signals finishedEvent.
   Returns 0 on a requested stop, SZ_ERROR_THREAD if an event call fails. */
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
{
  CMtCoderThread *self = (CMtCoderThread *)pp;

  while (Event_Wait(&self->startEvent) == 0)
  {
    CMtCoder *coder;
    SRes status;

    if (self->stop)
      return 0;

    status = ThreadFunc2(self);
    coder = self->mtCoder;
    if (status != SZ_OK)
      MtProgress_SetError(&coder->mtProgress, status);

    #ifndef MTCODER__USE_WRITE_THREAD
    {
      const unsigned doneCount = (unsigned)InterlockedIncrement(&coder->numFinishedThreads);
      if (doneCount == coder->numStartedThreads
          && Event_Set(&coder->finishedEvent) != 0)
        return SZ_ERROR_THREAD;
    }
    #endif
  }

  return SZ_ERROR_THREAD;
}
/* Puts a CMtCoder into its initial state: caller-supplied inputs are cleared,
   all synchronization objects are constructed (not yet created), every thread
   slot is bound to this coder, and both critical sections are initialized.
   Nothing is allocated and no thread is started here. */
void MtCoder_Construct(CMtCoder *p)
{
  unsigned n;
  CMtCoderThread *thread;

  /* input variables */
  p->blockSize = 0;
  p->numThreadsMax = 0;
  p->expectedDataSize = (UInt64)(Int64)-1;

  p->inStream = NULL;
  p->inData = NULL;
  p->inDataSize = 0;

  p->progress = NULL;
  p->allocBig = NULL;

  p->mtCallback = NULL;
  p->mtCallbackObject = NULL;

  /* internal state */
  p->allocatedBufsSize = 0;

  Event_Construct(&p->readEvent);
  Semaphore_Construct(&p->blocksSemaphore);

  for (n = 0, thread = p->threads; n < MTCODER__THREADS_MAX; n++, thread++)
  {
    thread->mtCoder = p;
    thread->index = n;
    thread->inBuf = NULL;
    thread->stop = False;
    Event_Construct(&thread->startEvent);
    Thread_Construct(&thread->thread);
  }

  #ifdef MTCODER__USE_WRITE_THREAD
  for (n = 0; n < MTCODER__BLOCKS_MAX; n++)
    Event_Construct(&p->writeEvents[n]);
  #else
  Event_Construct(&p->finishedEvent);
  #endif

  CriticalSection_Init(&p->cs);
  CriticalSection_Init(&p->mtProgress.cs);
}
/* Stops and destroys every coder thread slot (which also frees the input buffers),
   then closes the coder's events and the block semaphore.
   The two critical sections are left alone; MtCoder_Destruct() deletes them. */
static void MtCoder_Free(CMtCoder *p)
{
  unsigned n = 0;

  /*
  p->stopReading = True;
  if (Event_IsCreated(&p->readEvent))
    Event_Set(&p->readEvent);
  */

  while (n < MTCODER__THREADS_MAX)
  {
    MtCoderThread_Destruct(&p->threads[n]);
    n++;
  }

  Event_Close(&p->readEvent);
  Semaphore_Close(&p->blocksSemaphore);

  #ifdef MTCODER__USE_WRITE_THREAD
  for (n = 0; n < MTCODER__BLOCKS_MAX; n++)
    Event_Close(&p->writeEvents[n]);
  #else
  Event_Close(&p->finishedEvent);
  #endif
}
/* Counterpart of MtCoder_Construct(): releases threads, events and buffers through
   MtCoder_Free(), then deletes the coder's and the progress object's critical sections. */
void MtCoder_Destruct(CMtCoder *p)
{
  CMtProgress *progressState = &p->mtProgress;

  MtCoder_Free(p);
  CriticalSection_Delete(&p->cs);
  CriticalSection_Delete(&progressState->cs);
}
/* Runs one complete multi-threaded coding session using the input variables set in p.
It sizes the block ring, resets all per-session state, starts the first coder
thread (further threads are started on demand by ThreadFunc2()), and then waits
until the session is over.
Returns SZ_OK, or the first error found among: the wait/write loop result,
readRes, mtProgress.res and (build without write thread) writeRes.
On error the threads and synchronization objects are released via MtCoder_Free().
NOTE(review): the early returns produced by RINOK / RINOK_THREAD below skip that
MtCoder_Free() call. */
SRes MtCoder_Code(CMtCoder *p)
{
unsigned numThreads = p->numThreadsMax;
unsigned numBlocksMax;
unsigned i;
SRes res = SZ_OK;
if (numThreads > MTCODER__THREADS_MAX)
numThreads = MTCODER__THREADS_MAX;
numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads);
/* smaller blocks get up to three extra block slots (below 64 MiB, 16 MiB, 4 MiB) */
if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;
if (numBlocksMax > MTCODER__BLOCKS_MAX)
numBlocksMax = MTCODER__BLOCKS_MAX;
/* input buffers sized for another block size are dropped; ThreadFunc2() reallocates lazily */
if (p->blockSize != p->allocatedBufsSize)
{
for (i = 0; i < MTCODER__THREADS_MAX; i++)
{
CMtCoderThread *t = &p->threads[i];
if (t->inBuf)
{
ISzAlloc_Free(p->allocBig, t->inBuf);
t->inBuf = NULL;
}
}
p->allocatedBufsSize = p->blockSize;
}
p->readRes = SZ_OK;
MtProgress_Init(&p->mtProgress, p->progress);
#ifdef MTCODER__USE_WRITE_THREAD
for (i = 0; i < numBlocksMax; i++)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i]));
}
#else
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));
#endif
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
/* the semaphore is recreated every session so that its count equals numBlocksMax */
if (Semaphore_IsCreated(&p->blocksSemaphore))
{
RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore));
}
RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
/* chain all output buffer indexes into the free list: 0 -> 1 -> ... -> (unsigned)-1 */
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
p->freeBlockList[i] = i + 1;
p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1;
p->freeBlockHead = 0;
p->readProcessed = 0;
p->blockIndex = 0;
p->numBlocksMax = numBlocksMax;
p->stopReading = False;
#ifndef MTCODER__USE_WRITE_THREAD
p->writeIndex = 0;
p->writeRes = SZ_OK;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
p->ReadyBlocks[i] = False;
p->numFinishedThreads = 0;
#endif
p->numStartedThreadsLimit = numThreads;
p->numStartedThreads = 0;
/* only the first thread is started here */
// for (i = 0; i < numThreads; i++)
{
CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
RINOK(MtCoderThread_CreateAndStart(nextThread));
}
/* open the read stage for the first thread */
RINOK_THREAD(Event_Set(&p->readEvent))
#ifdef MTCODER__USE_WRITE_THREAD
{
/* this thread is the writer: consume block slots in ring order until the final block */
unsigned bi = 0;
for (;; bi++)
{
if (bi >= numBlocksMax)
bi = 0;
RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))
{
const CMtCoderBlock *block = &p->blocks[bi];
unsigned bufIndex = block->bufIndex;
BoolInt finished = block->finished;
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;
if (bufIndex != (unsigned)(int)-1)
{
if (res == SZ_OK)
{
res = p->mtCallback->Write(p->mtCallbackObject, bufIndex);
if (res != SZ_OK)
MtProgress_SetError(&p->mtProgress, res);
}
/* return the output buffer to the free list */
CriticalSection_Enter(&p->cs);
{
p->freeBlockList[bufIndex] = p->freeBlockHead;
p->freeBlockHead = bufIndex;
}
CriticalSection_Leave(&p->cs);
}
RINOK_THREAD(Semaphore_Release1(&p->blocksSemaphore))
if (finished)
break;
}
}
}
#else
{
/* coder threads write by themselves; wait until the last one signals finishedEvent */
WRes wres = Event_Wait(&p->finishedEvent);
res = MY_SRes_HRESULT_FROM_WRes(wres);
}
#endif
/* pick the first recorded error, in this order of precedence */
if (res == SZ_OK)
res = p->readRes;
if (res == SZ_OK)
res = p->mtProgress.res;
#ifndef MTCODER__USE_WRITE_THREAD
if (res == SZ_OK)
res = p->writeRes;
#endif
if (res != SZ_OK)
MtCoder_Free(p);
return res;
}
#endif

141
bsnes/lzma/MtCoder.h Normal file
View File

@ -0,0 +1,141 @@
/* MtCoder.h -- Multi-thread Coder
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __MT_CODER_H
#define __MT_CODER_H
#include "MtDec.h"
EXTERN_C_BEGIN
/*
if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream
if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream
*/
/* #define MTCODER__USE_WRITE_THREAD */
/* Compile-time limits of the coder.
Multi-threaded build: up to 64 threads; the base number of block slots for n
threads is n + n/8 + 1, and MTCODER__BLOCKS_MAX adds 3 more on top of the
value for 64 threads (73 + 3 = 76).
Single-threaded build (_7ZIP_ST): one thread, one block. */
#ifndef _7ZIP_ST
#define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
#define MTCODER__THREADS_MAX 64
#define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3)
#else
#define MTCODER__THREADS_MAX 1
#define MTCODER__BLOCKS_MAX 1
#endif
#ifndef _7ZIP_ST
/* Adapter that exposes a shared CMtProgress through the ICompressProgress interface.
It remembers the last totals it was given so that deltas can be forwarded to
MtProgress_ProgressAdd(). */
typedef struct
{
/* interface vtable; filled in by MtProgressThunk_CreateVTable() */
ICompressProgress vt;
/* shared progress object that receives the deltas */
CMtProgress *mtProgress;
/* totals most recently reported through this thunk */
UInt64 inSize;
UInt64 outSize;
} CMtProgressThunk;
/* Installs the Progress callback into p->vt. */
void MtProgressThunk_CreateVTable(CMtProgressThunk *p);
/* Resets the remembered totals to zero. */
#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; }
struct _CMtCoder;
/* State of one coder thread slot inside CMtCoder. */
typedef struct
{
/* owning coder */
struct _CMtCoder *mtCoder;
/* position of this slot in CMtCoder::threads; passed to the Code callback as coderIndex */
unsigned index;
/* non-zero asks the thread function to exit on its next wake-up */
int stop;
/* input block buffer of blockSize bytes; allocated lazily, only for stream input */
Byte *inBuf;
/* signaled to start a coding session or to deliver the stop request */
CAutoResetEvent startEvent;
CThread thread;
} CMtCoderThread;
/* Callbacks supplied by the user of CMtCoder. p is CMtCoder::mtCallbackObject. */
typedef struct
{
/* Codes one input block (src, srcSize) on coder thread coderIndex into output
buffer outBufIndex. finished is non-zero for the last block.
Returns SZ_OK or an error code, which the coder records and which stops the session. */
SRes (*Code)(void *p, unsigned coderIndex, unsigned outBufIndex,
const Byte *src, size_t srcSize, int finished);
/* Writes the content of output buffer outBufIndex; called in block order. */
SRes (*Write)(void *p, unsigned outBufIndex);
} IMtCoderCallback2;
/* Outcome of one coded block, published by the coder thread for the writer. */
typedef struct
{
/* status of reading/coding this block */
SRes res;
/* output buffer that holds the coded data, or (unsigned)-1 if none was taken */
unsigned bufIndex;
/* non-zero if this is the last block of the session */
BoolInt finished;
} CMtCoderBlock;
/* Multi-threaded block coder: splits the input into blocks of blockSize bytes,
codes them on several threads through mtCallback->Code() and writes the results
in order through mtCallback->Write(). */
typedef struct _CMtCoder
{
/* input variables */
size_t blockSize; /* size of input block */
unsigned numThreadsMax; /* requested thread count; clamped to MTCODER__THREADS_MAX */
UInt64 expectedDataSize; /* (UInt64)(Int64)-1 by default; no further thread is started once readProcessed equals it */
ISeqInStream *inStream; /* if not NULL, input is read from this stream */
const Byte *inData; /* otherwise input is the memory range (inData, inDataSize) */
size_t inDataSize;
ICompressProgress *progress;
ISzAllocPtr allocBig; /* allocator for the per-thread input buffers */
IMtCoderCallback2 *mtCallback;
void *mtCallbackObject; /* passed as first argument to the callbacks */
/* internal variables */
size_t allocatedBufsSize; /* blockSize the existing input buffers were allocated for */
CAutoResetEvent readEvent; /* serializes the read stage between coder threads */
CSemaphore blocksSemaphore; /* counts free block slots (numBlocksMax at session start) */
BoolInt stopReading; /* set once the last block was read or an error occurred */
SRes readRes; /* error of the input read, if any */
#ifdef MTCODER__USE_WRITE_THREAD
CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX]; /* per block slot: "block is coded" */
#else
CAutoResetEvent finishedEvent; /* set when all started threads have finished */
SRes writeRes; /* first error returned by the Write callback */
unsigned writeIndex; /* block slot to be written next; (unsigned)-1 while a thread is writing */
Byte ReadyBlocks[MTCODER__BLOCKS_MAX]; /* per block slot: coded and waiting for the writer */
LONG numFinishedThreads;
#endif
unsigned numStartedThreadsLimit;
unsigned numStartedThreads;
unsigned numBlocksMax; /* number of block slots used in this session */
unsigned blockIndex; /* next slot of the blocks[] ring to hand out */
UInt64 readProcessed; /* total number of input bytes consumed so far */
CCriticalSection cs; /* guards the free list and writeIndex / ReadyBlocks */
unsigned freeBlockHead; /* head of the free output buffer list */
unsigned freeBlockList[MTCODER__BLOCKS_MAX]; /* next-links of that list; (unsigned)-1 ends it */
CMtProgress mtProgress;
CMtCoderBlock blocks[MTCODER__BLOCKS_MAX];
CMtCoderThread threads[MTCODER__THREADS_MAX];
} CMtCoder;
/* Initializes p; must be called before any other MtCoder function. Allocates nothing. */
void MtCoder_Construct(CMtCoder *p);
/* Stops all threads and releases everything owned by p. */
void MtCoder_Destruct(CMtCoder *p);
/* Runs one coding session with the input variables currently set in p.
Returns SZ_OK or the first error recorded during the session. */
SRes MtCoder_Code(CMtCoder *p);
#endif
EXTERN_C_END
#endif

1138
bsnes/lzma/MtDec.c Normal file

File diff suppressed because it is too large Load Diff

201
bsnes/lzma/MtDec.h Normal file
View File

@ -0,0 +1,201 @@
/* MtDec.h -- Multi-thread Decoder
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
#include "7zTypes.h"
#ifndef _7ZIP_ST
#include "Threads.h"
#endif
EXTERN_C_BEGIN
#ifndef _7ZIP_ST
/* Upper bound for the number of decoder threads.
NOTE(review): this test sits inside an enclosing "#ifndef _7ZIP_ST" region, so the
#else branch below cannot be selected as the header is written. */
#ifndef _7ZIP_ST
#define MTDEC__THREADS_MAX 32
#else
#define MTDEC__THREADS_MAX 1
#endif
/* Progress and error state shared by several worker threads. */
typedef struct
{
/* user callback that receives progress reports; may be NULL */
ICompressProgress *progress;
/* recorded error code, SZ_OK while none was recorded */
SRes res;
UInt64 totalInSize;
UInt64 totalOutSize;
/* critical section for this object; presumably guards the fields above - the
functions that take it are not defined in this header */
CCriticalSection cs;
} CMtProgress;
/* Starts a new session on p with the given user callback. */
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress);
SRes MtProgress_Progress_ST(CMtProgress *p);
/* Adds the given deltas to the totals; returns an SRes. */
SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize);
/* Returns the recorded error code. */
SRes MtProgress_GetError(CMtProgress *p);
/* Records an error code. */
void MtProgress_SetError(CMtProgress *p, SRes res);
struct _CMtDec;
/* State of one decoder thread slot inside CMtDec. */
typedef struct
{
/* owning decoder */
struct _CMtDec *mtDec;
/* position of this slot in CMtDec::threads */
unsigned index;
void *inBuf;
size_t inDataSize_Start; // size of input data in start block
UInt64 inDataSize; // total size of input data in all blocks
CThread thread;
/* NOTE(review): judging by the names, these events pass the read and write turns
between threads - confirm against MtDec.c */
CAutoResetEvent canRead;
CAutoResetEvent canWrite;
void *allocaPtr;
} CMtDecThread;
/* Releases the input buffers held by thread slot t. */
void MtDecThread_FreeInBufs(CMtDecThread *t);
/* Result of IMtDecCallback::Parse(), reported in CMtDecCallbackInfo::state. */
typedef enum
{
MTDEC_PARSE_CONTINUE, // continue this block with more input data
MTDEC_PARSE_OVERFLOW, // MT buffers overflow, need switch to single-thread
MTDEC_PARSE_NEW, // new block
MTDEC_PARSE_END // end of block threading. But we still can return to threading after Write(&needContinue)
} EMtDecParseState;
/* Argument block exchanged with IMtDecCallback::Parse().
Fields under "in" are supplied to the callback, fields under "out" are its answer;
srcSize is used in both directions. */
typedef struct
{
// in
int startCall;
const Byte *src;
size_t srcSize;
// in : (srcSize == 0) is allowed
// out : it's allowed to return less that actually was used ?
int srcFinished;
// out
EMtDecParseState state;
BoolInt canCreateNewThread;
UInt64 outPos; // check it (size_t)
} CMtDecCallbackInfo;
/* Callbacks supplied by the user of CMtDec. p is CMtDec::mtCallbackObject and
coderIndex identifies the decoder thread slot the call is made for. */
typedef struct
{
/* Inspects input data and reports in ci->state how the current block proceeds. */
void (*Parse)(void *p, unsigned coderIndex, CMtDecCallbackInfo *ci);
// PreCode() and Code():
// (SRes_return_result != SZ_OK) means stop decoding, no need another blocks
SRes (*PreCode)(void *p, unsigned coderIndex);
SRes (*Code)(void *p, unsigned coderIndex,
const Byte *src, size_t srcSize, int srcFinished,
UInt64 *inCodePos, UInt64 *outCodePos, int *stop);
// stop - means stop another Code calls
/* Write() must be called, if Parse() was called
set (needWrite) if
{
&& (was not interrupted by progress)
&& (was not interrupted in previous block)
}
out:
if (*needContinue), decoder still need to continue decoding with new iteration,
even after MTDEC_PARSE_END
if (*canRecode), we didn't flush current block data, so we still can decode current block later.
*/
SRes (*Write)(void *p, unsigned coderIndex,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode);
} IMtDecCallback;
/* Multi-threaded decoder driver. Callers set the "input variables" and run
MtDec_Code(); the "internal variables" are maintained by the implementation,
which is not part of this header, so they are left undocumented here. */
typedef struct _CMtDec
{
/* input variables */
size_t inBufSize; /* size of input block */
unsigned numThreadsMax;
// size_t inBlockMax;
unsigned numThreadsMax_2;
ISeqInStream *inStream;
// const Byte *inData;
// size_t inDataSize;
ICompressProgress *progress;
ISzAllocPtr alloc;
IMtDecCallback *mtCallback;
void *mtCallbackObject; /* passed as first argument to the callbacks */
/* internal variables */
size_t allocatedBufsSize;
BoolInt exitThread;
WRes exitThreadWRes;
UInt64 blockIndex;
BoolInt isAllocError;
BoolInt overflow;
SRes threadingErrorSRes;
BoolInt needContinue;
// CAutoResetEvent finishedEvent;
SRes readRes;
SRes codeRes;
BoolInt wasInterrupted;
unsigned numStartedThreads_Limit;
unsigned numStartedThreads;
Byte *crossBlock;
size_t crossStart;
size_t crossEnd;
UInt64 readProcessed;
BoolInt readWasFinished;
UInt64 inProcessed;
unsigned filledThreadStart;
unsigned numFilledThreads;
/* NOTE(review): the enclosing region is already "#ifndef _7ZIP_ST", so this inner
test is always true where the struct is visible */
#ifndef _7ZIP_ST
BoolInt needInterrupt;
UInt64 interruptIndex;
CMtProgress mtProgress;
CMtDecThread threads[MTDEC__THREADS_MAX];
#endif
} CMtDec;
/* Initializes / releases a CMtDec object. */
void MtDec_Construct(CMtDec *p);
void MtDec_Destruct(CMtDec *p);
/*
MtDec_Code() returns:
SZ_OK - in most cases
MY_SRes_HRESULT_FROM_WRes(WRes_error) - in case of unexpected error in threading function
*/
SRes MtDec_Code(CMtDec *p);
/* Access to the decoder's input buffers for callers of MtDec_Code().
NOTE(review): exact contracts are defined in MtDec.c - confirm there before use. */
Byte *MtDec_GetCrossBuff(CMtDec *p);
int MtDec_PrepareRead(CMtDec *p);
const Byte *MtDec_Read(CMtDec *p, size_t *inLim);
#endif
EXTERN_C_END
#endif

85
bsnes/lzma/Ppmd.h Normal file
View File

@ -0,0 +1,85 @@
/* Ppmd.h -- PPMD codec common code
2017-04-03 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
#define __PPMD_H
#include "CpuArch.h"
EXTERN_C_BEGIN
/* On 32-bit CPUs the model stores real pointers as references (PPMD_32BIT);
otherwise references are 32-bit offsets. */
#ifdef MY_CPU_32BIT
#define PPMD_32BIT
#endif
/* Fixed-point layout of binary-context probabilities: 7 integer bits + 7 period bits. */
#define PPMD_INT_BITS 7
#define PPMD_PERIOD_BITS 7
#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))
/* (summ + 2^(shift - round)) >> shift */
#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
/* New probability value after observing outcome 0 / outcome 1. */
#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))
/* Number of allocator size classes per group; with N1 = N2 = N3 = 4 this gives
N4 = (131 - 4 - 8 - 12) / 4 = 26 and PPMD_NUM_INDEXES = 38. */
#define PPMD_N1 4
#define PPMD_N2 4
#define PPMD_N3 4
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
/* Both structures below are declared with 1-byte packing. */
#pragma pack(push, 1)
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
typedef struct
{
UInt16 Summ; /* Freq */
Byte Shift; /* Speed of Freq change; low Shift is for fast change */
Byte Count; /* Count to next change of Shift */
} CPpmd_See;
/* Adaptation step of a SEE context: while Shift is below PPMD_PERIOD_BITS, Count is
decremented; when it reaches 0, Summ is doubled, Count is reloaded with
3 << Shift and Shift is incremented.
NOTE: expands to a bare "if" statement, so it must not be followed by "else". */
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
{ (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
/* One symbol entry of a PPM context. The successor reference is stored as two
16-bit halves. */
typedef struct
{
Byte Symbol;
Byte Freq;
UInt16 SuccessorLow;
UInt16 SuccessorHigh;
} CPpmd_State;
#pragma pack(pop)
/* Reference types used inside the model memory: a real pointer when PPMD_32BIT
is defined, otherwise a UInt32 value (the codec sources convert it to and from
an offset relative to the model's Base pointer). */
/* reference to a CPpmd_State array */
typedef
#ifdef PPMD_32BIT
CPpmd_State *
#else
UInt32
#endif
CPpmd_State_Ref;
/* untyped reference */
typedef
#ifdef PPMD_32BIT
void *
#else
UInt32
#endif
CPpmd_Void_Ref;
/* reference to a byte */
typedef
#ifdef PPMD_32BIT
Byte *
#else
UInt32
#endif
CPpmd_Byte_Ref;
/* Sets all bits in a 256-byte array of word-sized elements.
   p: expression yielding a pointer/array whose elements can hold ~(size_t)0;
      the element count is derived from sizeof((p)[0]).
   The argument is parenthesized at every use so that expressions such as
   (base + offset) index correctly. Expands to a braced block that declares a
   local named z, so p must not refer to a caller variable of that name. */
#define PPMD_SetAllBitsIn256Bytes(p) \
  { size_t z; for (z = 0; z < 256 / sizeof((p)[0]); z += 8) { \
    (p)[z+7] = (p)[z+6] = (p)[z+5] = (p)[z+4] = (p)[z+3] = (p)[z+2] = (p)[z+1] = (p)[z+0] = ~(size_t)0; }}
EXTERN_C_END
#endif

712
bsnes/lzma/Ppmd7.c Normal file
View File

@ -0,0 +1,712 @@
/* Ppmd7.c -- PPMdH codec
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include <string.h>
#include "Ppmd7.h"
const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
/* Per-column seeds used by RestartModel() to initialize the BinSumm table. */
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
/* A state frequency above MAX_FREQ triggers Rescale(). */
#define MAX_FREQ 124
/* Allocation granularity of the model memory, in bytes. */
#define UNIT_SIZE 12
/* units -> bytes */
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
/* units -> size class index, and size class index -> units (tables built in Ppmd7_Construct) */
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
#define I2U(indx) (p->Indx2Units[indx])
/* pointer -> reference: the pointer itself (PPMD_32BIT) or its byte offset from p->Base.
All macros below expect a variable named p (CPpmd7 *) in scope. */
#ifdef PPMD_32BIT
#define REF(ptr) (ptr)
#else
#define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
#endif
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
/* reference -> context pointer */
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
/* state array of a context / the single state embedded in a one-symbol context */
#define STATS(ctx) Ppmd7_GetStats(p, ctx)
#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
/* the context referenced by ctx->Suffix */
#define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR;
struct CPpmd7_Node_;
/* Reference to a free-block node: pointer (PPMD_32BIT) or 32-bit offset from p->Base. */
typedef
#ifdef PPMD_32BIT
struct CPpmd7_Node_ *
#else
UInt32
#endif
CPpmd7_Node_Ref;
/* Header written into a free block while GlueFreeBlocks() links all free blocks
into a doubly-linked list. NU is the block size in units. */
typedef struct CPpmd7_Node_
{
UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
UInt16 NU;
CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
CPpmd7_Node_Ref Prev;
} CPpmd7_Node;
/* node reference -> node pointer */
#ifdef PPMD_32BIT
#define NODE(ptr) (ptr)
#else
#define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
#endif
/* One-time initialization of a CPpmd7 object: clears Base (no memory allocated)
   and builds the constant lookup tables
     Indx2Units / Units2Indx - allocator size classes <-> block size in units
     NS2BSIndx, NS2Indx      - tables indexed by a number of states
     HB2Flag                 - 0 for byte values below 0x40, 8 otherwise. */
void Ppmd7_Construct(CPpmd7 *p)
{
  unsigned idx, units, value, countdown;

  p->Base = 0;

  /* size classes grow by 1 unit (classes 0..3), 2 (4..7), 3 (8..11), then 4 */
  units = 0;
  for (idx = 0; idx < PPMD_NUM_INDEXES; idx++)
  {
    unsigned run = (idx < 12) ? (idx >> 2) + 1 : 4;
    for (; run != 0; run--)
      p->Units2Indx[units++] = (Byte)idx;
    p->Indx2Units[idx] = (Byte)units;
  }

  p->NS2BSIndx[0] = (0 << 1);
  p->NS2BSIndx[1] = (1 << 1);
  memset(p->NS2BSIndx + 2, (2 << 1), 9);
  memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);

  /* NS2Indx: identity for 0..2, then runs of equal values whose length grows by one each time */
  p->NS2Indx[0] = (Byte)0;
  p->NS2Indx[1] = (Byte)1;
  p->NS2Indx[2] = (Byte)2;
  value = 3;
  countdown = 1;
  for (idx = 3; idx < 256; idx++)
  {
    p->NS2Indx[idx] = (Byte)value;
    countdown--;
    if (countdown == 0)
    {
      value++;
      countdown = value - 2;
    }
  }

  memset(p->HB2Flag, 0, 0x40);
  memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);
}
/* Returns the model memory to alloc and marks the object as having none
   (Base = NULL, Size = 0). */
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->Base);
  p->Base = NULL;
  p->Size = 0;
}
/* Makes sure the model owns a memory block for `size` bytes.
   An existing block of exactly that size is kept; otherwise the old block is
   freed and a new one of AlignOffset + size (+ UNIT_SIZE when references are
   offsets) bytes is allocated.
   Returns True on success, False if the allocation failed (Size is then 0). */
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
{
  size_t extra;

  if (p->Base && p->Size == size)
    return True;

  Ppmd7_Free(p, alloc);

  #ifdef PPMD_32BIT
  extra = 0;
  p->AlignOffset = (4 - size) & 3;
  #else
  /* one extra unit past the end of the model area */
  extra = UNIT_SIZE;
  p->AlignOffset = 4 - (size & 3);
  #endif

  p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + extra);
  if (p->Base == 0)
    return False;

  p->Size = size;
  return True;
}
/* Pushes the free block `node` onto the free list of size class indx.
   The first bytes of the block are used to store the link to the previous head. */
static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
{
  CPpmd_Void_Ref *link = (CPpmd_Void_Ref *)node;
  *link = p->FreeList[indx];
  p->FreeList[indx] = REF(node);
}
/* Pops and returns the head block of the free list of size class indx.
   The list must not be empty. */
static void *RemoveNode(CPpmd7 *p, unsigned indx)
{
  void *head = Ppmd7_GetPtr(p, p->FreeList[indx]);
  p->FreeList[indx] = *(CPpmd_Void_Ref *)head;
  return head;
}
/* Shrinks the block at ptr from size class oldIndx to newIndx and gives the cut-off
   tail back to the free lists. If the tail length is not itself a class size, it
   is split in two: the largest class that fits stays at the front of the tail
   and the remainder behind it is inserted separately. */
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
  const unsigned tailUnits = I2U(oldIndx) - I2U(newIndx);
  Byte *tail = (Byte *)ptr + U2B(I2U(newIndx));
  unsigned cls = U2I(tailUnits);

  if (I2U(cls) != tailUnits)
  {
    unsigned frontUnits;
    cls--;
    frontUnits = I2U(cls);
    InsertNode(p, tail + U2B(frontUnits), tailUnits - frontUnits - 1);
  }
  InsertNode(p, tail, cls);
}
/* Defragments the allocator: all free blocks are collected into one doubly-linked
list, physically adjacent free blocks are merged, and the merged blocks are
redistributed over the per-size free lists. Also reloads GlueCount with 255. */
static void GlueFreeBlocks(CPpmd7 *p)
{
/* head is the list sentinel: a local node (PPMD_32BIT), or the node located at
offset AlignOffset + Size, i.e. in the extra unit Ppmd7_Alloc() allocates */
#ifdef PPMD_32BIT
CPpmd7_Node headItem;
CPpmd7_Node_Ref head = &headItem;
#else
CPpmd7_Node_Ref head = p->AlignOffset + p->Size;
#endif
CPpmd7_Node_Ref n = head;
unsigned i;
p->GlueCount = 255;
/* create doubly-linked list of free blocks */
for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
UInt16 nu = I2U(i);
CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
p->FreeList[i] = 0;
while (next != 0)
{
CPpmd7_Node *node = NODE(next);
node->Next = n;
n = NODE(n)->Prev = next;
/* the free-list link occupies the first bytes of the block: read it before Stamp/NU overwrite it */
next = *(const CPpmd7_Node_Ref *)node;
node->Stamp = 0;
node->NU = (UInt16)nu;
}
}
/* close the ring; a non-zero Stamp marks the sentinel as "not free" */
NODE(head)->Stamp = 1;
NODE(head)->Next = n;
NODE(n)->Prev = head;
/* make the block at LoUnit read as "not free" so that merging stops there */
if (p->LoUnit != p->HiUnit)
((CPpmd7_Node *)p->LoUnit)->Stamp = 1;
/* Glue free blocks */
while (n != head)
{
CPpmd7_Node *node = NODE(n);
UInt32 nu = (UInt32)node->NU;
for (;;)
{
/* node2 = block that starts right behind the current (possibly grown) block */
CPpmd7_Node *node2 = NODE(n) + nu;
nu += node2->NU;
/* stop at a used block, or when the merged size would not fit into the 16-bit NU */
if (node2->Stamp != 0 || nu >= 0x10000)
break;
/* unlink node2 and absorb it */
NODE(node2->Prev)->Next = node2->Next;
NODE(node2->Next)->Prev = node2->Prev;
node->NU = (UInt16)nu;
}
n = node->Next;
}
/* Fill lists of free blocks */
for (n = NODE(head)->Next; n != head;)
{
CPpmd7_Node *node = NODE(n);
unsigned nu;
CPpmd7_Node_Ref next = node->Next;
/* blocks larger than 128 units are cut into 128-unit pieces (largest size class) */
for (nu = node->NU; nu > 128; nu -= 128, node += 128)
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
/* a rest that is not an exact class size is split into two class-sized blocks */
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
InsertNode(p, node + k, nu - k - 1);
}
InsertNode(p, node, i);
n = next;
}
}
/* Slow allocation path for a block of size class indx, used when the class's
   free list and the LoUnit..HiUnit gap cannot serve the request.
   1) If GlueCount is 0, free blocks are merged first and the exact class is retried.
   2) Otherwise the next larger class with a free block is taken and split.
   3) If no larger class has a free block, GlueCount is decremented and the
      block is carved from below UnitsStart, provided more than the needed
      number of bytes separates Text and UnitsStart.
   Returns the block, or NULL when the model memory is exhausted. */
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
{
  unsigned donor;
  void *block;

  if (p->GlueCount == 0)
  {
    GlueFreeBlocks(p);
    if (p->FreeList[indx] != 0)
      return RemoveNode(p, indx);
  }

  for (donor = indx + 1; ; donor++)
  {
    if (donor == PPMD_NUM_INDEXES)
    {
      const UInt32 numBytes = U2B(I2U(indx));
      p->GlueCount--;
      if ((UInt32)(p->UnitsStart - p->Text) > numBytes)
      {
        p->UnitsStart -= numBytes;
        return p->UnitsStart;
      }
      return NULL;
    }
    if (p->FreeList[donor] != 0)
      break;
  }

  block = RemoveNode(p, donor);
  SplitBlock(p, block, donor, indx);
  return block;
}
/* Allocates a block of size class indx.
   Order of preference: the class's own free list, then the unused gap between
   LoUnit and HiUnit, then the slow path AllocUnitsRare() (which may return NULL). */
static void *AllocUnits(CPpmd7 *p, unsigned indx)
{
  if (p->FreeList[indx] == 0)
  {
    const UInt32 need = U2B(I2U(indx));
    Byte *block;

    if (need > (UInt32)(p->HiUnit - p->LoUnit))
      return AllocUnitsRare(p, indx);

    block = p->LoUnit;
    p->LoUnit = block + need;
    return block;
  }
  return RemoveNode(p, indx);
}
/* Copies num 12-byte units (three UInt32 words each) from src to dest.
   dest, src: 4-byte aligned addresses of non-overlapping areas.
   num:       number of units; must be at least 1 (the loop body runs before the test).
   Every argument is parenthesized before it is cast or assigned, so pointer
   expressions such as (base + offset) are converted as a whole. */
#define MyMem12Cpy(dest, src, num) \
  { UInt32 *d = (UInt32 *)(dest); const UInt32 *s = (const UInt32 *)(src); UInt32 n = (num); \
    do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
/* Shrinks a block from oldNU to newNU units and returns the (possibly moved) block.
   - same size class: nothing to do
   - a free block of the target class exists: copy the data there and free the old block
   - otherwise: keep the block in place and free its tail via SplitBlock(). */
static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
  const unsigned fromIndx = U2I(oldNU);
  const unsigned toIndx = U2I(newNU);

  if (fromIndx != toIndx)
  {
    if (p->FreeList[toIndx] == 0)
    {
      SplitBlock(p, oldPtr, fromIndx, toIndx);
    }
    else
    {
      void *fresh = RemoveNode(p, toIndx);
      MyMem12Cpy(fresh, oldPtr, newNU);
      InsertNode(p, oldPtr, fromIndx);
      return fresh;
    }
  }
  return oldPtr;
}
/* Reassembles the successor reference of state p from its two 16-bit halves. */
#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
/* Stores reference v as the successor of state p, split into a low and a high
   16-bit half (the inverse of SUCCESSOR()). */
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
  const UInt32 bits = (UInt32)(v);
  p->SuccessorLow = (UInt16)(bits & 0xFFFF);
  p->SuccessorHigh = (UInt16)((bits >> 16) & 0xFFFF);
}
/* Resets the model to its initial state without reallocating memory: empties the
allocator, lays out the memory areas, creates the order-0 context with all 256
symbols at frequency 1, and reinitializes the BinSumm and See tables. */
static void RestartModel(CPpmd7 *p)
{
unsigned i, k, m;
memset(p->FreeList, 0, sizeof(p->FreeList));
/* memory layout: Text starts at the bottom; units occupy the top part, which is
(Size / 8 / UNIT_SIZE) * 7 * UNIT_SIZE bytes long and ends at HiUnit */
p->Text = p->Base + p->AlignOffset;
p->HiUnit = p->Text + p->Size;
p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
p->GlueCount = 0;
p->OrderFall = p->MaxOrder;
/* initial run length: -(min(MaxOrder, 12)) - 1 */
p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
p->PrevSuccess = 0;
/* root context: one unit taken from the top of the unit area */
p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
p->MinContext->Suffix = 0;
p->MinContext->NumStats = 256;
p->MinContext->SummFreq = 256 + 1;
/* its 256 states: 128 units taken from the bottom of the unit area */
p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
p->LoUnit += U2B(256 / 2);
p->MinContext->Stats = REF(p->FoundState);
for (i = 0; i < 256; i++)
{
CPpmd_State *s = &p->FoundState[i];
s->Symbol = (Byte)i;
s->Freq = 1;
SetSuccessor(s, 0);
}
/* BinSumm[i][k + 8 * j] = PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2) for j = 0..7 */
for (i = 0; i < 128; i++)
for (k = 0; k < 8; k++)
{
UInt16 *dest = p->BinSumm[i] + k;
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
for (m = 0; m < 64; m += 8)
dest[m] = val;
}
/* SEE contexts: Shift = PPMD_PERIOD_BITS - 4, Summ = (5 * i + 10) << Shift, Count = 4 */
for (i = 0; i < 25; i++)
for (k = 0; k < 16; k++)
{
CPpmd_See *s = &p->See[i][k];
s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4));
s->Count = 4;
}
}
/* Starts a new coding session with the given maximum model order: stores the
   order, resets the model, and sets up the dummy SEE context that
   Ppmd7_MakeEscFreq() returns for contexts with 256 states.
   The model memory must already be allocated. */
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
{
  CPpmd_See *dummy = &p->DummySee;

  p->MaxOrder = maxOrder;
  RestartModel(p);

  dummy->Shift = PPMD_PERIOD_BITS;
  dummy->Summ = 0; /* unused */
  dummy->Count = 64; /* unused */
}
/* Builds the chain of one-symbol child contexts for the symbol in p->FoundState.
Starting at MinContext, the suffix chain is followed while the state for the
symbol in each suffix context has the same successor as FoundState; those states
are collected in ps[]. Then, from the lowest context reached upwards, a new child
context is created for every collected state and linked in as its successor.
skip: if non-zero, FoundState itself is not put into ps[].
Returns the last created context; the already existing context if nothing had
to be created; or NULL if memory ran out. */
static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
{
CPpmd_State upState;
CTX_PTR c = p->MinContext;
/* upBranch: current successor of FoundState, used below as a reference to a byte */
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
CPpmd_State *ps[PPMD7_MAX_ORDER];
unsigned numPs = 0;
if (!skip)
ps[numPs++] = p->FoundState;
while (c->Suffix)
{
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
/* locate the state of the current symbol in the suffix context */
if (c->NumStats != 1)
{
for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
}
else
s = ONE_STATE(c);
successor = SUCCESSOR(s);
if (successor != upBranch)
{
/* this state has a different successor: use it as the base context */
c = CTX(successor);
if (numPs == 0)
return c;
break;
}
ps[numPs++] = s;
}
/* state of the new child contexts: the byte upBranch refers to, with successor upBranch + 1 */
upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
SetSuccessor(&upState, upBranch + 1);
if (c->NumStats == 1)
upState.Freq = ONE_STATE(c)->Freq;
else
{
/* estimate the initial frequency from that symbol's statistics in the base context */
UInt32 cf, s0;
CPpmd_State *s;
for (s = STATS(c); s->Symbol != upState.Symbol; s++);
cf = s->Freq - 1;
s0 = c->SummFreq - c->NumStats - cf;
upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));
}
do
{
/* Create Child */
CTX_PTR c1; /* = AllocContext(p); */
/* one unit: from the HiUnit/LoUnit gap, else free list 0, else the slow path */
if (p->HiUnit != p->LoUnit)
c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
{
c1 = (CTX_PTR)AllocUnitsRare(p, 0);
if (!c1)
return NULL;
}
c1->NumStats = 1;
*ONE_STATE(c1) = upState;
c1->Suffix = REF(c);
/* link the child as successor of the most recently collected state */
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
}
while (numPs != 0);
return c;
}
/* Exchanges the contents of two states. */
static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
{
  const CPpmd_State saved = *t2;
  *t2 = *t1;
  *t1 = saved;
}
/* Updates the model after the symbol in p->FoundState was coded in MinContext:
1) bumps the symbol's frequency in the suffix context,
2) appends the symbol to the text area and/or creates successor contexts,
3) adds the symbol as a new state to every context from MaxContext down to
(but not including) MinContext,
4) moves MinContext/MaxContext to the successor context.
Whenever memory runs out, the whole model is reset with RestartModel(). */
static void UpdateModel(CPpmd7 *p)
{
CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
CTX_PTR c;
unsigned s0, ns;
/* step 1: only while the found frequency is still small and a suffix context exists */
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
c = SUFFIX(p->MinContext);
if (c->NumStats == 1)
{
CPpmd_State *s = ONE_STATE(c);
if (s->Freq < 32)
s->Freq++;
}
else
{
CPpmd_State *s = STATS(c);
if (s->Symbol != p->FoundState->Symbol)
{
do { s++; } while (s->Symbol != p->FoundState->Symbol);
/* move the state one position towards the front if it is at least as frequent as its predecessor */
if (s[0].Freq >= s[-1].Freq)
{
SwapStates(&s[0], &s[-1]);
s--;
}
}
if (s->Freq < MAX_FREQ - 9)
{
s->Freq += 2;
c->SummFreq += 2;
}
}
}
/* OrderFall == 0: only the successor chain has to be created */
if (p->OrderFall == 0)
{
p->MinContext = p->MaxContext = CreateSuccessors(p, True);
if (p->MinContext == 0)
{
RestartModel(p);
return;
}
SetSuccessor(p->FoundState, REF(p->MinContext));
return;
}
/* append the symbol to the text area; successor refers to the position behind it */
*p->Text++ = p->FoundState->Symbol;
successor = REF(p->Text);
if (p->Text >= p->UnitsStart)
{
RestartModel(p);
return;
}
if (fSuccessor)
{
/* a successor reference not above the current text position is replaced by real contexts */
if (fSuccessor <= successor)
{
CTX_PTR cs = CreateSuccessors(p, False);
if (cs == NULL)
{
RestartModel(p);
return;
}
fSuccessor = REF(cs);
}
if (--p->OrderFall == 0)
{
successor = fSuccessor;
/* take the appended byte back when MaxContext differs from MinContext */
p->Text -= (p->MaxContext != p->MinContext);
}
}
else
{
SetSuccessor(p->FoundState, successor);
fSuccessor = REF(p->MinContext);
}
/* s0: SummFreq of MinContext minus its state count and minus (found frequency - 1) */
s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
/* step 3: add the symbol to each context between MaxContext and MinContext */
for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))
{
unsigned ns1;
UInt32 cf, sf;
if ((ns1 = c->NumStats) != 1)
{
/* states are 6 bytes, units 12: a new unit is needed only when the count is even */
if ((ns1 & 1) == 0)
{
/* Expand for one UNIT */
unsigned oldNU = ns1 >> 1;
unsigned i = U2I(oldNU);
/* move to a bigger block only if oldNU + 1 units fall into another size class */
if (i != U2I((size_t)oldNU + 1))
{
void *ptr = AllocUnits(p, i + 1);
void *oldPtr;
if (!ptr)
{
RestartModel(p);
return;
}
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
c->Stats = STATS_REF(ptr);
}
}
c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
}
else
{
/* one-symbol context: move its embedded state into a freshly allocated state array */
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
if (!s)
{
RestartModel(p);
return;
}
*s = *ONE_STATE(c);
c->Stats = REF(s);
if (s->Freq < MAX_FREQ / 4 - 1)
s->Freq <<= 1;
else
s->Freq = MAX_FREQ - 4;
c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
}
/* choose the initial frequency cf of the new state and raise SummFreq accordingly */
cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
sf = (UInt32)s0 + c->SummFreq;
if (cf < 6 * sf)
{
cf = 1 + (cf > sf) + (cf >= 4 * sf);
c->SummFreq += 3;
}
else
{
cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
c->SummFreq = (UInt16)(c->SummFreq + cf);
}
{
/* append the new state at the end of the array */
CPpmd_State *s = STATS(c) + ns1;
SetSuccessor(s, successor);
s->Symbol = p->FoundState->Symbol;
s->Freq = (Byte)cf;
c->NumStats = (UInt16)(ns1 + 1);
}
}
/* step 4 */
p->MaxContext = p->MinContext = CTX(fSuccessor);
}
/* Halves the frequencies of all states of MinContext.
The found state is moved to the front and gets 4 added before halving; the other
states are halved and kept sorted by descending frequency; states whose frequency
drops to 0 are removed (shrinking the state array, or collapsing the context into
a one-symbol context). SummFreq is recomputed and FoundState is updated. */
static void Rescale(CPpmd7 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
{
/* move the found state to position 0, shifting the states before it up by one */
CPpmd_State tmp = *s;
for (; s != stats; s--)
s[0] = s[-1];
*s = tmp;
}
escFreq = p->MinContext->SummFreq - s->Freq;
s->Freq += 4;
/* halving rounds up unless OrderFall is 0 */
adder = (p->OrderFall != 0);
s->Freq = (Byte)((s->Freq + adder) >> 1);
sumFreq = s->Freq;
i = p->MinContext->NumStats - 1;
do
{
escFreq -= (++s)->Freq;
s->Freq = (Byte)((s->Freq + adder) >> 1);
sumFreq += s->Freq;
/* insertion step: restore descending order */
if (s[0].Freq > s[-1].Freq)
{
CPpmd_State *s1 = s;
CPpmd_State tmp = *s1;
do
s1[0] = s1[-1];
while (--s1 != stats && tmp.Freq > s1[-1].Freq);
*s1 = tmp;
}
}
while (--i);
/* s is the last state; because of the ordering, zero frequencies are at the end */
if (s->Freq == 0)
{
unsigned numStats = p->MinContext->NumStats;
unsigned n0, n1;
/* i is 0 here: count the trailing zero-frequency states */
do { i++; } while ((--s)->Freq == 0);
escFreq += i;
p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
if (p->MinContext->NumStats == 1)
{
/* single survivor: scale it down, free the array, embed the state in the context */
CPpmd_State tmp = *stats;
do
{
tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
escFreq >>= 1;
}
while (escFreq > 1);
InsertNode(p, stats, U2I(((numStats + 1) >> 1)));
*(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
return;
}
/* array sizes in units before and after the removal */
n0 = (numStats + 1) >> 1;
n1 = (p->MinContext->NumStats + 1) >> 1;
if (n0 != n1)
p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
}
p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
p->FoundState = STATS(p->MinContext);
}
/* Selects the SEE context for an escape from MinContext and computes the escape
   frequency.
   numMasked: number of symbols of MinContext that are already masked.
   *escFreq:  receives the escape frequency (never 0).
   A context with 256 states uses the dummy SEE context and frequency 1. Otherwise
   the SEE context is picked from the number of non-masked symbols and four
   context properties, and its Summ is reduced by Summ >> Shift.
   Returns the selected SEE context. */
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
{
  CPpmd7_Context *mc = p->MinContext;
  const unsigned nonMasked = mc->NumStats - numMasked;
  CPpmd_See *see;
  unsigned r;

  if (mc->NumStats == 256)
  {
    *escFreq = 1;
    return &p->DummySee;
  }

  see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] +
      (nonMasked < (unsigned)SUFFIX(mc)->NumStats - mc->NumStats) +
      2 * (unsigned)(mc->SummFreq < 11 * mc->NumStats) +
      4 * (unsigned)(numMasked > nonMasked) +
      p->HiBitsFlag;

  r = (see->Summ >> see->Shift);
  see->Summ = (UInt16)(see->Summ - r);
  *escFreq = (r != 0) ? r : 1;
  return see;
}
/* Advances to the context for the next symbol.
   Fast path: when OrderFall is 0 and the successor of FoundState lies above the
   text area (address greater than Text), it becomes the current context directly.
   Otherwise the model is updated via UpdateModel(). */
static void NextContext(CPpmd7 *p)
{
  CTX_PTR next = CTX(SUCCESSOR(p->FoundState));

  if (p->OrderFall != 0 || (Byte *)next <= p->Text)
  {
    UpdateModel(p);
    return;
  }
  p->MinContext = p->MaxContext = next;
}
/* Model update after coding a symbol whose state is not the first one of its
   context (the code reads the preceding state): adds 4 to the state's frequency
   and to SummFreq, moves the state one position forward if it became more
   frequent than its predecessor (rescaling when it then exceeds MAX_FREQ), and
   advances to the next context. */
void Ppmd7_Update1(CPpmd7 *p)
{
  CPpmd_State *cur = p->FoundState;

  cur->Freq += 4;
  p->MinContext->SummFreq += 4;

  if (cur->Freq > cur[-1].Freq)
  {
    CPpmd_State *prev = cur - 1;
    SwapStates(cur, prev);
    p->FoundState = prev;
    if (prev->Freq > MAX_FREQ)
      Rescale(p);
  }

  NextContext(p);
}
/* Ppmd7_Update1_0
   Model update for the case where the coders matched the first state of
   MinContext. PrevSuccess becomes 1 when twice the state's Freq exceeds
   SummFreq (evaluated before either is incremented) and is added to RunLength.
   Then SummFreq and Freq each grow by 4, Rescale() runs if Freq passes
   MAX_FREQ, and NextContext() advances the model. */
void Ppmd7_Update1_0(CPpmd7 *p)
{
  CPpmd_State *first = p->FoundState;

  p->PrevSuccess = (2 * first->Freq > p->MinContext->SummFreq) ? 1 : 0;
  p->RunLength += p->PrevSuccess;
  p->MinContext->SummFreq += 4;
  first->Freq += 4;
  if (first->Freq > MAX_FREQ)
    Rescale(p);
  NextContext(p);
}
/* Ppmd7_UpdateBin
   Model update after the single state of a one-state context was coded:
   Freq is incremented while it is below 128, PrevSuccess is set, RunLength is
   incremented, and NextContext() advances the model. */
void Ppmd7_UpdateBin(CPpmd7 *p)
{
  CPpmd_State *only = p->FoundState;

  if (only->Freq < 128)
    only->Freq = (Byte)(only->Freq + 1);
  p->PrevSuccess = 1;
  p->RunLength++;
  NextContext(p);
}
/* Ppmd7_Update2
   Model update after a symbol was found following at least one escape:
   SummFreq and the state's Freq each grow by 4, Rescale() runs if Freq passes
   MAX_FREQ, RunLength is reset to InitRL, and UpdateModel() is always called. */
void Ppmd7_Update2(CPpmd7 *p)
{
  CPpmd_State *found = p->FoundState;

  p->MinContext->SummFreq += 4;
  found->Freq += 4;
  if (found->Freq > MAX_FREQ)
    Rescale(p);
  p->RunLength = p->InitRL;
  UpdateModel(p);
}

142
bsnes/lzma/Ppmd7.h Normal file
View File

@ -0,0 +1,142 @@
/* Ppmd7.h -- PPMdH compression codec
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* This code supports a virtual RangeDecoder and includes the RangeCoder
implementation from 7z instead of the RangeCoder from the original PPMd var.H.
If you need compatibility with the original PPMd var.H, you can use an external RangeDecoder. */
#ifndef __PPMD7_H
#define __PPMD7_H
#include "Ppmd.h"
EXTERN_C_BEGIN
/* Limits for the model order and the model memory size.
   NOTE(review): presumably the valid ranges for Ppmd7_Init's maxOrder and
   Ppmd7_Alloc's size arguments - the checks are not visible here; confirm in Ppmd7.c. */
#define PPMD7_MIN_ORDER 2
#define PPMD7_MAX_ORDER 64
#define PPMD7_MIN_MEM_SIZE (1 << 11)
#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3)
struct CPpmd7_Context_;
/* Reference to a context. With PPMD_32BIT it is a plain pointer; otherwise it is
   a 32-bit value that Ppmd7_GetContext() adds to CPpmd7::Base to obtain the pointer. */
typedef
#ifdef PPMD_32BIT
struct CPpmd7_Context_ *
#else
UInt32
#endif
CPpmd7_Context_Ref;
/* One context record of the model. */
typedef struct CPpmd7_Context_
{
/* Number of states held by this context; the coders treat 1 as the binary (single-state) case. */
UInt16 NumStats;
/* Frequency total the coders pass to the range coder as the scale for this context. */
UInt16 SummFreq;
/* Reference to the state array; resolve it with Ppmd7_GetStats(). */
CPpmd_State_Ref Stats;
/* Reference to the suffix context followed on escape; a zero value ends the escape chain. */
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
/* Reinterprets the storage that starts at SummFreq as a CPpmd_State.
   The coders use this only for contexts with NumStats == 1. */
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
/* Complete state of one PPMd7 model instance, shared by the encoder and the decoder. */
typedef struct
{
/* MinContext: context the coders are currently working in; it is moved along Suffix links on escape.
   MaxContext: set together with MinContext on NextContext()'s direct path. */
CPpmd7_Context *MinContext, *MaxContext;
/* State of the symbol that was just coded; the Ppmd7_Update* functions operate on it. */
CPpmd_State *FoundState;
/* OrderFall: incremented each time a coder drops to a suffix context.
   InitEsc: loaded from PPMD7_kExpEscape after an escape from a single-state context.
   PrevSuccess: 0/1 flag written by the coders and update functions; part of the BinSumm index.
   HiBitsFlag: value taken from HB2Flag; part of the See and BinSumm indices.
   MaxOrder: NOTE(review): presumably the order given to Ppmd7_Init - not used in the code visible here. */
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag;
Int32 RunLength, InitRL; /* must be 32-bit at least */
/* NOTE(review): Size, GlueCount, LoUnit, HiUnit, UnitsStart, AlignOffset, Indx2Units,
   Units2Indx and FreeList look like memory-allocator bookkeeping; they are not touched
   by the code visible here - confirm in Ppmd7.c. */
UInt32 Size;
UInt32 GlueCount;
/* Base: address that offset-style references are added to (see Ppmd7_GetPtr); NULL means not allocated.
   Text: NextContext() compares the successor pointer against it. */
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
Byte Indx2Units[PPMD_NUM_INDEXES];
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
/* Lookup tables: NS2Indx and NS2BSIndx are indexed by (a state count - 1), HB2Flag by symbol value. */
Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
/* SEE slots chosen by Ppmd7_MakeEscFreq(); DummySee is the slot returned for 256-state contexts. */
CPpmd_See DummySee, See[25][16];
/* Probabilities for single-state contexts; addressed through Ppmd7_GetBinSumm(). */
UInt16 BinSumm[128][64];
} CPpmd7;
/* Model lifecycle entry points.
   NOTE(review): the implementations are outside this view; the intended call order
   Construct -> Alloc -> Init ... -> Free is presumed from the names - confirm in Ppmd7.c. */
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
/* Non-zero when the model's Base pointer is set. */
#define Ppmd7_WasAllocated(p) ((p)->Base != NULL)
/* ---------- Internal Functions ---------- */
extern const Byte PPMD7_kExpEscape[16];
/* Reference -> pointer conversion. With PPMD_32BIT references already are pointers;
   otherwise they are byte offsets from p->Base. */
#ifdef PPMD_32BIT
#define Ppmd7_GetPtr(p, ptr) (ptr)
#define Ppmd7_GetContext(p, ptr) (ptr)
#define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
#else
#define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
#define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
#endif
/* Model updates applied to p->FoundState after a symbol has been coded:
   Update1_0 - the first state of a multi-state context matched;
   Update1   - a later state of that context matched;
   Update2   - the symbol was found after at least one escape;
   UpdateBin - the single state of a one-state context matched. */
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
void Ppmd7_UpdateBin(CPpmd7 *p);
/* Yields the address of the BinSumm entry for the current single-state context.
   Row: Freq of the context's one state, minus 1.
   Column: PrevSuccess + NS2BSIndx[NumStats of the suffix context - 1]
           + HB2Flag[symbol of FoundState] + 2 * HB2Flag[symbol of the one state]
           + 0x20 when bit 31 of RunLength is set.
   Side effect: stores HB2Flag[symbol of FoundState] into p->HiBitsFlag.
   The argument p is not parenthesized, so pass a plain identifier. */
#define Ppmd7_GetBinSumm(p) \
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
(p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
((p->RunLength >> 26) & 0x20)]
/* Selects the SEE slot for p->MinContext, writes the escape frequency
   (always >= 1) to *escFreq and returns the selected slot.
   numMasked is the number of symbols the caller has already excluded.
   The out-parameter is named escFreq to match the definition in Ppmd7.c. */
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq);
/* ---------- Decode ---------- */
/* Range-decoder interface used by Ppmd7_DecodeSymbol(); each callback receives the
   interface pointer itself as its first argument. */
typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
struct IPpmd7_RangeDec
{
/* Returns the decoder's current position scaled to [0, total); the caller compares it
   with cumulative frequencies and rejects values >= total. */
UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
/* Consumes the interval [start, start + size) on the scale given to the preceding GetThreshold call. */
void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
/* Decodes one binary decision; size0 is the weight of outcome 0. Returns 0 or 1. */
UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
};
/* The 7z range decoder: an IPpmd7_RangeDec implementation that reads from an IByteIn. */
typedef struct
{
/* Callback table; filled by Ppmd7z_RangeDec_CreateVTable(). */
IPpmd7_RangeDec vt;
/* Width of the current coding interval. */
UInt32 Range;
/* Current code value, built from bytes read from Stream. */
UInt32 Code;
/* Input byte source; must be set by the caller before Ppmd7z_RangeDec_Init(). */
IByteIn *Stream;
} CPpmd7z_RangeDec;
/* Installs the 7z range-decoder callbacks into p->vt. */
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
/* Reads the 5 initial bytes from p->Stream. Returns False if the first byte is non-zero
   or the loaded Code equals 0xFFFFFFFF. */
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
/* Non-zero when the decoder's Code is 0. */
#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
/* Decodes one symbol. Returns the symbol value, -1 when an escape reaches a context
   without a suffix, or -2 when the range decoder reports a value outside the expected total. */
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
/* ---------- Encode ---------- */
/* The 7z range encoder that writes to an IByteOut. */
typedef struct
{
/* Low end of the current interval; kept in 64 bits so that bit 32 holds a pending carry. */
UInt64 Low;
/* Width of the current coding interval. */
UInt32 Range;
/* Most recent output byte that is being held back until a possible carry is resolved. */
Byte Cache;
/* Number of held-back bytes: Cache plus the 0xFF bytes that follow it. */
UInt64 CacheSize;
/* Output byte sink; must be set by the caller before encoding. */
IByteOut *Stream;
} CPpmd7z_RangeEnc;
/* Resets Low, Range, Cache and CacheSize; does not touch Stream. */
void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
/* Pushes the remaining encoder state to Stream (five byte shifts). */
void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
/* Encodes one symbol. If the symbol is absent from every context up to the one without
   a suffix, only escapes are written (the encoder source marks this as the end marker, symbol = -1). */
void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
EXTERN_C_END
#endif

191
bsnes/lzma/Ppmd7Dec.c Normal file
View File

@ -0,0 +1,191 @@
/* Ppmd7Dec.c -- PPMdH Decoder
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
/* Normalization threshold (2^24): Range_Normalize() shifts in input bytes while Range is below it. */
#define kTopValue (1 << 24)
/* Ppmd7z_RangeDec_Init
   Primes the range decoder from p->Stream. Range starts at 0xFFFFFFFF; the
   first input byte must be 0, and the following four bytes are loaded
   most-significant first into Code.
   Returns False when the first byte is non-zero (no further bytes are read in
   that case) or when the loaded Code is 0xFFFFFFFF; non-zero otherwise. */
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
{
  unsigned remaining = 4;

  p->Code = 0;
  p->Range = 0xFFFFFFFF;
  if (IByteIn_Read(p->Stream) != 0)
    return False;
  while (remaining-- != 0)
    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
  return (p->Code < 0xFFFFFFFF);
}
/* Declares the local `p` (CPpmd7z_RangeDec *) from a callback's interface pointer `pp`.
   The expansion is a complete declaration including its semicolon, so it is written
   without one as the first line of a function whose parameter is named pp.
   NOTE(review): CONTAINER_FROM_VTBL is defined outside this view; presumably it recovers
   the enclosing struct from the address of its `vt` member - confirm in 7zTypes.h. */
#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
/* Range_GetThreshold
   IPpmd7_RangeDec::GetThreshold for the 7z decoder. Divides Range by total
   (Range keeps the reduced value for the Decode call that follows) and returns
   Code expressed in those units. */
static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
{
  GET_Ppmd7z_RangeDec
  p->Range /= total;
  return p->Code / p->Range;
}
/* Range_Normalize
   Restores decoder precision: while Range is below kTopValue, shifts one more
   byte from Stream into Code and scales Range by 256. At most two bytes are
   consumed per call. */
static void Range_Normalize(CPpmd7z_RangeDec *p)
{
  unsigned pass;

  for (pass = 0; pass < 2 && p->Range < kTopValue; pass++)
  {
    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
    p->Range <<= 8;
  }
}
/* Range_Decode
   IPpmd7_RangeDec::Decode for the 7z decoder. Range must already hold the unit
   width left by Range_GetThreshold(); the sub-interval [start, start + size)
   is removed from Code, becomes the new Range, and the decoder is normalized. */
static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
{
  GET_Ppmd7z_RangeDec
  UInt32 unit = p->Range;

  p->Code -= start * unit;
  p->Range = unit * size;
  Range_Normalize(p);
}
/* Range_DecodeBit
   IPpmd7_RangeDec::DecodeBit for the 7z decoder. Splits Range at
   (Range >> 14) * size0: a Code below the split decodes as 0 and keeps the
   lower part, anything else decodes as 1 and keeps the upper part. The decoder
   is normalized before the bit is returned. */
static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
{
  GET_Ppmd7z_RangeDec
  UInt32 bound = (p->Range >> 14) * size0;
  UInt32 bit = (p->Code >= bound) ? 1 : 0;

  if (bit)
  {
    p->Code -= bound;
    p->Range -= bound;
  }
  else
    p->Range = bound;
  Range_Normalize(p);
  return bit;
}
/* Ppmd7z_RangeDec_CreateVTable
   Points the three callbacks of p->vt at this file's 7z range-decoder
   implementation. No other field of *p is touched. */
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
{
  IPpmd7_RangeDec *vt = &p->vt;

  vt->DecodeBit = Range_DecodeBit;
  vt->Decode = Range_Decode;
  vt->GetThreshold = Range_GetThreshold;
}
/* Per-symbol exclusion flag inside the local charMask table, read as a signed char.
   An entry of 0 means the symbol is masked (already ruled out by a higher-order context).
   NOTE(review): PPMD_SetAllBitsIn256Bytes is defined outside this view; presumably it sets
   every byte to 0xFF so that an unmasked entry reads as -1 - confirm in Ppmd.h. */
#define MASK(sym) ((signed char *)charMask)[sym]
/* Ppmd7_DecodeSymbol: decodes one symbol with model p through the range-decoder interface rc.
   Returns the decoded symbol (a Byte value) on success,
   -1 when an escape reaches a context that has no Suffix,
   -2 when rc->GetThreshold returns a value that is not below the total it was given.
   The model is updated (Ppmd7_Update*) before a symbol is returned. */
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
/* Multi-state context: find the state whose cumulative frequency interval contains count. */
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
/* The first state is tested on its own because it has a dedicated update (Update1_0). */
if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
{
Byte symbol;
rc->Decode(rc, 0, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
Ppmd7_Update1_0(p);
return symbol;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats - 1;
/* Walk the remaining states; hiCnt is the running upper bound of the current state's interval. */
do
{
if ((hiCnt += (++s)->Freq) > count)
{
Byte symbol;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
Ppmd7_Update1(p);
return symbol;
}
}
while (--i);
/* No state matched: this is an escape, unless count lies outside the total altogether. */
if (count >= p->MinContext->SummFreq)
return -2;
/* p->FoundState has not been reassigned on this path, so this reads the state left by the previous call. */
p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
/* Mask every symbol of this context; s points at the last state, so walk backwards. */
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats - 1;
do { MASK((--s)->Symbol) = 0; } while (--i);
}
else
{
/* Single-state context: one binary decision with an adaptive probability from BinSumm. */
UInt16 *prob = Ppmd7_GetBinSumm(p);
if (rc->DecodeBit(rc, *prob) == 0)
{
Byte symbol;
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
Ppmd7_UpdateBin(p);
return symbol;
}
/* Escape from the single-state context: update the probability and mask that one symbol. */
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
/* Escape loop: each pass moves to a lower-order context and tries its unmasked symbols. */
for (;;)
{
CPpmd_State *ps[256], *s;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
unsigned i, num, numMasked = p->MinContext->NumStats;
/* Follow Suffix links until a context offers more states than are already masked. */
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
return -1;
p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
}
while (p->MinContext->NumStats == numMasked);
hiCnt = 0;
s = Ppmd7_GetStats(p, p->MinContext);
i = 0;
num = p->MinContext->NumStats - numMasked;
/* Collect the unmasked states into ps[] and total their frequencies in hiCnt.
   k is 0 for a masked symbol: nothing is added and i stays put, so the slot is overwritten.
   For an unmasked symbol (k presumably -1, see MASK) Freq is added in full and i advances by one. */
do
{
int k = (int)(MASK(s->Symbol));
hiCnt += (s->Freq & k);
ps[i] = s++;
i -= k;
}
while (i != num);
/* Total scale = unmasked frequencies plus the escape frequency estimated by the SEE slot. */
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
count = rc->GetThreshold(rc, freqSum);
if (count < hiCnt)
{
/* A symbol of this context: locate it among the collected states by cumulative frequency. */
Byte symbol;
CPpmd_State **pps = ps;
for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
s = *pps;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
Ppmd_See_Update(see);
p->FoundState = s;
symbol = s->Symbol;
Ppmd7_Update2(p);
return symbol;
}
if (count >= freqSum)
return -2;
/* Another escape: consume the escape interval, credit the SEE slot, and mask the
   symbols collected in this pass before dropping to the next context. */
rc->Decode(rc, hiCnt, freqSum - hiCnt);
see->Summ = (UInt16)(see->Summ + freqSum);
do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
}
}

187
bsnes/lzma/Ppmd7Enc.c Normal file
View File

@ -0,0 +1,187 @@
/* Ppmd7Enc.c -- PPMdH Encoder
2017-04-03 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
/* Normalization threshold (2^24): the encoder shifts out a byte while Range is below it. */
#define kTopValue (1 << 24)
/* Ppmd7z_RangeEnc_Init
   Resets the range encoder to its start state: full 32-bit Range, Low of 0,
   and a single held-back byte of value 0. p->Stream is left untouched. */
void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p)
{
  p->Range = 0xFFFFFFFF;
  p->Low = 0;
  p->CacheSize = 1;
  p->Cache = 0;
}
/* RangeEnc_ShiftLow: shifts one byte out of the low 32 bits of Low, resolving carries.
   Bytes cannot be written immediately because a later carry out of bit 31 would have to
   be added to them; Cache holds the oldest such byte and CacheSize counts it plus the
   0xFF bytes that followed it. */
static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p)
{
/* Flush when the top byte is not 0xFF (a later carry cannot ripple past it) or a carry is present in bit 32. */
if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0)
{
Byte temp = p->Cache;
/* Write Cache, then the pending 0xFF bytes, each with the carry (0 or 1) added. */
do
{
IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32)));
temp = 0xFF;
}
while (--p->CacheSize != 0);
/* The byte leaving Low becomes the new held-back byte. */
p->Cache = (Byte)((UInt32)p->Low >> 24);
}
/* Counts either the new Cache byte or one more pending 0xFF byte. */
p->CacheSize++;
/* Keep only the low 32 bits, shifted up by one byte; this also clears the carry bit. */
p->Low = (UInt32)p->Low << 8;
}
/* RangeEnc_Encode
   Encodes the sub-interval [start, start + size) of a scale of `total` units:
   Range is divided into units, Low advances by `start` units, Range shrinks to
   `size` units, and bytes are shifted out until Range is at least kTopValue. */
static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total)
{
  UInt32 unit;

  p->Range /= total;
  unit = p->Range;
  p->Low += start * unit;
  p->Range = unit * size;
  for (;;)
  {
    if (p->Range >= kTopValue)
      break;
    p->Range <<= 8;
    RangeEnc_ShiftLow(p);
  }
}
/* RangeEnc_EncodeBit_0
   Encodes binary outcome 0 with weight size0: Range is narrowed to the lower
   part, (Range >> 14) * size0, Low is unchanged, and bytes are shifted out
   until Range is at least kTopValue. */
static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0)
{
  UInt32 bound = (p->Range >> 14) * size0;

  p->Range = bound;
  for (;;)
  {
    if (p->Range >= kTopValue)
      break;
    p->Range <<= 8;
    RangeEnc_ShiftLow(p);
  }
}
/* RangeEnc_EncodeBit_1
   Encodes binary outcome 1 where outcome 0 has weight size0: Low skips past
   the lower part, (Range >> 14) * size0, Range keeps what remains above it,
   and bytes are shifted out until Range is at least kTopValue. */
static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0)
{
  UInt32 bound = (p->Range >> 14) * size0;

  p->Low += bound;
  p->Range -= bound;
  for (;;)
  {
    if (p->Range >= kTopValue)
      break;
    p->Range <<= 8;
    RangeEnc_ShiftLow(p);
  }
}
/* Ppmd7z_RangeEnc_FlushData
   Finishes the stream by calling RangeEnc_ShiftLow() five times, which pushes
   the held-back byte and the remaining bytes of Low toward p->Stream. */
void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
{
  unsigned left = 5;

  do
    RangeEnc_ShiftLow(p);
  while (--left != 0);
}
/* Per-symbol exclusion flag inside the local charMask table, read as a signed char.
   An entry of 0 means the symbol is masked (already ruled out by a higher-order context).
   NOTE(review): PPMD_SetAllBitsIn256Bytes is defined outside this view; presumably it sets
   every byte to 0xFF so that an unmasked entry reads as -1 - confirm in Ppmd.h. */
#define MASK(sym) ((signed char *)charMask)[sym]
/* Ppmd7_EncodeSymbol: encodes `symbol` with model p into range encoder rc.
   The symbol is searched in p->MinContext first; on a miss an escape is encoded and the
   search continues in the suffix contexts with already-seen symbols masked out.
   If no context up to the one without a Suffix contains the symbol, the function returns
   after writing escapes only (marked below as the end marker, symbol = -1).
   The model is updated (Ppmd7_Update*) whenever a symbol is encoded. */
void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
/* Multi-state context: encode the symbol's cumulative-frequency interval if it is present. */
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
/* The first state is tested on its own because it has a dedicated update (Update1_0). */
if (s->Symbol == symbol)
{
RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq);
p->FoundState = s;
Ppmd7_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
i = p->MinContext->NumStats - 1;
/* sum is the cumulative frequency of the states before the one being examined. */
do
{
if ((++s)->Symbol == symbol)
{
RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq);
p->FoundState = s;
Ppmd7_Update1(p);
return;
}
sum += s->Freq;
}
while (--i);
/* Symbol absent: encode an escape. p->FoundState has not been reassigned on this path,
   so this reads the state left by the previous call. */
p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
/* Mask every symbol of this context; s points at the last state, so walk backwards. */
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats - 1;
do { MASK((--s)->Symbol) = 0; } while (--i);
/* The escape occupies the part of the scale above all state frequencies. */
RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
}
else
{
/* Single-state context: one binary decision with an adaptive probability from BinSumm. */
UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
if (s->Symbol == symbol)
{
RangeEnc_EncodeBit_0(rc, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
p->FoundState = s;
Ppmd7_UpdateBin(p);
return;
}
else
{
/* Escape from the single-state context: update the probability and mask that one symbol. */
RangeEnc_EncodeBit_1(rc, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
}
/* Escape loop: each pass moves to a lower-order context and looks for the symbol among its unmasked states. */
for (;;)
{
UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum;
unsigned i, numMasked = p->MinContext->NumStats;
/* Follow Suffix links until a context offers more states than are already masked. */
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
return; /* EndMarker (symbol = -1) */
p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
}
while (p->MinContext->NumStats == numMasked);
see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
s = Ppmd7_GetStats(p, p->MinContext);
sum = 0;
i = p->MinContext->NumStats;
/* Scan all states: sum accumulates the frequencies of unmasked states, and each visited
   symbol is masked on the way so the scan doubles as preparation for a further escape. */
do
{
int cur = s->Symbol;
if (cur == symbol)
{
/* Found: low is the cumulative frequency before this state. The inner loop starts at the
   found state itself and adds the remaining unmasked frequencies to obtain the full total. */
UInt32 low = sum;
CPpmd_State *s1 = s;
do
{
sum += (s->Freq & (int)(MASK(s->Symbol)));
s++;
}
while (--i);
RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq);
Ppmd_See_Update(see);
p->FoundState = s1;
Ppmd7_Update2(p);
return;
}
sum += (s->Freq & (int)(MASK(cur)));
MASK(cur) = 0;
s++;
}
while (--i);
/* Symbol absent here too: encode the escape interval and credit the SEE slot with the total. */
RangeEnc_Encode(rc, sum, escFreq, sum + escFreq);
see->Summ = (UInt16)(see->Summ + sum + escFreq);
}
}

10
bsnes/lzma/Precomp.h Normal file
View File

@ -0,0 +1,10 @@
/* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H
#define __7Z_PRECOMP_H
#include "Compiler.h"
/* #include "7zTypes.h" */
#endif

Some files were not shown because too many files have changed in this diff Show More