Colorspace Handler: Pad out some small arrays so that they can be useful for SIMD vector loads. Up to 512-bit vector loads are supported.

- For practical reasons, we shouldn't need to pad out any arrays past 64 bytes any time soon.
This commit is contained in:
rogerman 2022-04-05 22:28:43 -07:00
parent b1d49d14ec
commit c5c9e2d3a7
2 changed files with 37 additions and 18 deletions

View File

@ -84,7 +84,7 @@ CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//Is this a crazy idea? This table spreads 5-bit values evenly across 31 bits, from exactly 0 to INT_MAX.
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
CACHE_ALIGN const u32 material_5bit_to_31bit[32] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
@ -97,21 +97,33 @@ CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
// Layout of the 64-byte form: entries 0-31 are the real 5-bit -> 6-bit
// conversions; entries 32-63 are a byte-for-byte copy of entries 0-31, so the
// same 32 values appear in both halves of a 64-byte load.
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
CACHE_ALIGN const u8 material_5bit_to_6bit[64] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F,
// Mirror of first 32 bytes of this array.
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
// 5-bit to 8-bit conversion table. Every entry listed here equals
// (src << 3) | (src >> 2), which maps 0 -> 0x00 and 31 -> 0xFF.
// Layout of the 64-byte form: entries 0-31 are the real conversions; entries
// 32-63 are a byte-for-byte copy of entries 0-31.
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
CACHE_ALIGN const u8 material_5bit_to_8bit[64] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF,
// Mirror of first 32 bytes of this array.
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
CACHE_ALIGN const u8 material_6bit_to_8bit[64] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
@ -122,18 +134,25 @@ CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
CACHE_ALIGN const u8 material_3bit_to_5bit[64] = {
0, 4, 8, 13, 17, 22, 26, 31, 0,0,0,0,0,0,0,0,
0, 4, 8, 13, 17, 22, 26, 31, 0,0,0,0,0,0,0,0,
0, 4, 8, 13, 17, 22, 26, 31, 0,0,0,0,0,0,0,0,
0, 4, 8, 13, 17, 22, 26, 31, 0,0,0,0,0,0,0,0
};
//maybe not very precise -- refers to material_3bit_to_5bit, whose entries are floor(31*i/7) (truncated, not rounded)
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
CACHE_ALIGN const u8 material_3bit_to_6bit[64] = {
0, 8, 16, 26, 34, 44, 52, 63, 0,0,0,0,0,0,0,0,
0, 8, 16, 26, 34, 44, 52, 63, 0,0,0,0,0,0,0,0,
0, 8, 16, 26, 34, 44, 52, 63, 0,0,0,0,0,0,0,0,
0, 8, 16, 26, 34, 44, 52, 63, 0,0,0,0,0,0,0,0
};
//TODO - generate material_3bit_to_6bit in the static init method more accurately (entries are 2x the 3bit_to_5bit values except the last, not the exact 63*i/7)
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
CACHE_ALIGN const u8 material_3bit_to_8bit[64] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF, 0,0,0,0,0,0,0,0,
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF, 0,0,0,0,0,0,0,0,
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF, 0,0,0,0,0,0,0,0,
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF, 0,0,0,0,0,0,0,0
};
void ColorspaceHandlerInit()

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -89,12 +89,12 @@ union FragmentColor
};
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes.
extern CACHE_ALIGN const u8 material_5bit_to_8bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes.
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[64]; // Padded for vector lookup table routines. Only the first 8 indices are valid. Data is mirrored across 128-bit lanes.
extern CACHE_ALIGN const u8 material_3bit_to_6bit[64]; // Padded for vector lookup table routines. Only the first 8 indices are valid. Data is mirrored across 128-bit lanes.
extern CACHE_ALIGN const u8 material_3bit_to_8bit[64]; // Padded for vector lookup table routines. Only the first 8 indices are valid. Data is mirrored across 128-bit lanes.
extern CACHE_ALIGN u16 color_5551_swap_rb[65536];
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];