[Glide64] get TxUtil.cpp to not use asm

debcb5b25d
This commit is contained in:
zilmar 2015-10-14 11:20:03 +11:00
parent 81ab3fc877
commit 34de7c6804
5 changed files with 136 additions and 558 deletions

View File

@ -89,7 +89,7 @@ TxCache::add(uint64 checksum, GHQTexInfo *info, int dataSize)
if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) {
/* zlib compress it. compression level:1 (best speed) */
uint32 destLen = _gzdestLen;
uLongf destLen = _gzdestLen;
dest = (dest == _gzdest0) ? _gzdest1 : _gzdest0;
if (compress2(dest, &destLen, info->data, dataSize, 1) != Z_OK) {
dest = info->data;
@ -208,7 +208,7 @@ TxCache::get(uint64 checksum, GHQTexInfo *info)
/* zlib decompress it */
if (info->format & GR_TEXFMT_GZ) {
uint32 destLen = _gzdestLen;
uLongf destLen = _gzdestLen;
uint8 *dest = (_gzdest0 == info->data) ? _gzdest1 : _gzdest0;
if (uncompress(dest, &destLen, info->data, ((*itMap).second)->size) != Z_OK) {
DBG_INFO(80, L"Error: zlib decompression failed!\n");
@ -236,13 +236,13 @@ TxCache::save(const wchar_t *path, const wchar_t *filename, int config)
cachepath.CreateDirectory();
/* Ugly hack to enable fopen/gzopen in Win9x */
#ifdef WIN32
#ifdef _WIN32
wchar_t curpath[MAX_PATH];
GETCWD(MAX_PATH, curpath);
cachepath.ChangeDirectory();
#else
char curpath[MAX_PATH];
wcstombs(cbuf, cachepath.string().c_str(), MAX_PATH);
wcstombs(cbuf, cachepath.wstring().c_str(), MAX_PATH);
GETCWD(MAX_PATH, curpath);
CHDIR(cbuf);
#endif

View File

@ -43,17 +43,17 @@ public:
};
/* Logging macros.
 *
 * DEBUG defined:      DBG_INFO and INFO both forward to TxDbg::getInstance()->output.
 * DEBUG not defined:  DBG_INFO produces no logging call; INFO forwards to
 *                     TxDbg::getInstance()->output only when GHQCHK is defined
 *                     (or when the "#if 0" switch below is enabled by hand),
 *                     otherwise it falls back to DBG_INFO.
 *
 * NOTE(review): every macro is listed twice in this span, first in an
 * object-like form and then in a variadic function-like form; presumably
 * these are the before/after sides of the change - confirm which one is live.
 */
#ifdef DEBUG
#define DBG_INFO TxDbg::getInstance()->output
#define INFO DBG_INFO
#define DBG_INFO(...) TxDbg::getInstance()->output(__VA_ARGS__)
#define INFO(...) DBG_INFO(__VA_ARGS__)
#else
#define DBG_INFO 0 && (wchar_t)
#define DBG_INFO(...)
#ifdef GHQCHK
/* Texture checker build: INFO always logs, even without DEBUG. */
#define INFO TxDbg::getInstance()->output
#define INFO(...) TxDbg::getInstance()->output(__VA_ARGS__)
#else
#if 0 /* XXX enable this to log basic hires texture checks */
#define INFO TxDbg::getInstance()->output
#define INFO(...) TxDbg::getInstance()->output(__VA_ARGS__)
#else
/* Default release configuration: INFO follows DBG_INFO. */
#define INFO DBG_INFO
#define INFO(...) DBG_INFO(__VA_ARGS__)
#endif
#endif
#endif

View File

@ -21,7 +21,7 @@
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef WIN32
#ifdef _WIN32
#pragma warning(disable: 4786)
#endif
@ -29,7 +29,6 @@
#include "TxFilter.h"
#include "TextureFilters.h"
#include "TxDbg.h"
#include "bldno.h"
void TxFilter::clear()
{
@ -83,6 +82,11 @@ TxFilter::TxFilter(int maxwidth, int maxheight, int maxbpp, int options,
/* shamelessness :P this first call to the debug output message creates
* a file in the executable directory. */
INFO(0, L"------------------------------------------------------------------\n");
#ifdef GHQCHK
INFO(0, L" GlideHQ Hires Texture Checker 1.02.00.%d\n", 0);
#else
INFO(0, L" GlideHQ version 1.02.00.%d\n", 0);
#endif
INFO(0, L" Copyright (C) 2010 Hiroshi Morii All Rights Reserved\n");
INFO(0, L" email : koolsmoky(at)users.sourceforge.net\n");
INFO(0, L" website : http://www.3dfxzone.it/koolsmoky\n");

View File

@ -35,11 +35,13 @@
#define TAPIENTRY
#endif
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned long uint32;
#include <common/stdtypes.h>
#ifdef WIN32
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
#ifdef _WIN32
#define KBHIT(key) ((GetAsyncKeyState(key) & 0x8001) == 0x8001)
#else
#define KBHIT(key) (0)

View File

@ -304,6 +304,18 @@ TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride
return ret;
}
// Rotate left.
//
// Returns `value` with its bits rotated towards the most significant end by
// `count` positions; bits shifted out at the top re-enter at the bottom.
// `count` is reduced modulo the bit width of T, so any count is accepted.
//
// T is expected to be an unsigned integer type: with a signed T the right
// shift below may sign-extend and corrupt the bits that wrap around.
template<class T> static T __ROL__(T value, unsigned int count)
{
    const unsigned int nbits = sizeof(T) * 8;
    count %= nbits;
    // A zero rotation must be handled separately: `value >> nbits` is a shift
    // by the full width of the type, which is undefined behaviour.
    if (count == 0)
        return value;
    T high = value >> (nbits - count);
    value <<= count;
    value |= high;
    return value;
}
/* Rice CRC32 for hires texture packs */
/* NOTE: The following is used in Glide64 to calculate the CRC32
* for Rice hires texture packs.
@ -321,96 +333,28 @@ TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride
/* Computes the Rice checksum of a block of texture data.
 *
 * src       - start of the first row
 * width,
 * size      - together give the number of bytes hashed per row:
 *             ((width << size) + 1) >> 1
 * height    - number of rows to hash
 * rowStride - byte offset from the start of one row to the start of the next
 *
 * Each row is consumed as 32-bit words from its last word down to offset 0.
 * Every word is XORed with its byte offset and added to the running value
 * after that value has been rotated left by 4 bits.  After each row the
 * remaining row counter XORed with the last word hash is added as well.
 *
 * Returns the accumulated checksum; 0 when height <= 0.
 */
uint32
TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
{
    /* NOTE: bytes_per_width must be equal or larger than 4 */
    const uint8_t *row = src;
    uint32_t crc32Ret = 0;
    uint32_t word_hash = 0;
    const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

    for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
        /* pos is unsigned: stepping below zero wraps to a value >= 0x80000000,
         * which ends the loop.  The same happens immediately when a row holds
         * fewer than 4 bytes, so no word is read in that case. */
        for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
            /* Build the word from single bytes, least significant first, so
             * the read is valid at any alignment and yields the same value
             * on every host byte order. */
            const uint32_t word = (uint32_t)row[pos]
                                | ((uint32_t)row[pos + 1] << 8)
                                | ((uint32_t)row[pos + 2] << 16)
                                | ((uint32_t)row[pos + 3] << 24);
            word_hash = pos ^ word;
            crc32Ret = word_hash + __ROL__(crc32Ret, 4);
        }
        crc32Ret += cur_height ^ word_hash;
        row += rowStride;
    }
    return crc32Ret;
}
@ -418,247 +362,50 @@ boolean
/* Rice checksum for 4-bit colour-index data, plus the largest index seen.
 *
 * src       - start of the first row
 * width,
 * size      - together give the number of bytes hashed per row:
 *             ((width << size) + 1) >> 1
 * height    - number of rows to hash
 * rowStride - byte offset from the start of one row to the start of the next
 * crc32     - out: accumulated checksum (0 when height <= 0)
 * cimax     - out: largest 4-bit value found in the words that were hashed
 *
 * The checksum is built exactly as in RiceCRC32: rows are read as 32-bit
 * words from the last word down to offset 0, each word is XORed with its
 * byte offset and added to the running value rotated left by 4 bits, and the
 * remaining row counter XORed with the last word hash is added per row.
 *
 * Always returns 1.
 */
TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
    /* NOTE: bytes_per_width must be equal or larger than 4 */
    const uint8_t *row = src;
    uint32_t crc32Ret = 0;
    uint32_t cimaxRet = 0;
    uint32_t word_hash = 0;
    const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

    for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
        /* pos is unsigned: stepping below zero wraps to a value >= 0x80000000,
         * which ends the loop (also when a row holds fewer than 4 bytes). */
        for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
            /* Build the word from single bytes, least significant first, so
             * the read is valid at any alignment and yields the same value
             * on every host byte order. */
            const uint32_t word = (uint32_t)row[pos]
                                | ((uint32_t)row[pos + 1] << 8)
                                | ((uint32_t)row[pos + 2] << 16)
                                | ((uint32_t)row[pos + 3] << 24);

            /* 15 is the largest possible 4-bit index; once it has been seen
             * there is nothing left to find. */
            if (cimaxRet != 15) {
                for (unsigned int shift = 0; shift < 32; shift += 4) {
                    const uint32_t index = (word >> shift) & 0xF;
                    if (index > cimaxRet)
                        cimaxRet = index;
                }
            }

            word_hash = pos ^ word;
            crc32Ret = word_hash + __ROL__(crc32Ret, 4);
        }
        crc32Ret += cur_height ^ word_hash;
        row += rowStride;
    }

    *crc32 = crc32Ret;
    *cimax = cimaxRet;
    return 1;
}
@ -666,184 +413,42 @@ boolean
/* Rice checksum for 8-bit colour-index data, plus the largest index seen.
 *
 * src       - start of the first row
 * width,
 * size      - together give the number of bytes hashed per row:
 *             ((width << size) + 1) >> 1
 * height    - number of rows to hash
 * rowStride - byte offset from the start of one row to the start of the next
 * crc32     - out: accumulated checksum (0 when height <= 0)
 * cimax     - out: largest byte value found in the words that were hashed
 *
 * The checksum is built exactly as in RiceCRC32: rows are read as 32-bit
 * words from the last word down to offset 0, each word is XORed with its
 * byte offset and added to the running value rotated left by 4 bits, and the
 * remaining row counter XORed with the last word hash is added per row.
 *
 * Always returns 1.
 */
TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
    /* NOTE: bytes_per_width must be equal or larger than 4 */
    const uint8_t *row = src;
    uint32_t crc32Ret = 0;
    uint32_t cimaxRet = 0;
    uint32_t word_hash = 0;
    const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

    for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
        /* pos is unsigned: stepping below zero wraps to a value >= 0x80000000,
         * which ends the loop (also when a row holds fewer than 4 bytes). */
        for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
            /* Build the word from single bytes, least significant first, so
             * the read is valid at any alignment and yields the same value
             * on every host byte order. */
            const uint32_t word = (uint32_t)row[pos]
                                | ((uint32_t)row[pos + 1] << 8)
                                | ((uint32_t)row[pos + 2] << 16)
                                | ((uint32_t)row[pos + 3] << 24);

            /* 255 is the largest possible 8-bit index; once it has been seen
             * there is nothing left to find. */
            if (cimaxRet != 255) {
                for (unsigned int shift = 0; shift < 32; shift += 8) {
                    const uint32_t index = (word >> shift) & 0xFF;
                    if (index > cimaxRet)
                        cimaxRet = index;
                }
            }

            word_hash = pos ^ word;
            crc32Ret = word_hash + __ROL__(crc32Ret, 4);
        }
        crc32Ret += cur_height ^ word_hash;
        row += rowStride;
    }

    *crc32 = crc32Ret;
    *cimax = cimaxRet;
    return 1;
}
@ -895,59 +500,26 @@ TxUtil::grAspectRatioLog2(int w, int h)
/* Returns the number of processors reported by the operating system.
 *
 * The configured processor count is taken first and then replaced by the
 * online count when that query succeeds.  The result is 1 when no query is
 * available at compile time or when every query reports a value below 1.
 */
int
TxUtil::getNumberofProcessors()
{
    int numcore = 1, ret;

#ifdef _WIN32
#ifndef _SC_NPROCESSORS_ONLN
    /* No sysconf() here: map it onto GetSystemInfo() so the query code below
     * is shared between platforms.  Both macros remain defined after this
     * function, as before. */
    SYSTEM_INFO info;
    GetSystemInfo(&info);
#define sysconf(a) info.dwNumberOfProcessors
#define _SC_NPROCESSORS_ONLN
#endif
#endif
#ifdef _SC_NPROCESSORS_ONLN
    ret = sysconf(_SC_NPROCESSORS_CONF);
    if (ret >= 1) {
        numcore = ret;
    }
    /* Prefer the online count, but only when the query returned a usable
     * value; a failed or zero result must not overwrite the count above. */
    ret = sysconf(_SC_NPROCESSORS_ONLN);
    if (ret >= 1) {
        numcore = ret;
    }
#endif
    return numcore;
}