From 9c0a897e57b7e3a20f9fd33648b0d9fae79f4658 Mon Sep 17 00:00:00 2001 From: zilmar Date: Thu, 21 Jan 2016 04:39:18 +1100 Subject: [PATCH] [Glide64] Fix some of the run time checks --- Source/Glide64/TexCache.cpp | 2 +- Source/Glide64/TexLoad16b.h | 331 ++++++++++++++++++------------------ Source/Glide64/Util.h | 77 +++++---- Source/Glide64/rdp.cpp | 4 +- 4 files changed, 209 insertions(+), 205 deletions(-) diff --git a/Source/Glide64/TexCache.cpp b/Source/Glide64/TexCache.cpp index 6181de9b0..a2c5deda5 100644 --- a/Source/Glide64/TexCache.cpp +++ b/Source/Glide64/TexCache.cpp @@ -163,7 +163,7 @@ uint32_t textureCRC(uint8_t *addr, int width, int height, int line) for (; height; height--) { for (i = width; i; --i) { twopixel_crc = i * (uint64_t)(pixelpos[1] + pixelpos[0] + crc); - crc = (uint32_t)((twopixel_crc >> 32) + twopixel_crc); + crc = (uint32_t)(((twopixel_crc >> 32) + twopixel_crc) & 0xFFFFFFFF); pixelpos += 2; } crc = ((unsigned int)height * (uint64_t)crc >> 32) + height * crc; diff --git a/Source/Glide64/TexLoad16b.h b/Source/Glide64/TexLoad16b.h index 2d276983c..7c605cca7 100644 --- a/Source/Glide64/TexLoad16b.h +++ b/Source/Glide64/TexLoad16b.h @@ -39,157 +39,150 @@ static inline void load16bRGBA(uint8_t *src, uint8_t *dst, int wid_64, int height, int line, int ext) { - uint32_t *v6; - uint32_t *v7; - int v8; - int v9; - uint32_t v10; - uint32_t v11; - uint32_t *v12; - uint32_t *v13; - int v14; - uint32_t v15; - uint32_t v16; - int v17; - int v18; + uint32_t *v6; + uint32_t *v7; + int v8; + int v9; + uint32_t v10; + uint32_t v11; + uint32_t *v12; + uint32_t *v13; + int v14; + uint32_t v15; + uint32_t v16; + int v17; + int v18; - v6 = (uint32_t *)src; - v7 = (uint32_t *)dst; - v8 = height; - do - { - v17 = v8; - v9 = wid_64; + v6 = (uint32_t *)src; + v7 = (uint32_t *)dst; + v8 = height; do { - v10 = bswap32(*v6); - v11 = bswap32(v6[1]); - ALOWORD(v10) = __ROR__((uint16_t)v10, 1); - ALOWORD(v11) = __ROR__((uint16_t)v11, 1); - v10 = __ROR__(v10, 16); - v11 
= __ROR__(v11, 16); - ALOWORD(v10) = __ROR__((uint16_t)v10, 1); - ALOWORD(v11) = __ROR__((uint16_t)v11, 1); - *v7 = v10; - v7[1] = v11; - v6 += 2; - v7 += 2; - --v9; - } - while ( v9 ); - if ( v17 == 1 ) - break; - v18 = v17 - 1; - v12 = (uint32_t *)&src[(line + (uintptr_t)v6 - (uintptr_t)src) & 0xFFF]; - v13 = (uint32_t *)((char *)v7 + ext); - v14 = wid_64; - do - { - v15 = bswap32(v12[1]); - v16 = bswap32(*v12); - ALOWORD(v15) = __ROR__((uint16_t)v15, 1); - ALOWORD(v16) = __ROR__((uint16_t)v16, 1); - v15 = __ROR__(v15, 16); - v16 = __ROR__(v16, 16); - ALOWORD(v15) = __ROR__((uint16_t)v15, 1); - ALOWORD(v16) = __ROR__((uint16_t)v16, 1); - *v13 = v15; - v13[1] = v16; - v12 += 2; - v13 += 2; - --v14; - } - while ( v14 ); - v6 = (uint32_t *)&src[(line + (uintptr_t)v12 - (uintptr_t)src) & 0xFFF]; - v7 = (uint32_t *)((char *)v13 + ext); - v8 = v18 - 1; - } - while ( v18 != 1 ); + v17 = v8; + v9 = wid_64; + do + { + v10 = bswap32(*v6); + v11 = bswap32(v6[1]); + ALOWORD(v10) = __ROR__((uint16_t)(v10 & 0xFFFF), 1); + ALOWORD(v11) = __ROR__((uint16_t)(v11 & 0xFFFF), 1); + v10 = __ROR__(v10, 16); + v11 = __ROR__(v11, 16); + ALOWORD(v10) = __ROR__((uint16_t)(v10 & 0xFFFF), 1); + ALOWORD(v11) = __ROR__((uint16_t)(v11 & 0xFFFF), 1); + *v7 = v10; + v7[1] = v11; + v6 += 2; + v7 += 2; + --v9; + } while (v9); + if (v17 == 1) + break; + v18 = v17 - 1; + v12 = (uint32_t *)&src[(line + (uintptr_t)v6 - (uintptr_t)src) & 0xFFF]; + v13 = (uint32_t *)((char *)v7 + ext); + v14 = wid_64; + do + { + v15 = bswap32(v12[1]); + v16 = bswap32(*v12); + ALOWORD(v15) = __ROR__((uint16_t)(v15 & 0xFFFF), 1); + ALOWORD(v16) = __ROR__((uint16_t)(v16 & 0xFFFF), 1); + v15 = __ROR__(v15, 16); + v16 = __ROR__(v16, 16); + ALOWORD(v15) = __ROR__((uint16_t)(v15 & 0xFFFF), 1); + ALOWORD(v16) = __ROR__((uint16_t)(v16 & 0xFFFF), 1); + *v13 = v15; + v13[1] = v16; + v12 += 2; + v13 += 2; + --v14; + } while (v14); + v6 = (uint32_t *)&src[(line + (uintptr_t)v12 - (uintptr_t)src) & 0xFFF]; + v7 = (uint32_t *)((char 
*)v13 + ext); + v8 = v18 - 1; + } while (v18 != 1); } static inline void load16bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, int line, int ext) { - uint32_t *v6; - uint32_t *v7; - int v8; - int v9; - uint32_t v10; - uint32_t *v11; - uint32_t *v12; - int v13; - uint32_t v14; - int v15; - int v16; + uint32_t *v6; + uint32_t *v7; + int v8; + int v9; + uint32_t v10; + uint32_t *v11; + uint32_t *v12; + int v13; + uint32_t v14; + int v15; + int v16; - v6 = (uint32_t *)src; - v7 = (uint32_t *)dst; - v8 = height; - do - { - v15 = v8; - v9 = wid_64; + v6 = (uint32_t *)src; + v7 = (uint32_t *)dst; + v8 = height; do { - v10 = v6[1]; - *v7 = *v6; - v7[1] = v10; - v6 += 2; - v7 += 2; - --v9; - } - while ( v9 ); - if ( v15 == 1 ) - break; - v16 = v15 - 1; - v11 = (uint32_t *)((char *)v6 + line); - v12 = (uint32_t *)((char *)v7 + ext); - v13 = wid_64; - do - { - v14 = *v11; - *v12 = v11[1]; - v12[1] = v14; - v11 += 2; - v12 += 2; - --v13; - } - while ( v13 ); - v6 = (uint32_t *)((char *)v11 + line); - v7 = (uint32_t *)((char *)v12 + ext); - v8 = v16 - 1; - } - while ( v16 != 1 ); + v15 = v8; + v9 = wid_64; + do + { + v10 = v6[1]; + *v7 = *v6; + v7[1] = v10; + v6 += 2; + v7 += 2; + --v9; + } while (v9); + if (v15 == 1) + break; + v16 = v15 - 1; + v11 = (uint32_t *)((char *)v6 + line); + v12 = (uint32_t *)((char *)v7 + ext); + v13 = wid_64; + do + { + v14 = *v11; + *v12 = v11[1]; + v12[1] = v14; + v11 += 2; + v12 += 2; + --v13; + } while (v13); + v6 = (uint32_t *)((char *)v11 + line); + v7 = (uint32_t *)((char *)v12 + ext); + v8 = v16 - 1; + } while (v16 != 1); } - //**************************************************************** // Size: 2, Format: 0 // -uint32_t Load16bRGBA (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int /*tile*/) +uint32_t Load16bRGBA(wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int /*tile*/) { - if (wid_64 < 1) wid_64 = 1; - if (height < 1) height = 1; - int ext = (real_width - 
(wid_64 << 2)) << 1; + if (wid_64 < 1) wid_64 = 1; + if (height < 1) height = 1; + int ext = (real_width - (wid_64 << 2)) << 1; - load16bRGBA((uint8_t *)src, (uint8_t *)dst, wid_64, height, line, ext); + load16bRGBA((uint8_t *)src, (uint8_t *)dst, wid_64, height, line, ext); - return (1 << 16) | GR_TEXFMT_ARGB_1555; + return (1 << 16) | GR_TEXFMT_ARGB_1555; } //**************************************************************** // Size: 2, Format: 3 // -uint32_t Load16bIA (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int /*tile*/) +uint32_t Load16bIA(wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int /*tile*/) { - if (wid_64 < 1) wid_64 = 1; - if (height < 1) height = 1; - int ext = (real_width - (wid_64 << 2)) << 1; + if (wid_64 < 1) wid_64 = 1; + if (height < 1) height = 1; + int ext = (real_width - (wid_64 << 2)) << 1; - load16bIA((uint8_t *)src, (uint8_t *)dst, wid_64, height, line, ext); + load16bIA((uint8_t *)src, (uint8_t *)dst, wid_64, height, line, ext); - return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88; + return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88; } //**************************************************************** @@ -198,60 +191,60 @@ uint32_t Load16bIA (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int li uint16_t yuv_to_rgb565(uint8_t y, uint8_t u, uint8_t v) { - //* - float r = y + (1.370705f * (v-128)); - float g = y - (0.698001f * (v-128)) - (0.337633f * (u-128)); - float b = y + (1.732446f * (u-128)); - r *= 0.125f; - g *= 0.25f; - b *= 0.125f; - //clipping the result - if (r > 31) r = 31; - if (g > 63) g = 63; - if (b > 31) b = 31; - if (r < 0) r = 0; - if (g < 0) g = 0; - if (b < 0) b = 0; - uint16_t c = (uint16_t)(((uint16_t)(r) << 11) | - ((uint16_t)(g) << 5) | - (uint16_t)(b) ); - return c; - //*/ - /* - const uint32_t c = y - 16; - const uint32_t d = u - 128; - const uint32_t e = v - 128; + //* + float r = y + (1.370705f * (v - 128)); + float g = y - (0.698001f * 
(v - 128)) - (0.337633f * (u - 128)); + float b = y + (1.732446f * (u - 128)); + r *= 0.125f; + g *= 0.25f; + b *= 0.125f; + //clipping the result + if (r > 31) r = 31; + if (g > 63) g = 63; + if (b > 31) b = 31; + if (r < 0) r = 0; + if (g < 0) g = 0; + if (b < 0) b = 0; + uint16_t c = (uint16_t)(((uint16_t)(r) << 11) | + ((uint16_t)(g) << 5) | + (uint16_t)(b)); + return c; + //*/ + /* + const uint32_t c = y - 16; + const uint32_t d = u - 128; + const uint32_t e = v - 128; - uint32_t r = (298 * c + 409 * e + 128) & 0xf800; - uint32_t g = ((298 * c - 100 * d - 208 * e + 128) >> 5) & 0x7e0; - uint32_t b = ((298 * c + 516 * d + 128) >> 11) & 0x1f; + uint32_t r = (298 * c + 409 * e + 128) & 0xf800; + uint32_t g = ((298 * c - 100 * d - 208 * e + 128) >> 5) & 0x7e0; + uint32_t b = ((298 * c + 516 * d + 128) >> 11) & 0x1f; - WORD texel = (WORD)(r | g | b); + WORD texel = (WORD)(r | g | b); - return texel; - */ + return texel; + */ } //**************************************************************** // Size: 2, Format: 1 // -uint32_t Load16bYUV (wxUIntPtr dst, wxUIntPtr /*src*/, int /*wid_64*/, int /*height*/, int /*line*/, int /*real_width*/, int tile) +uint32_t Load16bYUV(wxUIntPtr dst, wxUIntPtr /*src*/, int /*wid_64*/, int /*height*/, int /*line*/, int /*real_width*/, int tile) { - uint32_t * mb = (uint32_t*)(gfx.RDRAM+rdp.addr[rdp.tiles[tile].t_mem]); //pointer to the macro block - uint16_t * tex = (uint16_t*)dst; - uint16_t i; - for (i = 0; i < 128; i++) - { - uint32_t t = mb[i]; //each uint32_t contains 2 pixels - uint8_t y1 = (uint8_t)t&0xFF; - uint8_t v = (uint8_t)(t>>8)&0xFF; - uint8_t y0 = (uint8_t)(t>>16)&0xFF; - uint8_t u = (uint8_t)(t>>24)&0xFF; - uint16_t c = yuv_to_rgb565(y0, u, v); - *(tex++) = c; - c = yuv_to_rgb565(y1, u, v); - *(tex++) = c; - } - return (1 << 16) | GR_TEXFMT_RGB_565; + uint32_t * mb = (uint32_t*)(gfx.RDRAM + rdp.addr[rdp.tiles[tile].t_mem]); //pointer to the macro block + uint16_t * tex = (uint16_t*)dst; + uint16_t i; + for (i = 0; i < 
128; i++) + { + uint32_t t = mb[i]; //each uint32_t contains 2 pixels + uint8_t y1 = (uint8_t)t & 0xFF; + uint8_t v = (uint8_t)(t >> 8) & 0xFF; + uint8_t y0 = (uint8_t)(t >> 16) & 0xFF; + uint8_t u = (uint8_t)(t >> 24) & 0xFF; + uint16_t c = yuv_to_rgb565(y0, u, v); + *(tex++) = c; + c = yuv_to_rgb565(y1, u, v); + *(tex++) = c; + } + return (1 << 16) | GR_TEXFMT_RGB_565; } diff --git a/Source/Glide64/Util.h b/Source/Glide64/Util.h index 3cbbeb3af..22bbabd1e 100644 --- a/Source/Glide64/Util.h +++ b/Source/Glide64/Util.h @@ -44,20 +44,20 @@ #define NOT_TMU1 0x01 #define NOT_TMU2 0x02 -void util_init (); -void render_tri (uint16_t linew = 0); +void util_init(); +void render_tri(uint16_t linew = 0); -int cull_tri (VERTEX **v); -void draw_tri (VERTEX **v, uint16_t linew = 0); -void do_triangle_stuff (uint16_t linew = 0, int old_interpolate = TRUE); -void do_triangle_stuff_2 (uint16_t linew = 0); -void add_tri (VERTEX *v, int n, int type); -void apply_shade_mods (VERTEX *v); +int cull_tri(VERTEX **v); +void draw_tri(VERTEX **v, uint16_t linew = 0); +void do_triangle_stuff(uint16_t linew = 0, int old_interpolate = TRUE); +void do_triangle_stuff_2(uint16_t linew = 0); +void add_tri(VERTEX *v, int n, int type); +void apply_shade_mods(VERTEX *v); -void update (); -void update_scissor (); +void update(); +void update_scissor(); -void set_message_combiner (); +void set_message_combiner(); float ScaleZ(float z); @@ -68,12 +68,12 @@ float ScaleZ(float z); float p = (uc-ux)/(lx-ux); \ ut = p*(lt-ut)+ut; \ ux = uc; \ - } \ + } \ if (lx > lc) { \ float p = (lc-ux)/(lx-ux); \ lt = p*(lt-ut)+ut; \ lx = lc; \ - } + } #define CCLIP2(ux,lx,ut,lt,un,ln,uc,lc) \ if (ux > lx || lx < uc || ux > lc) { rdp.tri_n += 2; return; } \ @@ -82,50 +82,61 @@ float ScaleZ(float z); ut = p*(lt-ut)+ut; \ un = p*(ln-un)+un; \ ux = uc; \ - } \ + } \ if (lx > lc) { \ float p = (lc-ux)/(lx-ux); \ lt = p*(lt-ut)+ut; \ ln = p*(ln-un)+un; \ lx = lc; \ - } + } #if defined(_MSC_VER) && (defined(_M_IX86) || 
defined(_M_X64)) - #include <stdlib.h> - #define bswap32(x) _byteswap_ulong(x) +#include <stdlib.h> +#define bswap32(x) _byteswap_ulong(x) #else static inline uint32_t bswap32(uint32_t val) { - return (((val & 0xff000000) >> 24) | - ((val & 0x00ff0000) >> 8) | - ((val & 0x0000ff00) << 8) | - ((val & 0x000000ff) << 24)); + return (((val & 0xff000000) >> 24) | + ((val & 0x00ff0000) >> 8) | + ((val & 0x0000ff00) << 8) | + ((val & 0x000000ff) << 24)); } #endif #define ALOWORD(x) (*((uint16_t*)&(x))) // low word +static inline uint16_t __ROR__(uint16_t value, unsigned int count) +{ + const unsigned int nbits = sizeof(uint16_t) * 8; + count %= nbits; + + uint16_t low = (value << (nbits - count)) & 0xFFFF; + value >>= count; + value |= low; + return value; +} + template<class T> static inline T __ROR__(T value, unsigned int count) { - const unsigned int nbits = sizeof(T) * 8; - count %= nbits; + const unsigned int nbits = sizeof(T) * 8; + count %= nbits; - T low = value << (nbits - count); - value >>= count; - value |= low; - return value; + T low = value << (nbits - count); + value >>= count; + value |= low; + return value; } // rotate left template<class T> static T __ROL__(T value, unsigned int count) { - const unsigned int nbits = sizeof(T) * 8; - count %= nbits; + const unsigned int nbits = sizeof(T) * 8; + count %= nbits; - T high = value >> (nbits - count); - value <<= count; - value |= high; - return value; + T high = value >> (nbits - count); + value <<= count; + value |= high; + return value; } #endif // ifndef Util_H diff --git a/Source/Glide64/rdp.cpp b/Source/Glide64/rdp.cpp index 0ab26ed3b..a5b37637c 100644 --- a/Source/Glide64/rdp.cpp +++ b/Source/Glide64/rdp.cpp @@ -1989,8 +1989,8 @@ static void rdp_loadblock() // lr_s specifies number of 64-bit words to copy // 10.2 format - uint16_t ul_s = (uint16_t)(rdp.cmd0 >> 14) & 0x3FF; - uint16_t ul_t = (uint16_t)(rdp.cmd0 >> 2) & 0x3FF; + uint16_t ul_s = (uint16_t)((rdp.cmd0 >> 14) & 0x3FF); + uint16_t ul_t = (uint16_t)((rdp.cmd0 >> 2) & 0x3FF);
rdp.tiles[tile].ul_s = ul_s; rdp.tiles[tile].ul_t = ul_t;