From 6ea406643b2e990e065fadeb13c108a9dc96015c Mon Sep 17 00:00:00 2001 From: zilmar Date: Sun, 1 May 2016 12:45:01 +1000 Subject: [PATCH] [Glide64] Deal with truncation smaller types --- Source/Glide64/TexLoad4b.h | 24 ++++++++++++------------ Source/Glide64/TexLoad8b.h | 8 ++++---- Source/Glide64/rdp.cpp | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Source/Glide64/TexLoad4b.h b/Source/Glide64/TexLoad4b.h index 23e915b9a..0aac3ea61 100644 --- a/Source/Glide64/TexLoad4b.h +++ b/Source/Glide64/TexLoad4b.h @@ -87,7 +87,7 @@ static inline void load4bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 11) & 0x1E)), 1); *v15 = v14; ++v15; - ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)v12 & 0x1E)), 1); + ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)(v12 & 0xFF) & 0x1E)), 1); v14 <<= 16; ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 3) & 0x1E)), 1); *v15 = v14; @@ -109,7 +109,7 @@ static inline void load4bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 11) & 0x1E)), 1); *v15 = v14; ++v15; - ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)v16 & 0x1E)), 1); + ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)(v16 & 0xFF) & 0x1E)), 1); v14 <<= 16; ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 3) & 0x1E)), 1); *v15 = v14; @@ -141,7 +141,7 @@ static inline void load4bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 11) & 0x1E)), 1); *v23 = v22; ++v23; - ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)v21 & 0x1E)), 1); + ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)(v21 & 0xFF) & 0x1E)), 1); v22 <<= 16; ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 3) & 0x1E)), 1); *v23 = v22; @@ -163,7 +163,7 @@ static inline void load4bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 11) & 0x1E)), 1); *v23 = v22; ++v23; - ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)v24 & 0x1E)), 1); + ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint8_t)(v24 & 0xFF) & 0x1E)), 1); v22 <<= 16; ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 3) & 0x1E)), 1); *v23 = v22; @@ -391,7 +391,7 @@ static inline void load4bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, v11 >>= 3; *v7 = ((((v13 << 8) & 0xE000000) >> 3) & 0x1000000) | ((v13 << 8) & 0xE000000) | (8 * ((v13 << 12) & 0x10000000)) | (4 * ((v13 << 12) & 0x10000000)) | (2 * ((v13 << 12) & 0x10000000)) | ((v13 << 12) & 0x10000000) | (v11 & 0x10000) | v15; v16 = v7 + 1; - v17 = 16 * (uint16_t)v13 & 0x1000; + v17 = 16 * (uint16_t)(v13 & 0xFFFF) & 0x1000; v18 = (((v13 & 0xE00) >> 3) & 0x100) | (v13 & 0xE00) | (8 * v17) | (4 * v17) | (2 * v17) | (v17) | ((((v13 >> 12) & 0xE) >> 3)) | ((v13 >> 12) & 0xE) | (8 * ((v13 >> 8) & 0x10)) | (4 * ((v13 >> 8) & 0x10)) | (2 * ((v13 >> 8) & 0x10)) | ((v13 >> 8) & 0x10); v19 = v13 << 16; v20 = (8 * (v19 & 0x100000)) | (4 * (v19 & 0x100000)) | (2 * (v19 & 0x100000)) | (v19 & 0x100000) | v18; @@ -411,7 +411,7 @@ static inline void load4bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, v23 >>= 3; *v16 = ((((v24 << 8) & 0xE000000) >> 3) & 0x1000000) | ((v24 << 8) & 0xE000000) | (8 * ((v24 << 12) & 0x10000000)) | (4 * ((v24 << 12) & 0x10000000)) | (2 * ((v24 << 12) & 0x10000000)) | ((v24 << 12) & 0x10000000) | (v23 & 0x10000) | (v26); ++v16; - v27 = 16 * (uint16_t)v24 & 0x1000; + v27 = 16 * (uint16_t)(v24 & 0xFFFF) & 0x1000; v28 = (((v24 & 0xE00) >> 3) & 0x100) | (v24 & 0xE00) | (8 * v27) | (4 * v27) | (2 * v27) | (v27) | ((((v24 >> 12) & 0xE) >> 3)) | ((v24 >> 12) & 0xE) | (8 * ((v24 >> 8) & 0x10)) | (4 * ((v24 >> 8) & 0x10)) | (2 * ((v24 >> 8) & 0x10)) | ((v24 >> 8) & 0x10); v29 = v24 << 16; v30 = (8 * (v29 & 0x100000)) | (4 * (v29 & 0x100000)) | (2 * (v29 & 0x100000)) | (v29 & 0x100000) | v28; @@ -439,7 +439,7 @@ static inline void load4bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, v38 >>= 3; *v34 = ((((v37 << 8) & 0xE000000) >> 3) & 0x1000000) | ((v37 << 8) & 0xE000000) | (8 * ((v37 << 12) & 0x10000000)) | (4 * ((v37 << 12) & 0x10000000)) | (2 * ((v37 << 12) & 0x10000000)) | ((v37 << 12) & 0x10000000) | (v38 & 0x10000) | v39; v40 = v34 + 1; - v41 = 16 * (uint16_t)v37 & 0x1000; + v41 = 16 * (uint16_t)(v37 & 0xFFFF) & 0x1000; v42 = (((v37 & 0xE00) >> 3) & 0x100) | (v37 & 0xE00) | (8 * v41) | (4 * v41) | (2 * v41) | v41 | (((v37 >> 12) & 0xE) >> 3) | ((v37 >> 12) & 0xE) | (8 * ((v37 >> 8) & 0x10)) | (4 * ((v37 >> 8) & 0x10)) | (2 * ((v37 >> 8) & 0x10)) | ((v37 >> 8) & 0x10); v43 = v37 << 16; v44 = (8 * (v43 & 0x100000)) | (4 * (v43 & 0x100000)) | (2 * (v43 & 0x100000)) | (v43 & 0x100000) | v42; @@ -459,7 +459,7 @@ static inline void load4bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, v47 >>= 3; *v40 = ((((v48 << 8) & 0xE000000) >> 3) & 0x1000000) | ((v48 << 8) & 0xE000000) | (8 * ((v48 << 12) & 0x10000000)) | (4 * ((v48 << 12) & 0x10000000)) | (2 * ((v48 << 12) & 0x10000000)) | ((v48 << 12) & 0x10000000) | (v47 & 0x10000) | v50; ++v40; - v51 = 16 * (uint16_t)v48 & 0x1000; + v51 = 16 * (uint16_t)(v48 & 0xFFFF) & 0x1000; v52 = (((v48 & 0xE00) >> 3) & 0x100) | (v48 & 0xE00) | (8 * v51) | (4 * v51) | (2 * v51) | v51 | (((v48 >> 12) & 0xE) >> 3) | ((v48 >> 12) & 0xE) | (8 * ((v48 >> 8) & 0x10)) | (4 * ((v48 >> 8) & 0x10)) | (2 * ((v48 >> 8) & 0x10)) | ((v48 >> 8) & 0x10); v53 = v48 << 16; v54 = (8 * (v53 & 0x100000)) | (4 * (v53 & 0x100000)) | (2 * (v53 & 0x100000)) | (v53 & 0x100000) | v52; @@ -527,7 +527,7 @@ static inline void load4bI(uint8_t *src, uint8_t *dst, int wid_64, int height, i *v7 = (16 * ((v13 << 8) & 0xF000000)) | ((v13 << 8) & 0xF000000) | (16 * (v11 & 0xF0000)) | (v11 & 0xF0000) | v14; v15 = v7 + 1; v16 = v13 << 12; - *v15 = (16 * ((v13 << 24) & 0xF000000)) | ((v13 << 24) & 0xF000000) | (16 * (v16 & 0xF0000)) | (v16 & 0xF0000) | (16 * (v13 & 0xF00)) | (v13 & 0xF00) | (16 * ((uint16_t)v13 >> 12)) | ((uint16_t)v13 >> 12); + *v15 = (16 * ((v13 << 24) & 0xF000000)) | ((v13 << 24) & 0xF000000) | (16 * (v16 & 0xF0000)) | (v16 & 0xF0000) | (16 * (v13 & 0xF00)) | (v13 & 0xF00) | (16 * ((uint16_t)(v13 & 0xFFFF) >> 12)) | ((uint16_t)(v13 & 0xFFFF) >> 12); ++v15; v17 = bswap32(*v12); v6 = v12 + 1; @@ -537,7 +537,7 @@ static inline void load4bI(uint8_t *src, uint8_t *dst, int wid_64, int height, i *v15 = (16 * ((v18 << 8) & 0xF000000)) | ((v18 << 8) & 0xF000000) | (16 * (v17 & 0xF0000)) | (v17 & 0xF0000) | v19; ++v15; v20 = v18 << 12; - *v15 = (16 * ((v18 << 24) & 0xF000000)) | ((v18 << 24) & 0xF000000) | (16 * (v20 & 0xF0000)) | (v20 & 0xF0000) | (16 * (v18 & 0xF00)) | (v18 & 0xF00) | (16 * ((uint16_t)v18 >> 12)) | ((uint16_t)v18 >> 12); + *v15 = (16 * ((v18 << 24) & 0xF000000)) | ((v18 << 24) & 0xF000000) | (16 * (v20 & 0xF0000)) | (v20 & 0xF0000) | (16 * (v18 & 0xF00)) | (v18 & 0xF00) | (16 * ((uint16_t)(v18 & 0xFFFF) >> 12)) | ((uint16_t)(v18 & 0xFFFF) >> 12); v7 = v15 + 1; v9 = v10 - 1; } while (v10 != 1); @@ -555,7 +555,7 @@ static inline void load4bI(uint8_t *src, uint8_t *dst, int wid_64, int height, i *v22 = (16 * ((v25 << 8) & 0xF000000)) | ((v25 << 8) & 0xF000000) | (16 * (v26 & 0xF0000)) | (v26 & 0xF0000) | (16 * ((v25 >> 16) & 0xF00)) | ((v25 >> 16) & 0xF00) | (16 * (v25 >> 28)) | (v25 >> 28); v27 = v22 + 1; v28 = v25 << 12; - *v27 = (16 * ((v25 << 24) & 0xF000000)) | ((v25 << 24) & 0xF000000) | (16 * (v28 & 0xF0000)) | (v28 & 0xF0000) | (16 * (v25 & 0xF00)) | (v25 & 0xF00) | (16 * ((uint16_t)v25 >> 12)) | ((uint16_t)v25 >> 12); + *v27 = (16 * ((v25 << 24) & 0xF000000)) | ((v25 << 24) & 0xF000000) | (16 * (v28 & 0xF0000)) | (v28 & 0xF0000) | (16 * (v25 & 0xF00)) | (v25 & 0xF00) | (16 * ((uint16_t)(v25 & 0xFFFF) >> 12)) | ((uint16_t)(v25 & 0xFFFF) >> 12); ++v27; v29 = bswap32(*v21); v21 += 2; @@ -565,7 +565,7 @@ static inline void load4bI(uint8_t *src, uint8_t *dst, int wid_64, int height, i *v27 = (16 * ((v30 << 8) & 0xF000000)) | ((v30 << 8) & 0xF000000) | (16 * (v29 & 0xF0000)) | (v29 & 0xF0000) | v31; ++v27; v32 = v30 << 12; - *v27 = (16 * ((v30 << 24) & 0xF000000)) | ((v30 << 24) & 0xF000000) | (16 * (v32 & 0xF0000)) | (v32 & 0xF0000) | (16 * (v30 & 0xF00)) | (v30 & 0xF00) | (16 * ((uint16_t)v30 >> 12)) | ((uint16_t)v30 >> 12); + *v27 = (16 * ((v30 << 24) & 0xF000000)) | ((v30 << 24) & 0xF000000) | (16 * (v32 & 0xF0000)) | (v32 & 0xF0000) | (16 * (v30 & 0xF00)) | (v30 & 0xF00) | (16 * ((uint16_t)(v30 & 0xFFFF) >> 12)) | ((uint16_t)(v30 & 0xFFFF) >> 12); v22 = v27 + 1; v23 = v24 - 1; } while (v24 != 1); diff --git a/Source/Glide64/TexLoad8b.h b/Source/Glide64/TexLoad8b.h index e5575b91e..b482142a4 100644 --- a/Source/Glide64/TexLoad8b.h +++ b/Source/Glide64/TexLoad8b.h @@ -77,7 +77,7 @@ static inline void load8bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 23) & 0x1FE)), 1); *v8 = v14; v15 = v8 + 1; - ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)v12 & 0x1FE)), 1); + ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)(v12 & 0xFFFF) & 0x1FE)), 1); v14 <<= 16; ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 7) & 0x1FE)), 1); *v15 = v14; @@ -89,7 +89,7 @@ static inline void load8bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 23) & 0x1FE)), 1); *v15 = v14; ++v15; - ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)v16 & 0x1FE)), 1); + ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)(v16 & 0xFFFF) & 0x1FE)), 1); v14 <<= 16; ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 7) & 0x1FE)), 1); *v15 = v14; @@ -111,7 +111,7 @@ static inline void load8bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 23) & 0x1FE)), 1); *v18 = v22; v23 = v18 + 1; - ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)v21 & 0x1FE)), 1); + ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)(v21 & 0xFFFF) & 0x1FE)), 1); v22 <<= 16; ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 7) & 0x1FE)), 1); *v23 = v22; @@ -123,7 +123,7 @@ static inline void load8bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 23) & 0x1FE)), 1); *v23 = v22; ++v23; - ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)v24 & 0x1FE)), 1); + ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + (2 * (uint16_t)(v24 & 0xFFFF) & 0x1FE)), 1); v22 <<= 16; ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 7) & 0x1FE)), 1); *v23 = v22; diff --git a/Source/Glide64/rdp.cpp b/Source/Glide64/rdp.cpp index d42c2e41a..fee1cf887 100644 --- a/Source/Glide64/rdp.cpp +++ b/Source/Glide64/rdp.cpp @@ -2020,7 +2020,7 @@ static inline void loadTile(uint32_t *src, uint32_t *dst, int width, int height, do { v16 = __ROL__(v16, 8); - *(uint8_t *)v7 = v16; + *(uint8_t *)v7 = (v16 & 0xFF); v7 = (uint32_t *)((char *)v7 + 1); --v15; } while (v15); @@ -2048,7 +2048,7 @@ static inline void loadTile(uint32_t *src, uint32_t *dst, int width, int height, do { v20 = __ROL__(v20, 8); - *(uint8_t *)v7 = v20; + *(uint8_t *)v7 = (v20 & 0xFF); v7 = (uint32_t *)((char *)v7 + 1); --v19; } while (v19);