Summary of this patch (text before the first "diff --git" line is ignored by
patch tools):
  * src/poly/math.h: adds the log2_floor() / log2_ceil() helpers, both built
    on lzcnt().
  * src/xenia/gpu/texture_info.{h,cc}: moves the 64-entry format table into
    FormatInfo::Get() and adds TextureInfo::GetPackedTileOffset(), which
    reports the block offset of a small (min dimension <= 16) texture inside
    its packed 32x32 tile.
  * src/xenia/gpu/gl4/texture_cache.cc: the untiling loop in UploadTexture2D
    now adds that packed offset to the x/y passed to TiledOffset2DOuter/Inner,
    and the "unpack_length % 4 == 0" assert is dropped.
  * src/xenia/gpu/xenos.h: dword_4 of xe_gpu_texture_fetch_t is split into
    unk4_0:2, mip_min_level:4, mip_max_level:4 and unk4_1:22 (32 bits total).

Review notes (to confirm against the full tree, not visible in this patch):
  * log2_floor()/log2_ceil() do not special-case v == 0; log2_ceil computes
    v - 1, which wraps for unsigned T.
  * FormatInfo::Get() indexes the 64-entry table without a range check.
  * GetPackedTileOffset() divides by format_info->block_width/block_height,
    which are 0 for the kUnknown table entries.

diff --git a/src/poly/math.h b/src/poly/math.h
index 369bc9b05..601ab6420 100644
--- a/src/poly/math.h
+++ b/src/poly/math.h
@@ -145,6 +145,15 @@ inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
   return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
 }
 
+template <typename T>
+inline T log2_floor(T v) {
+  return sizeof(T) * 8 - 1 - lzcnt(v);
+}
+template <typename T>
+inline T log2_ceil(T v) {
+  return sizeof(T) * 8 - lzcnt(v - 1);
+}
+
 template <typename T>
 inline T rotate_left(T v, uint8_t sh) {
   return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));
diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc
index 014b8d825..8f84ad76e 100644
--- a/src/xenia/gpu/gl4/texture_cache.cc
+++ b/src/xenia/gpu/gl4/texture_cache.cc
@@ -547,7 +547,6 @@ bool TextureCache::UploadTexture2D(GLuint texture,
   glTextureStorage2D(texture, 1, config.internal_format,
                      texture_info.size_2d.output_width,
                      texture_info.size_2d.output_height);
-  assert_true(unpack_length % 4 == 0);
 
   auto allocation = scratch_buffer_->Acquire(unpack_length);
 
@@ -573,26 +572,34 @@ bool TextureCache::UploadTexture2D(GLuint texture,
   } else {
     // Untile image.
     // We could do this in a shader to speed things up, as this is pretty slow.
+    // TODO(benvanik): optimize this inner loop (or work by tiles).
 
     const uint8_t* src = host_address;
     uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
     uint32_t bytes_per_block = texture_info.format_info->block_width *
                                texture_info.format_info->block_height *
                                texture_info.format_info->bits_per_pixel / 8;
+
+    // Tiled textures can be packed; get the offset into the packed texture.
+    uint32_t offset_x;
+    uint32_t offset_y;
+    TextureInfo::GetPackedTileOffset(texture_info, &offset_x, &offset_y);
+
     auto bpp = (bytes_per_block >> 2) +
                ((bytes_per_block >> 1) >> (bytes_per_block >> 2));
     for (uint32_t y = 0, output_base_offset = 0;
          y < texture_info.size_2d.block_height;
          y++, output_base_offset += texture_info.size_2d.output_pitch) {
       auto input_base_offset = TextureInfo::TiledOffset2DOuter(
-          y, (texture_info.size_2d.input_width /
-              texture_info.format_info->block_width),
+          offset_y + y, (texture_info.size_2d.input_width /
+                         texture_info.format_info->block_width),
           bpp);
       for (uint32_t x = 0, output_offset = output_base_offset;
            x < texture_info.size_2d.block_width;
            x++, output_offset += bytes_per_block) {
         auto input_offset =
-            TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
+            TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
+                                            input_base_offset) >>
             bpp;
         TextureSwap(texture_info.endianness, dest + output_offset,
                     src + input_offset * bytes_per_block, bytes_per_block);
diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc
index 82fa9ddfa..96b14ea7a 100644
--- a/src/xenia/gpu/texture_info.cc
+++ b/src/xenia/gpu/texture_info.cc
@@ -19,77 +19,86 @@ namespace gpu {
 using namespace xe::gpu::ucode;
 using namespace xe::gpu::xenos;
 
-static const FormatInfo format_infos[64] = {
-    {TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
-    {TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
-    {TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
-    {TextureFormat::k_1_5_5_5, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_5_6_5, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_6_5_5, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_8_8_8_8, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_2_10_10_10, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_8_A, FormatType::kUncompressed, 1, 1, 8},
-    {TextureFormat::k_8_B, FormatType::kUncompressed, 1, 1, 8},
-    {TextureFormat::k_8_8, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_Cr_Y1_Cb_Y0, FormatType::kCompressed, 2, 1, 16},
-    {TextureFormat::k_Y1_Cr_Y0_Cb, FormatType::kCompressed, 2, 1, 16},
-    {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
-    {TextureFormat::k_8_8_8_8_A, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_4_4_4_4, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_10_11_11, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_11_11_10, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_DXT1, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::k_DXT2_3, FormatType::kCompressed, 4, 4, 8},
-    {TextureFormat::k_DXT4_5, FormatType::kCompressed, 4, 4, 8},
-    {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
-    {TextureFormat::k_24_8, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_24_8_FLOAT, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_16, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_16, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
-    {TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 64},
-    {TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
-    {TextureFormat::k_32, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_32_32, FormatType::kUncompressed, 1, 1, 64},
-    {TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
-    {TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
-    {TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 128},
-    {TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
-    {TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
-    {TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
-    {TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
-    {TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
-    {TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1, 16},
-    {TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
-    {TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
-     32},
-    {TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
-    {TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
-     32},
-    {TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
-    {TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
-    {TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
-     1, 32},
-    {TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
-     32},
-    {TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
-     32},
-    {TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
-    {TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::k_CTX1, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
-    {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
-    {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
-};
+const FormatInfo* FormatInfo::Get(uint32_t gpu_format) {
+  static const FormatInfo format_infos[64] = {
+      {TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
+      {TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
+      {TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
+      {TextureFormat::k_1_5_5_5, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_5_6_5, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_6_5_5, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_8_8_8_8, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_2_10_10_10, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_8_A, FormatType::kUncompressed, 1, 1, 8},
+      {TextureFormat::k_8_B, FormatType::kUncompressed, 1, 1, 8},
+      {TextureFormat::k_8_8, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_Cr_Y1_Cb_Y0, FormatType::kCompressed, 2, 1, 16},
+      {TextureFormat::k_Y1_Cr_Y0_Cb, FormatType::kCompressed, 2, 1, 16},
+      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
+      {TextureFormat::k_8_8_8_8_A, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_4_4_4_4, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_10_11_11, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_11_11_10, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_DXT1, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::k_DXT2_3, FormatType::kCompressed, 4, 4, 8},
+      {TextureFormat::k_DXT4_5, FormatType::kCompressed, 4, 4, 8},
+      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
+      {TextureFormat::k_24_8, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_24_8_FLOAT, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_16, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_16_16, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
+      {TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1,
+       64},
+      {TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
+      {TextureFormat::k_32, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_32_32, FormatType::kUncompressed, 1, 1, 64},
+      {TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
+      {TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
+      {TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1,
+       128},
+      {TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
+      {TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
+      {TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
+      {TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
+      {TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
+      {TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1,
+       16},
+      {TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
+      {TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
+       16},
+      {TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
+       32},
+      {TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
+      {TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
+       32},
+      {TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
+       8},
+      {TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
+       8},
+      {TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
+       1, 32},
+      {TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1,
+       1, 32},
+      {TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
+       1, 32},
+      {TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
+      {TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::k_CTX1, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
+      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
+      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
+  };
+  return &format_infos[gpu_format];
+}
 
 bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
                           TextureInfo* out_info) {
@@ -118,7 +127,7 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
       info.depth = fetch.size_3d.depth;
       break;
   }
-  info.format_info = &format_infos[fetch.format];
+  info.format_info = FormatInfo::Get(fetch.format);
   info.endianness = static_cast<Endian>(fetch.endianness);
   info.is_tiled = fetch.tiled;
   info.input_length = 0;  // Populated below.
@@ -199,6 +208,62 @@ void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
   output_length = size_2d.output_pitch * block_height;
 }
 
+void TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
+                                      uint32_t* out_offset_x,
+                                      uint32_t* out_offset_y) {
+  // Tile size is 32x32, and once textures go <=16 they are packed into a
+  // single tile together. The math here is insane. Most sourced
+  // from graph paper and looking at dds dumps.
+  //     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+  //   0 +.4x4.+ +.....8x8.....+ +............16x16............+
+  //   1 +.4x4.+ +.....8x8.....+ +............16x16............+
+  //   2 +.4x4.+ +.....8x8.....+ +............16x16............+
+  //   3 +.4x4.+ +.....8x8.....+ +............16x16............+
+  //   4 x       +.....8x8.....+ +............16x16............+
+  //   5         +.....8x8.....+ +............16x16............+
+  //   6         +.....8x8.....+ +............16x16............+
+  //   7         +.....8x8.....+ +............16x16............+
+  //   8 2x2                     +............16x16............+
+  //   9 2x2                     +............16x16............+
+  //   0                         +............16x16............+
+  // ...                                      .....
+  // This only works for square textures, or textures that are some non-pot
+  // <= square. As soon as the aspect ratio goes weird, the textures start to
+  // stretch across tiles.
+  // if (tile_aligned(w) > tile_aligned(h)) {
+  //   // wider than tall, so packed horizontally
+  // } else if (tile_aligned(w) < tile_aligned(h)) {
+  //   // taller than wide, so packed vertically
+  // } else {
+  //   square
+  // }
+  // It's important to use logical sizes here, as the input sizes will be
+  // for the entire packed tile set, not the actual texture.
+  // The minimum dimension is what matters most: if either width or height
+  // is <= 16 this mode kicks in.
+
+  if (std::min(texture_info.size_2d.logical_width,
+               texture_info.size_2d.logical_height) > 16) {
+    // Too big, not packed.
+    *out_offset_x = 0;
+    *out_offset_y = 0;
+    return;
+  }
+
+  if (poly::log2_ceil(texture_info.size_2d.logical_width) >
+      poly::log2_ceil(texture_info.size_2d.logical_height)) {
+    // Wider than tall. Laid out vertically.
+    *out_offset_x = 0;
+    *out_offset_y = 16;
+  } else {
+    // Taller than wide. Laid out horizontally.
+    *out_offset_x = 16;
+    *out_offset_y = 0;
+  }
+  *out_offset_x /= texture_info.format_info->block_width;
+  *out_offset_y /= texture_info.format_info->block_height;
+}
+
 // https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
 uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                          uint32_t log_bpp) {
diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h
index 5240c4026..43f654b11 100644
--- a/src/xenia/gpu/texture_info.h
+++ b/src/xenia/gpu/texture_info.h
@@ -95,6 +95,8 @@ struct FormatInfo {
   uint32_t block_width;
   uint32_t block_height;
   uint32_t bits_per_pixel;
+
+  static const FormatInfo* Get(uint32_t gpu_format);
 };
 
 struct TextureInfo {
@@ -139,6 +141,9 @@ struct TextureInfo {
   static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
                       TextureInfo* out_info);
 
+  static void GetPackedTileOffset(const TextureInfo& texture_info,
+                                  uint32_t* out_offset_x,
+                                  uint32_t* out_offset_y);
   static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
                                      uint32_t log_bpp);
   static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h
index c7f7c5e79..0c516a619 100644
--- a/src/xenia/gpu/xenos.h
+++ b/src/xenia/gpu/xenos.h
@@ -318,9 +318,12 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
     uint32_t mip_filter : 2;
     uint32_t unk3_2 : 6;
     uint32_t border : 1;
-    uint32_t unk4;  // dword_4
-    uint32_t unk5 : 9;  // dword_5
-    uint32_t dimension : 2;
+    uint32_t unk4_0 : 2;  // dword_4
+    uint32_t mip_min_level : 4;
+    uint32_t mip_max_level : 4;
+    uint32_t unk4_1 : 22;
+    uint32_t unk5 : 9;  // dword_5
+    uint32_t dimension : 2;
     uint32_t unk5b : 21;
   });
   XEPACKEDSTRUCTANONYMOUS({