From 1675a82efdb0d155cf2a22a04887d45b44d9aa5b Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 14 Feb 2016 18:55:26 +0100 Subject: [PATCH] rsx/common/d3d12/gl: Use gsl::span in TextureUtils.cpp * get_placed_texture_storage_size returns a more accurate result (fix crash in Outrun) * Factor out a lot of code and use integer types more carefully * Treat warnings as errors in TextureUtils.cpp --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 218 +++++++++++--------------- rpcs3/Emu/RSX/Common/TextureUtils.h | 4 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 10 +- rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 2 +- rpcs3/emucore.vcxproj | 8 +- 5 files changed, 106 insertions(+), 136 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 74385a34d7..1e85c90206 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -8,104 +8,51 @@ #define MAX2(a, b) ((a) > (b)) ? (a) : (b) namespace { -/** -* Write data, assume src pixels are packed but not mipmaplevel -*/ -struct texel_rgba + // FIXME: GSL as_span breaks the build if the template parameter is non-const with the current revision. + // Replace with true as_span when fixed. + template + gsl::span as_span_workaround(gsl::span unformated_span) + { + return{ (T*)unformated_span.data(), gsl::narrow(unformated_span.size_bytes() / sizeof(T)) }; + } + + // TODO: Make this function part of GSL + // Note: Doesn't handle overlapping range detection. 
+ template + constexpr void copy(gsl::span dst, gsl::span src) + { + static_assert(std::is_convertible::value, "Cannot convert source and destination span type."); + Expects(dst.size() == src.size()); + std::copy(src.begin(), src.end(), dst.begin()); + } + +struct copy_unmodified_block { - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) + template + static void copy_mipmap_level(gsl::span dst, const U *src_ptr, u16 row_count, u16 width_in_block, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { - for (unsigned row = 0; row < row_count * depth; row++) - memcpy((char*)dst + row * dst_pitch_in_block * block_size, (char*)src + row * src_pitch_in_block * block_size, width_in_block * block_size); + size_t row_element_count = dst_pitch_in_block; + static_assert(sizeof(T) == sizeof(U), "Type size doesn't match."); + gsl::span src{ src_ptr, row_count * src_pitch_in_block * depth }; + for (int row = 0; row < row_count * depth; ++row) + copy(dst.subspan(row * dst_pitch_in_block, width_in_block), src.subspan(row * src_pitch_in_block, width_in_block)); } }; - -/** -* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel -*/ -struct texel_16b_swizzled +struct copy_unmodified_block_swizzled { - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) + template + static void copy_mipmap_level(gsl::span dst, const U *src_ptr, u16 row_count, u16 width_in_block, u16 depth, u32 dst_pitch_in_block, u32) { - u16 *castedSrc = static_cast(src), *castedDst = static_cast(dst); - - std::unique_ptr temp_swizzled(new u16[row_count * width_in_block]); - rsx::convert_linear_swizzle(castedSrc, temp_swizzled.get(), src_pitch_in_block, row_count, true); - for (unsigned row = 0; row < row_count * depth; row++) - for (int j = 0; j < 
width_in_block; j++) - { - u16 tmp = temp_swizzled[row * src_pitch_in_block + j]; - castedDst[row * dst_pitch_in_block + j] = (tmp >> 8) | (tmp << 8); - } - } -}; - -/** -* Write data, assume src pixels are swizzled and but not mipmaplevel -*/ -struct texel_rgba_swizzled -{ - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) - { - u32 *castedSrc, *castedDst; - castedSrc = (u32*)src; - castedDst = (u32*)dst ; - std::unique_ptr temp_swizzled(new u32[src_pitch_in_block * row_count]); - rsx::convert_linear_swizzle(castedSrc, temp_swizzled.get(), src_pitch_in_block, row_count, true); - for (unsigned row = 0; row < row_count * depth; row++) - memcpy((char*)dst + row * dst_pitch_in_block * block_size, (char*)temp_swizzled.get() + row * src_pitch_in_block * block_size, width_in_block * block_size); - } -}; - -/** - * Write data, assume compressed (DXTCn) format - * Data are tightly packed - */ -struct texel_bc_format { - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) - { - for (unsigned row = 0; row < row_count * depth; row++) - memcpy((char*)dst + row * dst_pitch_in_block * block_size, (char*)src + row * src_pitch_in_block * block_size, width_in_block * block_size); - } -}; - -/** -* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel -*/ -struct texel_16b_format { - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) - { - unsigned short *castedDst = (unsigned short *)dst, *castedSrc = (unsigned short *)src; - - for (unsigned row = 0; row < row_count * depth; row++) - for (int j = 0; j < width_in_block; j++) - { - u16 tmp = castedSrc[row * src_pitch_in_block + j]; - castedDst[row * dst_pitch_in_block + j] = 
(tmp >> 8) | (tmp << 8); - } - } -}; - -/** -* Write 16 bytes X 4 pixel textures, assume src pixels are packed but not mipmaplevel -*/ -struct texel_16bX4_format { - template - static void copy_mipmap_level(void *dst, void *src, u16 row_count, u16 width_in_block, u16 depth, size_t dst_pitch_in_block, size_t src_pitch_in_block) - { - unsigned short *casted_dst = (unsigned short *)dst, *casted_src = (unsigned short *)src; - for (unsigned row = 0; row < row_count * depth; row++) - for (int j = 0; j < width_in_block * 4; j++) - { - u16 tmp = casted_src[row * src_pitch_in_block * 4 + j]; - casted_dst[row * dst_pitch_in_block * 4 + j] = (tmp >> 8) | (tmp << 8); - } + std::unique_ptr temp_swizzled(new U[width_in_block * row_count]); + gsl::span src{ src_ptr, gsl::narrow(width_in_block * row_count * depth) }; + for (int d = 0; d < depth; ++d) + { + rsx::convert_linear_swizzle((void*)src.subspan(d * width_in_block * row_count).data(), temp_swizzled.get(), width_in_block, row_count, true); + gsl::span swizzled_src{ temp_swizzled.get(), gsl::narrow(width_in_block * row_count) }; + for (int row = 0; row < row_count; ++row) + copy(dst.subspan((row + d * row_count) * dst_pitch_in_block, width_in_block), swizzled_src.subspan(row * width_in_block, width_in_block)); + } } }; @@ -122,39 +69,46 @@ struct texel_16bX4_format { * The alignment is 256 for mipmap levels and 512 for depth (TODO: make this customisable for Vulkan ?) * The template takes a struct with a "copy_mipmap_level" static function that copy the given mipmap level and returns the offset to add to the src buffer for next * mipmap level (to allow same code for packed/non packed texels) + * Sometimes a texture provides a pitch even though it is swizzled (and therefore packed); in that case the pitch is ignored. It is passed via suggested_pitch_in_bytes and is used only if padded_row is true. 
*/ -template -std::vector copy_texture_data(void *dst, const void *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count) +template +std::vector copy_texture_data(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes) { + /** + * Note about size type: RSX texture width is stored in a 16 bits int and pitch is stored in a 20 bits int. + */ + + // <= 128 so fits in u8 + u8 block_size_in_bytes = sizeof(DST_TYPE); + std::vector Result; size_t offsetInDst = 0, offsetInSrc = 0; - size_t texture_height_in_block = (height_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; - size_t texture_width_in_block = (width_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + // Always lower than width/height so fits in u16 + u16 texture_height_in_block = (height_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + u16 texture_width_in_block = (width_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; for (unsigned layer = 0; layer < layer_count; layer++) { - size_t miplevel_height_in_block = texture_height_in_block, miplevel_width_in_block = texture_width_in_block; + u16 miplevel_height_in_block = texture_height_in_block, miplevel_width_in_block = texture_width_in_block; for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) { - size_t dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, 256) / block_size_in_bytes; + // since mip_level is up to 16 bits needs at least 17 bits. 
+ u32 dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, 256) / block_size_in_bytes; MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; - currentMipmapLevelInfo.height = miplevel_height_in_block * block_edge_in_texel; - currentMipmapLevelInfo.width = miplevel_width_in_block * block_edge_in_texel; + // Since <= width/height, fits on 16 bits + currentMipmapLevelInfo.height = static_cast(miplevel_height_in_block * block_edge_in_texel); + currentMipmapLevelInfo.width = static_cast(miplevel_width_in_block * block_edge_in_texel); currentMipmapLevelInfo.depth = depth; - currentMipmapLevelInfo.rowPitch = dst_pitch * block_size_in_bytes; + currentMipmapLevelInfo.rowPitch = static_cast(dst_pitch * block_size_in_bytes); Result.push_back(currentMipmapLevelInfo); - if (!padded_row) - { - T::template copy_mipmap_level((char*)dst + offsetInDst, (char*)src + offsetInSrc, miplevel_height_in_block, miplevel_width_in_block, depth, dst_pitch, miplevel_width_in_block); - offsetInSrc += miplevel_height_in_block * miplevel_width_in_block * block_size_in_bytes * depth; - } - else - { - T::template copy_mipmap_level((char*)dst + offsetInDst, (char*)src + offsetInSrc, miplevel_height_in_block, miplevel_width_in_block, depth, dst_pitch, texture_width_in_block); - offsetInSrc += miplevel_height_in_block * texture_width_in_block * block_size_in_bytes * depth; - } + // TODO: uses src_pitch from texture + // src_pitch in texture can uses 20 bits so fits on 32 bits int. + u32 src_pitch_in_block = padded_row ? 
suggested_pitch_in_bytes / block_size_in_bytes : miplevel_width_in_block; + const SRC_TYPE *src_with_offset = reinterpret_cast(reinterpret_cast(src) + offsetInSrc); + T::copy_mipmap_level(dst.subspan(offsetInDst / block_size_in_bytes, dst_pitch * depth * miplevel_height_in_block), src_with_offset, miplevel_height_in_block, miplevel_width_in_block, depth, dst_pitch, src_pitch_in_block); + offsetInSrc += miplevel_height_in_block * src_pitch_in_block * block_size_in_bytes * depth; offsetInDst += align(miplevel_height_in_block * dst_pitch * block_size_in_bytes, 512); miplevel_height_in_block = MAX2(miplevel_height_in_block / 2, 1); miplevel_width_in_block = MAX2(miplevel_width_in_block / 2, 1); @@ -250,7 +204,7 @@ size_t get_texture_block_edge(u32 format) size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) { - size_t w = texture.width(), h = texture.height(); + size_t w = texture.width(), h = texture.height(), d = MAX2(texture.depth(), 1); int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); size_t blockEdge = get_texture_block_edge(format); @@ -259,12 +213,20 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi size_t heightInBlocks = (h + blockEdge - 1) / blockEdge; size_t widthInBlocks = (w + blockEdge - 1) / blockEdge; - size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement); - return rowPitch * heightInBlocks * (texture.cubemap() ? 6 : 1) * 2; // * 2 for mipmap levels + size_t result = 0; + for (unsigned mipmap = 0; mipmap < texture.mipmap(); ++mipmap) + { + size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement); + result += align(rowPitch * heightInBlocks * d, 512); + heightInBlocks = MAX2(heightInBlocks / 2, 1); + widthInBlocks = MAX2(widthInBlocks / 2, 1); + } + + return result * (texture.cubemap() ? 
6 : 1); } -std::vector upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) +std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement) { u16 w = texture.width(), h = texture.height(); u16 depth; @@ -300,38 +262,38 @@ std::vector upload_placed_texture(const rsx::texture &texture, { case CELL_GCM_TEXTURE_A8R8G8B8: if (is_swizzled) - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); else - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: if (is_swizzled) - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); else - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), 4 * w, h, depth, layer, texture.mipmap(), texture.pitch()); case CELL_GCM_TEXTURE_COMPRESSED_DXT1: if (is_swizzled) - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return 
copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); else - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); case CELL_GCM_TEXTURE_COMPRESSED_DXT23: if (is_swizzled) - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); else - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); case CELL_GCM_TEXTURE_COMPRESSED_DXT45: if (is_swizzled) - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); else - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); case CELL_GCM_TEXTURE_B8: - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); - default: - return copy_texture_data(textureData, pixels, w, h, depth, layer, texture.mipmap()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); } + throw EXCEPTION("Wrong format %d", format); } size_t get_texture_size(const rsx::texture &texture) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 4ba3a86549..34bff63c9c 100644 
--- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -8,7 +8,7 @@ struct MipmapLevelInfo u16 width; u16 height; u16 depth; - u16 rowPitch; + u32 rowPitch; }; /** @@ -22,7 +22,7 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi * Data are not packed, they are stored per rows using rowPitchAlignement. * Similarly, offset for every mipmaplevel is aligned to rowPitchAlignement boundary. */ -std::vector upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData); +std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement); /** * Get number of bytes occupied by texture in RSX mem diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index d5cf7caa24..cbfa0adbb4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -79,8 +79,9 @@ ComPtr upload_single_texture( size_t buffer_size = get_placed_texture_storage_size(texture, 256); size_t heap_offset = texture_buffer_heap.alloc(buffer_size); - void *mapped_buffer = texture_buffer_heap.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - std::vector mipInfos = upload_placed_texture(texture, 256, mapped_buffer); + void *mapped_buffer_ptr = texture_buffer_heap.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + gsl::span mapped_buffer{ (gsl::byte*)mapped_buffer_ptr, gsl::narrow(buffer_size) }; + std::vector mipInfos = upload_placed_texture(mapped_buffer, texture, 256); texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); ComPtr result; @@ -124,8 +125,9 @@ void update_existing_texture( size_t buffer_size = get_placed_texture_storage_size(texture, 256); size_t heap_offset = texture_buffer_heap.alloc(buffer_size); - void *mapped_buffer = texture_buffer_heap.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - std::vector mipInfos = 
upload_placed_texture(texture, 256, mapped_buffer); + void *mapped_buffer_ptr = texture_buffer_heap.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + gsl::span mapped_buffer{ (gsl::byte*)mapped_buffer_ptr, gsl::narrow(buffer_size) }; + std::vector mipInfos = upload_placed_texture(mapped_buffer, texture, 256); texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST)); diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp index 7b248c3a4c..61d1dca3d9 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp @@ -185,7 +185,7 @@ namespace rsx if (is_swizzled || mandates_expansion(format)) { aligned_pitch = align(aligned_pitch, 256); - upload_placed_texture(tex, 256, texture_data); + upload_placed_texture({ reinterpret_cast(texture_data), gsl::narrow(texture_data_sz) }, tex, 256); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); } else diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index fae5b2a950..887cc5a5e4 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -88,7 +88,13 @@ - + + true + true + true + true + true +