From 9826afa063ac4d8ec030c54c643337d6e7225790 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 7 Feb 2020 16:55:32 +0100 Subject: [PATCH] upload all texture mipmap levels to gpu implemented for gl/gl4/vulkan/oit don't auto-generate mipmaps in vulkan simpler/smaller detwiddle table use std::lock_guard with cMutex --- core/hw/mem/vmem32.cpp | 21 +- core/rend/TexCache.cpp | 313 ++++++++++++++------------ core/rend/TexCache.h | 113 ++++++---- core/rend/gl4/gles.cpp | 9 +- core/rend/gles/gles.cpp | 1 + core/rend/gles/gles.h | 3 +- core/rend/gles/gltex.cpp | 65 +++++- core/rend/vulkan/oit/oit_renderer.cpp | 6 +- core/rend/vulkan/texture.cpp | 104 +++------ core/rend/vulkan/texture.h | 5 +- core/rend/vulkan/vmu.cpp | 2 +- core/rend/vulkan/vulkan_renderer.cpp | 6 +- core/stdclass.h | 3 + core/wsi/gl_context.cpp | 4 + core/wsi/gl_context.h | 2 + 15 files changed, 362 insertions(+), 295 deletions(-) diff --git a/core/hw/mem/vmem32.cpp b/core/hw/mem/vmem32.cpp index 7a82ececd..3d0e211f9 100644 --- a/core/hw/mem/vmem32.cpp +++ b/core/hw/mem/vmem32.cpp @@ -19,6 +19,7 @@ along with reicast. If not, see . */ #include +#include #include "build.h" #include "vmem32.h" #include "_vmem.h" @@ -267,20 +268,20 @@ static u32 vmem32_map_mmu(u32 address, bool write) u32 end = start + page_size; const vector& blocks = vram_blocks[start / VRAM_PROT_SEGMENT]; - vramlist_lock.Lock(); - for (int i = blocks.size() - 1; i >= 0; i--) { - if (blocks[i].start < end && blocks[i].end >= start) + std::lock_guard lock(vramlist_lock); + for (int i = blocks.size() - 1; i >= 0; i--) { - u32 prot_start = max(start, blocks[i].start); - u32 prot_size = min(end, blocks[i].end + 1) - prot_start; - prot_size += prot_start % PAGE_SIZE; - prot_start &= ~PAGE_MASK; - vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size); + if (blocks[i].start < end && blocks[i].end >= start) + { + u32 prot_start = max(start, blocks[i].start); + u32 prot_size = min(end, blocks[i].end + 1) - prot_start; + prot_size += prot_start % PAGE_SIZE; + prot_start &= ~PAGE_MASK; + vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size); + } } } - vramlist_lock.Unlock(); - } else if (offset >= MAP_RAM_START_OFFSET && offset < MAP_RAM_START_OFFSET + RAM_SIZE) { diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 80d3fca62..57fa01bcc 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -1,4 +1,5 @@ #include +#include #ifndef TARGET_NO_OPENMP #include #endif @@ -22,60 +23,23 @@ u32 palette32_ram[1024]; u32 pal_hash_256[4]; u32 pal_hash_16[64]; -u32 detwiddle[2][8][1024]; -//input : address in the yyyyyxxxxx format -//output : address in the xyxyxyxy format -//U : x resolution , V : y resolution -//twiddle works on 64b words +u32 detwiddle[1024]; - -static u32 twiddle_slow(u32 x,u32 y,u32 x_sz,u32 y_sz) +void BuildTwiddleTable() { - u32 rv=0;//low 2 bits are directly passed -> needs some misc stuff to work.However - //Pvr internally maps the 64b banks "as if" they were twiddled :p - - u32 sh=0; - x_sz>>=1; - y_sz>>=1; - while(x_sz!=0 || y_sz!=0) - { - if (y_sz) - { - u32 temp=y&1; - rv|=temp<>=1; - y>>=1; - sh++; - } - if (x_sz) - { - u32 temp=x&1; - rv|=temp<>=1; - x>>=1; - sh++; - } - } - return rv; + for (u32 j = 0; j < ARRAY_SIZE(detwiddle); j++) + { + u32 detwiddled = 0; + for (int i = 0; i < 10; i++) + { + u32 shift = 1 << i; + detwiddled |= ((j & shift) << i); + } + detwiddle[j] = detwiddled; + } } -static void BuildTwiddleTables() -{ - for (u32 s=0;s<8;s++) - { - u32 x_sz=1024; - u32 y_sz=8<type=64; { - vramlist_lock.Lock(); + std::lock_guard lock(vramlist_lock); // This also protects vram if needed vramlock_list_add(block); - - vramlist_lock.Unlock(); } return block; @@ -216,7 +178,7 @@ bool VramLockedWriteOffset(size_t offset) vector& list = VramLocks[addr_hash]; { - vramlist_lock.Lock(); + std::lock_guard lock(vramlist_lock); for (size_t i = 0; i < list.size(); i++) { @@ -235,8 +197,6 @@ bool VramLockedWriteOffset(size_t offset) list.clear(); _vmem_unprotect_vram((u32)(offset & ~PAGE_MASK), PAGE_SIZE); - - vramlist_lock.Unlock(); } return true; @@ -254,9 +214,8 @@ bool VramLockedWrite(u8* address) //also frees the handle void libCore_vramlock_Unlock_block(vram_block* block) { - vramlist_lock.Lock(); + std::lock_guard lock(vramlist_lock); libCore_vramlock_Unlock_block_wb(block); - vramlist_lock.Unlock(); } void libCore_vramlock_Unlock_block_wb(vram_block* block) @@ -409,8 +368,11 @@ static const PvrTexInfo format[8] = {"ns/1555", 0}, // Not supported (1555) }; -static const u32 MipPoint[8] = +static const u32 VQMipPoint[11] = { + 0x00000,//1 + 0x00001,//2 + 0x00002,//4 0x00006,//8 0x00016,//16 0x00056,//32 @@ -420,6 +382,20 @@ static const u32 MipPoint[8] = 0x05556,//512 0x15556//1024 }; +static const u32 OtherMipPoint[11] = +{ + 0x00003,//1 + 0x00004,//2 + 0x00008,//4 + 0x00018,//8 + 0x00058,//16 + 0x00158,//32 + 0x00558,//64 + 0x01558,//128 + 0x05558,//256 + 0x15558,//512 + 0x55558//1024 +}; static const TextureType PAL_TYPE[4] = { TextureType::_5551, TextureType::_565, TextureType::_4444, TextureType::_8888 @@ -496,71 +472,50 @@ void BaseTextureCacheData::Create() else if (tex->bpp == 8) palette_index = (tcw.PalSelect >> 4) << 8; - //VQ table (if VQ tex) - if (tcw.VQ_Comp) - vq_codebook = sa; - - //Convert a pvr texture into OpenGL - switch (tcw.PixelFmt) + if (tcw.ScanOrder && (tex->PL || tex->PL32)) { + //Texture is stored 'planar' in memory, no deswizzle is needed + //verify(tcw.VQ_Comp==0); + if (tcw.VQ_Comp != 0) + WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); - case Pixel1555: //0 1555 value: 1 bit; RGB values: 5 bits each - case PixelReserved: //7 Reserved Regarded as 1555 - case Pixel565: //1 565 R value: 5 bits; G value: 6 bits; B value: 5 bits - case Pixel4444: //2 4444 value: 4 bits; RGB values: 4 bits each - case PixelYUV: //3 YUV422 32 bits per 2 pixels; YUYV values: 8 bits each - case PixelBumpMap: //4 Bump Map 16 bits/pixel; S value: 8 bits; R value: 8 bits - case PixelPal4: //5 4 BPP Palette Palette texture with 4 bits/pixel - case PixelPal8: //6 8 BPP Palette Palette texture with 8 bits/pixel - if (tcw.ScanOrder && (tex->PL || tex->PL32)) + //Planar textures support stride selection, mostly used for non power of 2 textures (videos) + int stride = w; + if (tcw.StrideSel) + stride = (TEXT_CONTROL & 31) * 32; + + //Call the format specific conversion code + texconv = tex->PL; + texconv32 = tex->PL32; + //calculate the size, in bytes, for the locking + size = stride * h * tex->bpp / 8; + } + else + { + // Quake 3 Arena uses one + if (tcw.MipMapped) + // Mipmapped texture must be square and TexV is ignored + h = w; + + if (tcw.VQ_Comp) { - //Texture is stored 'planar' in memory, no deswizzle is needed - //verify(tcw.VQ_Comp==0); - if (tcw.VQ_Comp != 0) - WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); - - //Planar textures support stride selection, mostly used for non power of 2 textures (videos) - int stride = w; - if (tcw.StrideSel) - stride = (TEXT_CONTROL & 31) * 32; - - //Call the format specific conversion code - texconv = tex->PL; - texconv32 = tex->PL32; - //calculate the size, in bytes, for the locking - size = stride * h * tex->bpp / 8; + verify(tex->VQ != NULL || tex->VQ32 != NULL); + vq_codebook = sa; + if (tcw.MipMapped) + sa += VQMipPoint[tsp.TexU + 3]; + texconv = tex->VQ; + texconv32 = tex->VQ32; + size = w * h / 8; } else { - // Quake 3 Arena uses one. Not sure if valid but no need to crash - //verify(w == h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN* - - if (tcw.VQ_Comp) - { - verify(tex->VQ != NULL || tex->VQ32 != NULL); - vq_codebook = sa; - if (tcw.MipMapped) - sa += MipPoint[tsp.TexU]; - texconv = tex->VQ; - texconv32 = tex->VQ32; - size = w * h / 8; - } - else - { - verify(tex->TW != NULL || tex->TW32 != NULL); - if (tcw.MipMapped) - sa += MipPoint[tsp.TexU] * tex->bpp / 2; - texconv = tex->TW; - texconv32 = tex->TW32; - size = w * h * tex->bpp / 8; - } + verify(tex->TW != NULL || tex->TW32 != NULL); + if (tcw.MipMapped) + sa += OtherMipPoint[tsp.TexU + 3] * tex->bpp / 8; + texconv = tex->TW; + texconv32 = tex->TW32; + size = w * h * tex->bpp / 8; } - break; - default: - WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt); - size = w * h * 2; - texconv = NULL; - texconv32 = NULL; } } @@ -631,58 +586,119 @@ void BaseTextureCacheData::Update() PixelBuffer pb32; // Figure out if we really need to use a 32-bit pixel buffer + bool textureUpscaling = settings.rend.TextureUpscale > 1 + // Don't process textures that are too big + && w * h <= settings.rend.MaxFilteredTextureSize * settings.rend.MaxFilteredTextureSize + // Don't process YUV textures + && tcw.PixelFmt != PixelYUV; bool need_32bit_buffer = true; - if ((settings.rend.TextureUpscale <= 1 - || w * h > settings.rend.MaxFilteredTextureSize - * settings.rend.MaxFilteredTextureSize // Don't process textures that are too big - || tcw.PixelFmt == PixelYUV) // Don't process YUV textures + if (!textureUpscaling && (!IsPaletted() || tex_type != TextureType::_8888) && texconv != NULL && !Force32BitTexture(tex_type)) need_32bit_buffer = false; // TODO avoid upscaling/depost. textures that change too often + bool mipmapped = IsMipmapped() && settings.rend.UseMipmaps; + if (texconv32 != NULL && need_32bit_buffer) { + if (textureUpscaling) + // don't use mipmaps if upscaling + mipmapped = false; // Force the texture type since that's the only 32-bit one we know tex_type = TextureType::_8888; - pb32.init(w, h); - - texconv32(&pb32, (u8*)&vram[sa], stride, h); + if (mipmapped) + { + pb32.init(w, h, true); + for (int i = 0; i <= tsp.TexU + 3; i++) + { + pb32.set_mipmap(i); + u32 vram_addr; + if (tcw.VQ_Comp) + { + vram_addr = sa_tex + VQMipPoint[i]; + if (i == 0) + { + PixelBuffer pb0; + pb0.init(2, 2 ,false); + texconv32(&pb0, (u8*)&vram[vram_addr], 2, 2); + *pb32.data() = *pb0.data(1, 1); + continue; + } + } + else + vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8; + texconv32(&pb32, (u8*)&vram[vram_addr], 1 << i, 1 << i); + } + pb32.set_mipmap(0); + } + else + { + pb32.init(w, h); + texconv32(&pb32, (u8*)&vram[sa], stride, h); #ifdef DEPOSTERIZE - { - // Deposterization - PixelBuffer tmp_buf; - tmp_buf.init(w, h); + { + // Deposterization + PixelBuffer tmp_buf; + tmp_buf.init(w, h); - DePosterize(pb32.data(), tmp_buf.data(), w, h); - pb32.steal_data(tmp_buf); - } + DePosterize(pb32.data(), tmp_buf.data(), w, h); + pb32.steal_data(tmp_buf); + } #endif - // xBRZ scaling - if (settings.rend.TextureUpscale > 1) - { - PixelBuffer tmp_buf; - tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale); + // xBRZ scaling + if (textureUpscaling) + { + PixelBuffer tmp_buf; + tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale); - if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444) - // Alpha channel formats. Palettes with alpha are already handled - has_alpha = true; - UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha); - pb32.steal_data(tmp_buf); - upscaled_w *= settings.rend.TextureUpscale; - upscaled_h *= settings.rend.TextureUpscale; + if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444) + // Alpha channel formats. Palettes with alpha are already handled + has_alpha = true; + UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha); + pb32.steal_data(tmp_buf); + upscaled_w *= settings.rend.TextureUpscale; + upscaled_h *= settings.rend.TextureUpscale; + } } temp_tex_buffer = pb32.data(); } else if (texconv != NULL) { - pb16.init(w, h); - - texconv(&pb16,(u8*)&vram[sa],stride,h); + if (mipmapped) + { + pb16.init(w, h, true); + for (int i = 0; i <= tsp.TexU + 3; i++) + { + pb16.set_mipmap(i); + u32 vram_addr; + if (tcw.VQ_Comp) + { + vram_addr = sa_tex + VQMipPoint[i]; + if (i == 0) + { + PixelBuffer pb0; + pb0.init(2, 2 ,false); + texconv(&pb0, (u8*)&vram[vram_addr], 2, 2); + *pb16.data() = *pb0.data(1, 1); + continue; + } + } + else + vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8; + texconv(&pb16, (u8*)&vram[vram_addr], 1 << i, 1 << i); + } + pb16.set_mipmap(0); + } + else + { + pb16.init(w, h); + texconv(&pb16,(u8*)&vram[sa],stride,h); + } temp_tex_buffer = pb16.data(); } else @@ -692,6 +708,7 @@ void BaseTextureCacheData::Update() pb16.init(w, h); memset(pb16.data(), 0x80, w * h * 2); temp_tex_buffer = pb16.data(); + mipmapped = false; } // Restore the original texture height if it was constrained to VRAM limits above h = original_h; @@ -699,7 +716,7 @@ void BaseTextureCacheData::Update() //lock the texture to detect changes in it lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this); - UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer); + UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer, mipmapped); if (settings.rend.DumpTextures) { ComputeHash(); @@ -713,7 +730,7 @@ void BaseTextureCacheData::CheckCustomTexture() if (custom_load_in_progress == 0 && custom_image_data != NULL) { tex_type = TextureType::_8888; - UploadToGPU(custom_width, custom_height, custom_image_data); + UploadToGPU(custom_width, custom_height, custom_image_data, false); delete [] custom_image_data; custom_image_data = NULL; } diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index 41021af1d..fae7e3db7 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -17,32 +17,46 @@ extern u32 pal_hash_256[4]; extern u32 pal_hash_16[64]; extern bool KillTex; -extern u32 detwiddle[2][8][1024]; +extern u32 detwiddle[1024]; template class PixelBuffer { - pixel_type* p_buffer_start; - pixel_type* p_current_line; - pixel_type* p_current_pixel; + pixel_type* p_buffer_start = nullptr; + pixel_type* p_current_mipmap = nullptr; + pixel_type* p_current_line = nullptr; + pixel_type* p_current_pixel = nullptr; u32 pixels_per_line = 0; public: - PixelBuffer() - { - p_buffer_start = p_current_line = p_current_pixel = NULL; - } - ~PixelBuffer() { deinit(); } + void init(u32 width, u32 height, bool mipmapped) + { + deinit(); + size_t size = width * height * sizeof(pixel_type); + if (mipmapped) + { + do + { + width /= 2; + height /= 2; + size += width * height * sizeof(pixel_type); + } + while (width != 0 && height != 0); + } + p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(size); + this->pixels_per_line = 1; + } + void init(u32 width, u32 height) { deinit(); - p_buffer_start = p_current_line = p_current_pixel = (pixel_type *)malloc(width * height * sizeof(pixel_type)); + p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(width * height * sizeof(pixel_type)); this->pixels_per_line = width; } @@ -51,47 +65,56 @@ public: if (p_buffer_start != NULL) { free(p_buffer_start); - p_buffer_start = p_current_line = p_current_pixel = NULL; + p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = NULL; } } void steal_data(PixelBuffer &buffer) { deinit(); - p_buffer_start = p_current_line = p_current_pixel = buffer.p_buffer_start; + p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = buffer.p_buffer_start; pixels_per_line = buffer.pixels_per_line; - buffer.p_buffer_start = buffer.p_current_line = buffer.p_current_pixel = NULL; + buffer.p_buffer_start = p_current_mipmap = buffer.p_current_line = buffer.p_current_pixel = NULL; + } + + void set_mipmap(int level) + { + size_t offset = 0; + for (int i = 0; i < level; i++) + offset += (1 << (2 * i)); + p_current_mipmap = p_current_line = p_current_pixel = p_buffer_start + offset; + pixels_per_line = 1 << level; } __forceinline pixel_type *data(u32 x = 0, u32 y = 0) { - return p_buffer_start + pixels_per_line * y + x; + return p_current_mipmap + pixels_per_line * y + x; } - __forceinline void prel(u32 x,pixel_type value) + __forceinline void prel(u32 x, pixel_type value) { - p_current_pixel[x]=value; + p_current_pixel[x] = value; } - __forceinline void prel(u32 x,u32 y,pixel_type value) + __forceinline void prel(u32 x, u32 y, pixel_type value) { - p_current_pixel[y*pixels_per_line+x]=value; + p_current_pixel[y * pixels_per_line + x] = value; } __forceinline void rmovex(u32 value) { - p_current_pixel+=value; + p_current_pixel += value; } __forceinline void rmovey(u32 value) { - p_current_line+=pixels_per_line*value; - p_current_pixel=p_current_line; + p_current_line += pixels_per_line * value; + p_current_pixel = p_current_line; } - __forceinline void amove(u32 x_m,u32 y_m) + __forceinline void amove(u32 x_m, u32 y_m) { //p_current_pixel=p_buffer_start; - p_current_line=p_buffer_start+pixels_per_line*y_m; - p_current_pixel=p_current_line + x_m; + p_current_line = p_current_mipmap + pixels_per_line * y_m; + p_current_pixel = p_current_line + x_m; } }; @@ -145,8 +168,6 @@ __forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv) return PixelPacker::packRGB(clamp(0,255,R),clamp(0,255,G),clamp(0,255,B)); } -#define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y]) - //pixel packers ! struct pp_565 { @@ -496,24 +517,23 @@ void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) } } +static inline u32 get_tw_texel_position(u32 x, u32 y) +{ + return detwiddle[y] | detwiddle[x] << 1; +} + template void texture_TW(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) { pb->amove(0,0); - const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp; + const u32 divider = PixelConvertor::xpp * PixelConvertor::ypp; - unsigned long bcx_,bcy_; - bcx_=bitscanrev(Width); - bcy_=bitscanrev(Height); - const u32 bcx=bcx_-3; - const u32 bcy=bcy_-3; - - for (u32 y=0;yrmovex(PixelConvertor::xpp); @@ -528,18 +548,14 @@ void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) p_in+=256*4*2; pb->amove(0,0); - const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp; - unsigned long bcx_,bcy_; - bcx_=bitscanrev(Width); - bcy_=bitscanrev(Height); - const u32 bcx=bcx_-3; - const u32 bcy=bcy_-3; + Height /= PixelConvertor::ypp; + Width /= PixelConvertor::xpp; - for (u32 y=0;yrmovex(PixelConvertor::xpp); @@ -670,6 +686,11 @@ struct BaseTextureCacheData return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8; } + bool IsMipmapped() + { + return tcw.MipMapped != 0 && tcw.ScanOrder == 0; + } + const char* GetPixelFormatName() { switch (tcw.PixelFmt) @@ -688,7 +709,7 @@ struct BaseTextureCacheData void Create(); void ComputeHash(); void Update(); - virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) = 0; + virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) = 0; virtual bool Force32BitTexture(TextureType type) const { return false; } void CheckCustomTexture(); //true if : dirty or paletted texture and hashes don't match diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 29de1697c..89ec73fe9 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -501,8 +501,6 @@ static bool gl_create_resources() // Assume the resources have already been created return true; - findGLVersion(); - //create vao glGenVertexArrays(1, &gl4.vbo.main_vao); glGenVertexArrays(1, &gl4.vbo.modvol_vao); @@ -538,11 +536,8 @@ extern void gl4CreateTextures(int width, int height); static bool gles_init() { - int major = 0; - int minor = 0; - glGetIntegerv(GL_MAJOR_VERSION, &major); - glGetIntegerv(GL_MINOR_VERSION, &minor); - if (major < 4 || (major == 4 && minor < 3)) + findGLVersion(); + if (gl.gl_major < 4 || (gl.gl_major == 4 && gl.gl_minor < 3)) { WARN_LOG(RENDERER, "Warning: OpenGL version doesn't support per-pixel sorting."); return false; diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5e21fc105..af7e5d143 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -469,6 +469,7 @@ void findGLVersion() { gl.index_type = GL_UNSIGNED_INT; gl.gl_major = theGLContext.GetMajorVersion(); + gl.gl_minor = theGLContext.GetMinorVersion(); gl.is_gles = theGLContext.IsGLES(); if (gl.is_gles) { diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index f6451ba66..2da72ad96 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -95,6 +95,7 @@ struct gl_ctx const char *gl_version; const char *glsl_version_header; int gl_major; + int gl_minor; bool is_gles; GLuint fog_image_format; GLenum index_type; @@ -200,7 +201,7 @@ struct TextureCacheData : BaseTextureCacheData GLuint texID; //gl texture u16* pData; virtual std::string GetId() override { return std::to_string(texID); } - virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) override; + virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) override; virtual bool Delete() override; }; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index d8825d432..4622e81ba 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -73,7 +73,7 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) { free(rows); } -void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer) +void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) { if (texID != 0) { @@ -100,9 +100,66 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer) die("Unsupported texture type"); break; } - glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer); - if (tcw.MipMapped && settings.rend.UseMipmaps) - glGenerateMipmap(GL_TEXTURE_2D); + if (mipmapped) + { + int mipmapLevels = 0; + int dim = width; + while (dim != 0) + { + mipmapLevels++; + dim >>= 1; + } +#ifndef GLES2 + // Open GL 4.2 or GLES 3.0 min + if (gl.gl_major > 4 || (gl.gl_major == 4 && gl.gl_minor >= 2) + || (gl.is_gles && gl.gl_major >= 3)) + { + GLuint internalFormat; + switch (tex_type) + { + case TextureType::_5551: + internalFormat = GL_RGB5_A1; + break; + case TextureType::_565: + internalFormat = GL_RGB565; + break; + case TextureType::_4444: + internalFormat = GL_RGBA4; + break; + case TextureType::_8888: + internalFormat = GL_RGBA8; + break; + } + if (Updates == 1) + { + glTexStorage2D(GL_TEXTURE_2D, mipmapLevels, internalFormat, width, height); + glCheck(); + } + for (int i = 0; i < mipmapLevels; i++) + { + glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer); + temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); + } + } + else +#endif + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, mipmapLevels - 1); + for (int i = 0; i < mipmapLevels; i++) + { + glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer); + temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); + } + } + } + else + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer); + } + glCheck(); } else { #if FEAT_HAS_SOFTREND diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp index bcc0607f0..68a859592 100644 --- a/core/rend/vulkan/oit/oit_renderer.cpp +++ b/core/rend/vulkan/oit/oit_renderer.cpp @@ -65,7 +65,7 @@ public: vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice()); vjoyTexture->SetDevice(GetContext()->GetDevice()); vjoyTexture->SetCommandBuffer(texCommandPool.Allocate()); - vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data); + vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false); vjoyTexture->SetCommandBuffer(nullptr); texCommandPool.EndFrame(); delete [] image_data; @@ -135,7 +135,7 @@ public: curTexture->SetDevice(GetContext()->GetDevice()); } curTexture->SetCommandBuffer(texCommandPool.Allocate()); - curTexture->UploadToGPU(width, height, (u8*)pb.data()); + curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); curTexture->SetCommandBuffer(nullptr); texCommandPool.EndFrame(); @@ -287,7 +287,7 @@ private: MakeFogTexture(texData); fogTexture->SetCommandBuffer(texCommandPool.Allocate()); - fogTexture->UploadToGPU(128, 2, texData); + fogTexture->UploadToGPU(128, 2, texData, false); fogTexture->SetCommandBuffer(nullptr); } diff --git a/core/rend/vulkan/texture.cpp b/core/rend/vulkan/texture.cpp index 2e4038632..fd3c3cbf8 100644 --- a/core/rend/vulkan/texture.cpp +++ b/core/rend/vulkan/texture.cpp @@ -143,7 +143,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk: commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, nullptr, nullptr, imageMemoryBarrier); } -void Texture::UploadToGPU(int width, int height, u8 *data) +void Texture::UploadToGPU(int width, int height, u8 *data, bool mipmapped) { vk::Format format; u32 dataSize = width * height * 2; @@ -167,20 +167,31 @@ void Texture::UploadToGPU(int width, int height, u8 *data) dataSize /= 2; break; } + if (mipmapped) + { + int w = width / 2; + u32 size = dataSize / 4; + while (w) + { + dataSize += size; + size /= 4; + w /= 2; + } + } bool isNew = true; if (width != extent.width || height != extent.height || format != this->format) - Init(width, height, format); + Init(width, height, format, dataSize); else isNew = false; SetImage(dataSize, data, isNew); } -void Texture::Init(u32 width, u32 height, vk::Format format) +void Texture::Init(u32 width, u32 height, vk::Format format, u32 dataSize) { this->extent = vk::Extent2D(width, height); this->format = format; mipmapLevels = 1; - if (tcw.MipMapped && settings.rend.UseMipmaps) + if (IsMipmapped() && settings.rend.UseMipmaps) mipmapLevels += floor(log2(std::max(width, height))); vk::FormatProperties formatProperties = physicalDevice.getFormatProperties(format); @@ -195,7 +206,7 @@ void Texture::Init(u32 width, u32 height, vk::Format format) vk::ImageUsageFlags usageFlags = vk::ImageUsageFlagBits::eSampled; if (needsStaging) { - stagingBufferData = std::unique_ptr(new BufferData(extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc)); + stagingBufferData = std::unique_ptr(new BufferData(dataSize, vk::BufferUsageFlagBits::eTransferSrc)); usageFlags |= vk::ImageUsageFlagBits::eTransferDst; initialLayout = vk::ImageLayout::eUndefined; requirements = vk::MemoryPropertyFlagBits::eDeviceLocal; @@ -206,8 +217,6 @@ void Texture::Init(u32 width, u32 height, vk::Format format) initialLayout = vk::ImageLayout::ePreinitialized; requirements = vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible; } - if (mipmapLevels > 1) - usageFlags |= vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst; CreateImage(imageTiling, usageFlags, initialLayout, requirements, vk::ImageAspectFlagBits::eColor); } @@ -252,77 +261,34 @@ void Texture::SetImage(u32 srcSize, void *srcData, bool isNew) // Since we're going to blit to the texture image, set its layout to eTransferDstOptimal setImageLayout(commandBuffer, image.get(), format, mipmapLevels, isNew ? vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferDstOptimal); - vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1)); - commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion); if (mipmapLevels > 1) - GenerateMipmaps(); + { + vk::DeviceSize bufferOffset = 0; + for (int i = 0; i < mipmapLevels; i++) + { + vk::BufferImageCopy copyRegion(bufferOffset, 1 << i, 1 << i, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, mipmapLevels - i - 1, 0, 1), + vk::Offset3D(0, 0, 0), vk::Extent3D(1 << i, 1 << i, 1)); + commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion); + bufferOffset += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); + } + } else - // Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY - setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal); + { + vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1)); + commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion); + } + // Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY + setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal); } else { - if (mipmapLevels > 1) - GenerateMipmaps(); - else - // If we can use the linear tiled image as a texture, just do it - setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal); + // If we can use the linear tiled image as a texture, just do it + setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal); } commandBuffer.end(); } -void Texture::GenerateMipmaps() -{ - u32 mipWidth = extent.width; - u32 mipHeight = extent.height; - vk::ImageMemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, - vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - *image, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1)); - - for (int i = 1; i < mipmapLevels; i++) - { - // Transition previous mipmap level from dst optimal/preinit to src optimal - barrier.subresourceRange.baseMipLevel = i - 1; - if (i == 1 && !needsStaging) - { - barrier.oldLayout = vk::ImageLayout::ePreinitialized; - barrier.srcAccessMask = vk::AccessFlagBits::eHostWrite; - } - else - { - barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; - barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; - } - barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; - barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, nullptr, nullptr, barrier); - - // Blit previous mipmap level on current - vk::ImageBlit blit(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i - 1, 0, 1), - { { vk::Offset3D(0, 0, 0), vk::Offset3D(mipWidth, mipHeight, 1) } }, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i, 0, 1), - { { vk::Offset3D(0, 0, 0), vk::Offset3D(std::max(mipWidth / 2, 1u), std::max(mipHeight / 2, 1u), 1) } }); - commandBuffer.blitImage(*image, vk::ImageLayout::eTransferSrcOptimal, *image, vk::ImageLayout::eTransferDstOptimal, 1, &blit, vk::Filter::eLinear); - - // Transition previous mipmap level from src optimal to shader read-only optimal - barrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal; - barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; - barrier.srcAccessMask = vk::AccessFlagBits::eTransferRead; - barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier); - - mipWidth = std::max(mipWidth / 2, 1u); - mipHeight = std::max(mipHeight / 2, 1u); - } - // Transition last mipmap level from dst optimal to shader read-only optimal - barrier.subresourceRange.baseMipLevel = mipmapLevels - 1; - barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; - barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; - barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; - barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier); -} - void FramebufferAttachment::Init(u32 width, u32 height, vk::Format format, vk::ImageUsageFlags usage) { this->format = format; diff --git a/core/rend/vulkan/texture.h b/core/rend/vulkan/texture.h index 4dfc76961..ec3b30cdb 100644 --- a/core/rend/vulkan/texture.h +++ b/core/rend/vulkan/texture.h @@ -30,7 +30,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk: struct Texture : BaseTextureCacheData { - void UploadToGPU(int width, int height, u8 *data) override; + void UploadToGPU(int width, int height, u8 *data, bool mipmapped) override; u64 GetIntId() { return (u64)reinterpret_cast(this); } std::string GetId() override { char s[20]; sprintf(s, "%p", this); return s; } bool IsNew() const { return !image.get(); } @@ -43,11 +43,10 @@ struct Texture : BaseTextureCacheData void SetDevice(vk::Device device) { this->device = device; } private: - void Init(u32 width, u32 height, vk::Format format); + void Init(u32 width, u32 height, vk::Format format ,u32 dataSize); void SetImage(u32 size, void *data, bool isNew); void CreateImage(vk::ImageTiling tiling, vk::ImageUsageFlags usage, vk::ImageLayout initialLayout, vk::MemoryPropertyFlags memoryProperties, vk::ImageAspectFlags aspectMask); - void GenerateMipmaps(); vk::Format format = vk::Format::eUndefined; vk::Extent2D extent; diff --git a/core/rend/vulkan/vmu.cpp b/core/rend/vulkan/vmu.cpp index 63b8f0211..71898e4e3 100644 --- a/core/rend/vulkan/vmu.cpp +++ b/core/rend/vulkan/vmu.cpp @@ -51,7 +51,7 @@ const std::vector* VulkanVMUs::PrepareVMUs(vk::CommandP VulkanContext::Instance()->GetDevice().allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool, vk::CommandBufferLevel::ePrimary, 1)) .front())); texture->SetCommandBuffer(*commandBuffers[context->GetCurrentImageIndex()].back()); - texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i]); + texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i], false); texture->SetCommandBuffer(nullptr); vmu_lcd_changed[i] = false; } diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index f94ee339c..f80c0d8c2 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -61,7 +61,7 @@ public: vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice()); vjoyTexture->SetDevice(GetContext()->GetDevice()); vjoyTexture->SetCommandBuffer(texCommandPool.Allocate()); - vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data); + vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false); vjoyTexture->SetCommandBuffer(nullptr); texCommandPool.EndFrame(); delete [] image_data; @@ -122,7 +122,7 @@ public: curTexture->SetDevice(GetContext()->GetDevice()); } curTexture->SetCommandBuffer(texCommandPool.Allocate()); - curTexture->UploadToGPU(width, height, (u8*)pb.data()); + curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); curTexture->SetCommandBuffer(nullptr); texCommandPool.EndFrame(); @@ -275,7 +275,7 @@ private: MakeFogTexture(texData); fogTexture->SetCommandBuffer(texCommandPool.Allocate()); - fogTexture->UploadToGPU(128, 2, texData); + fogTexture->UploadToGPU(128, 2, texData, false); fogTexture->SetCommandBuffer(nullptr); } diff --git a/core/stdclass.h b/core/stdclass.h index f3e5a85a7..912d667c7 100644 --- a/core/stdclass.h +++ b/core/stdclass.h @@ -118,6 +118,9 @@ public : pthread_mutex_unlock(&mutx); #endif } + // std::BasicLockable so we can use std::lock_guard + void lock() { Lock(); } + void unlock() { Unlock(); } }; #if !defined(TARGET_IPHONE) diff --git a/core/wsi/gl_context.cpp b/core/wsi/gl_context.cpp index b36d31ae1..b0b1fc808 100644 --- a/core/wsi/gl_context.cpp +++ b/core/wsi/gl_context.cpp @@ -29,6 +29,10 @@ void GLGraphicsContext::findGLVersion() glGetIntegerv(GL_MAJOR_VERSION, &majorVersion); if (glGetError() == GL_INVALID_ENUM) majorVersion = 2; + else + { + glGetIntegerv(GL_MINOR_VERSION, &minorVersion); + } const char *version = (const char *)glGetString(GL_VERSION); isGLES = !strncmp(version, "OpenGL ES", 9); INFO_LOG(RENDERER, "OpenGL version: %s", version); diff --git a/core/wsi/gl_context.h b/core/wsi/gl_context.h index eda891299..0faec3413 100644 --- a/core/wsi/gl_context.h +++ b/core/wsi/gl_context.h @@ -29,6 +29,7 @@ class GLGraphicsContext { public: int GetMajorVersion() const { return majorVersion; } + int GetMinorVersion() const { return minorVersion; } bool IsGLES() const { return isGLES; } protected: @@ -38,6 +39,7 @@ protected: private: int majorVersion = 0; + int minorVersion = 0; bool isGLES = false; };