diff --git a/core/hw/mem/vmem32.cpp b/core/hw/mem/vmem32.cpp
index 7a82ececd..3d0e211f9 100644
--- a/core/hw/mem/vmem32.cpp
+++ b/core/hw/mem/vmem32.cpp
@@ -19,6 +19,7 @@
along with reicast. If not, see .
*/
#include
+#include <mutex>
#include "build.h"
#include "vmem32.h"
#include "_vmem.h"
@@ -267,20 +268,20 @@ static u32 vmem32_map_mmu(u32 address, bool write)
u32 end = start + page_size;
 	const vector<vram_block*>& blocks = vram_blocks[start / VRAM_PROT_SEGMENT];
- vramlist_lock.Lock();
- for (int i = blocks.size() - 1; i >= 0; i--)
{
- if (blocks[i].start < end && blocks[i].end >= start)
+		std::lock_guard<cMutex> lock(vramlist_lock);
+ for (int i = blocks.size() - 1; i >= 0; i--)
{
- u32 prot_start = max(start, blocks[i].start);
- u32 prot_size = min(end, blocks[i].end + 1) - prot_start;
- prot_size += prot_start % PAGE_SIZE;
- prot_start &= ~PAGE_MASK;
- vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size);
+ if (blocks[i].start < end && blocks[i].end >= start)
+ {
+ u32 prot_start = max(start, blocks[i].start);
+ u32 prot_size = min(end, blocks[i].end + 1) - prot_start;
+ prot_size += prot_start % PAGE_SIZE;
+ prot_start &= ~PAGE_MASK;
+ vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size);
+ }
}
}
- vramlist_lock.Unlock();
-
}
else if (offset >= MAP_RAM_START_OFFSET && offset < MAP_RAM_START_OFFSET + RAM_SIZE)
{
diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp
index 80d3fca62..57fa01bcc 100644
--- a/core/rend/TexCache.cpp
+++ b/core/rend/TexCache.cpp
@@ -1,4 +1,5 @@
#include
+#include <mutex>
#ifndef TARGET_NO_OPENMP
#include
#endif
@@ -22,60 +23,23 @@ u32 palette32_ram[1024];
u32 pal_hash_256[4];
u32 pal_hash_16[64];
-u32 detwiddle[2][8][1024];
-//input : address in the yyyyyxxxxx format
-//output : address in the xyxyxyxy format
-//U : x resolution , V : y resolution
-//twiddle works on 64b words
+u32 detwiddle[1024];
-
-static u32 twiddle_slow(u32 x,u32 y,u32 x_sz,u32 y_sz)
+void BuildTwiddleTable()
{
- u32 rv=0;//low 2 bits are directly passed -> needs some misc stuff to work.However
- //Pvr internally maps the 64b banks "as if" they were twiddled :p
-
- u32 sh=0;
- x_sz>>=1;
- y_sz>>=1;
- while(x_sz!=0 || y_sz!=0)
- {
- if (y_sz)
- {
- u32 temp=y&1;
-			rv|=temp<<sh;
-			y_sz>>=1;
-			y>>=1;
- sh++;
- }
- if (x_sz)
- {
- u32 temp=x&1;
-			rv|=temp<<sh;
-			x_sz>>=1;
-			x>>=1;
- sh++;
- }
- }
- return rv;
+ for (u32 j = 0; j < ARRAY_SIZE(detwiddle); j++)
+ {
+ u32 detwiddled = 0;
+ for (int i = 0; i < 10; i++)
+ {
+ u32 shift = 1 << i;
+ detwiddled |= ((j & shift) << i);
+ }
+ detwiddle[j] = detwiddled;
+ }
}
-static void BuildTwiddleTables()
-{
- for (u32 s=0;s<8;s++)
- {
- u32 x_sz=1024;
- u32 y_sz=8<type=64;
{
- vramlist_lock.Lock();
+		std::lock_guard<cMutex> lock(vramlist_lock);
// This also protects vram if needed
vramlock_list_add(block);
-
- vramlist_lock.Unlock();
}
return block;
@@ -216,7 +178,7 @@ bool VramLockedWriteOffset(size_t offset)
 	vector<vram_block*>& list = VramLocks[addr_hash];
{
- vramlist_lock.Lock();
+		std::lock_guard<cMutex> lock(vramlist_lock);
for (size_t i = 0; i < list.size(); i++)
{
@@ -235,8 +197,6 @@ bool VramLockedWriteOffset(size_t offset)
list.clear();
_vmem_unprotect_vram((u32)(offset & ~PAGE_MASK), PAGE_SIZE);
-
- vramlist_lock.Unlock();
}
return true;
@@ -254,9 +214,8 @@ bool VramLockedWrite(u8* address)
//also frees the handle
void libCore_vramlock_Unlock_block(vram_block* block)
{
- vramlist_lock.Lock();
+	std::lock_guard<cMutex> lock(vramlist_lock);
libCore_vramlock_Unlock_block_wb(block);
- vramlist_lock.Unlock();
}
void libCore_vramlock_Unlock_block_wb(vram_block* block)
@@ -409,8 +368,11 @@ static const PvrTexInfo format[8] =
{"ns/1555", 0}, // Not supported (1555)
};
-static const u32 MipPoint[8] =
+static const u32 VQMipPoint[11] =
{
+ 0x00000,//1
+ 0x00001,//2
+ 0x00002,//4
0x00006,//8
0x00016,//16
0x00056,//32
@@ -420,6 +382,20 @@ static const u32 MipPoint[8] =
0x05556,//512
0x15556//1024
};
+static const u32 OtherMipPoint[11] =
+{
+ 0x00003,//1
+ 0x00004,//2
+ 0x00008,//4
+ 0x00018,//8
+ 0x00058,//16
+ 0x00158,//32
+ 0x00558,//64
+ 0x01558,//128
+ 0x05558,//256
+ 0x15558,//512
+ 0x55558//1024
+};
static const TextureType PAL_TYPE[4] = {
TextureType::_5551, TextureType::_565, TextureType::_4444, TextureType::_8888
@@ -496,71 +472,50 @@ void BaseTextureCacheData::Create()
else if (tex->bpp == 8)
palette_index = (tcw.PalSelect >> 4) << 8;
- //VQ table (if VQ tex)
- if (tcw.VQ_Comp)
- vq_codebook = sa;
-
- //Convert a pvr texture into OpenGL
- switch (tcw.PixelFmt)
+ if (tcw.ScanOrder && (tex->PL || tex->PL32))
{
+ //Texture is stored 'planar' in memory, no deswizzle is needed
+ //verify(tcw.VQ_Comp==0);
+ if (tcw.VQ_Comp != 0)
+ WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
- case Pixel1555: //0 1555 value: 1 bit; RGB values: 5 bits each
- case PixelReserved: //7 Reserved Regarded as 1555
- case Pixel565: //1 565 R value: 5 bits; G value: 6 bits; B value: 5 bits
- case Pixel4444: //2 4444 value: 4 bits; RGB values: 4 bits each
- case PixelYUV: //3 YUV422 32 bits per 2 pixels; YUYV values: 8 bits each
- case PixelBumpMap: //4 Bump Map 16 bits/pixel; S value: 8 bits; R value: 8 bits
- case PixelPal4: //5 4 BPP Palette Palette texture with 4 bits/pixel
- case PixelPal8: //6 8 BPP Palette Palette texture with 8 bits/pixel
- if (tcw.ScanOrder && (tex->PL || tex->PL32))
+ //Planar textures support stride selection, mostly used for non power of 2 textures (videos)
+ int stride = w;
+ if (tcw.StrideSel)
+ stride = (TEXT_CONTROL & 31) * 32;
+
+ //Call the format specific conversion code
+ texconv = tex->PL;
+ texconv32 = tex->PL32;
+ //calculate the size, in bytes, for the locking
+ size = stride * h * tex->bpp / 8;
+ }
+ else
+ {
+ // Quake 3 Arena uses one
+ if (tcw.MipMapped)
+ // Mipmapped texture must be square and TexV is ignored
+ h = w;
+
+ if (tcw.VQ_Comp)
{
- //Texture is stored 'planar' in memory, no deswizzle is needed
- //verify(tcw.VQ_Comp==0);
- if (tcw.VQ_Comp != 0)
- WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
-
- //Planar textures support stride selection, mostly used for non power of 2 textures (videos)
- int stride = w;
- if (tcw.StrideSel)
- stride = (TEXT_CONTROL & 31) * 32;
-
- //Call the format specific conversion code
- texconv = tex->PL;
- texconv32 = tex->PL32;
- //calculate the size, in bytes, for the locking
- size = stride * h * tex->bpp / 8;
+ verify(tex->VQ != NULL || tex->VQ32 != NULL);
+ vq_codebook = sa;
+ if (tcw.MipMapped)
+ sa += VQMipPoint[tsp.TexU + 3];
+ texconv = tex->VQ;
+ texconv32 = tex->VQ32;
+ size = w * h / 8;
}
else
{
- // Quake 3 Arena uses one. Not sure if valid but no need to crash
- //verify(w == h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN*
-
- if (tcw.VQ_Comp)
- {
- verify(tex->VQ != NULL || tex->VQ32 != NULL);
- vq_codebook = sa;
- if (tcw.MipMapped)
- sa += MipPoint[tsp.TexU];
- texconv = tex->VQ;
- texconv32 = tex->VQ32;
- size = w * h / 8;
- }
- else
- {
- verify(tex->TW != NULL || tex->TW32 != NULL);
- if (tcw.MipMapped)
- sa += MipPoint[tsp.TexU] * tex->bpp / 2;
- texconv = tex->TW;
- texconv32 = tex->TW32;
- size = w * h * tex->bpp / 8;
- }
+ verify(tex->TW != NULL || tex->TW32 != NULL);
+ if (tcw.MipMapped)
+ sa += OtherMipPoint[tsp.TexU + 3] * tex->bpp / 8;
+ texconv = tex->TW;
+ texconv32 = tex->TW32;
+ size = w * h * tex->bpp / 8;
}
- break;
- default:
- WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt);
- size = w * h * 2;
- texconv = NULL;
- texconv32 = NULL;
}
}
@@ -631,58 +586,119 @@ void BaseTextureCacheData::Update()
PixelBuffer pb32;
// Figure out if we really need to use a 32-bit pixel buffer
+ bool textureUpscaling = settings.rend.TextureUpscale > 1
+ // Don't process textures that are too big
+ && w * h <= settings.rend.MaxFilteredTextureSize * settings.rend.MaxFilteredTextureSize
+ // Don't process YUV textures
+ && tcw.PixelFmt != PixelYUV;
bool need_32bit_buffer = true;
- if ((settings.rend.TextureUpscale <= 1
- || w * h > settings.rend.MaxFilteredTextureSize
- * settings.rend.MaxFilteredTextureSize // Don't process textures that are too big
- || tcw.PixelFmt == PixelYUV) // Don't process YUV textures
+ if (!textureUpscaling
&& (!IsPaletted() || tex_type != TextureType::_8888)
&& texconv != NULL
&& !Force32BitTexture(tex_type))
need_32bit_buffer = false;
// TODO avoid upscaling/depost. textures that change too often
+ bool mipmapped = IsMipmapped() && settings.rend.UseMipmaps;
+
if (texconv32 != NULL && need_32bit_buffer)
{
+ if (textureUpscaling)
+ // don't use mipmaps if upscaling
+ mipmapped = false;
// Force the texture type since that's the only 32-bit one we know
tex_type = TextureType::_8888;
- pb32.init(w, h);
-
- texconv32(&pb32, (u8*)&vram[sa], stride, h);
+ if (mipmapped)
+ {
+ pb32.init(w, h, true);
+ for (int i = 0; i <= tsp.TexU + 3; i++)
+ {
+ pb32.set_mipmap(i);
+ u32 vram_addr;
+ if (tcw.VQ_Comp)
+ {
+ vram_addr = sa_tex + VQMipPoint[i];
+ if (i == 0)
+ {
+ PixelBuffer pb0;
+ pb0.init(2, 2 ,false);
+ texconv32(&pb0, (u8*)&vram[vram_addr], 2, 2);
+ *pb32.data() = *pb0.data(1, 1);
+ continue;
+ }
+ }
+ else
+ vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8;
+ texconv32(&pb32, (u8*)&vram[vram_addr], 1 << i, 1 << i);
+ }
+ pb32.set_mipmap(0);
+ }
+ else
+ {
+ pb32.init(w, h);
+ texconv32(&pb32, (u8*)&vram[sa], stride, h);
#ifdef DEPOSTERIZE
- {
- // Deposterization
- PixelBuffer tmp_buf;
- tmp_buf.init(w, h);
+ {
+ // Deposterization
+ PixelBuffer tmp_buf;
+ tmp_buf.init(w, h);
- DePosterize(pb32.data(), tmp_buf.data(), w, h);
- pb32.steal_data(tmp_buf);
- }
+ DePosterize(pb32.data(), tmp_buf.data(), w, h);
+ pb32.steal_data(tmp_buf);
+ }
#endif
- // xBRZ scaling
- if (settings.rend.TextureUpscale > 1)
- {
- PixelBuffer tmp_buf;
- tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
+ // xBRZ scaling
+ if (textureUpscaling)
+ {
+ PixelBuffer tmp_buf;
+ tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
- if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
- // Alpha channel formats. Palettes with alpha are already handled
- has_alpha = true;
- UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
- pb32.steal_data(tmp_buf);
- upscaled_w *= settings.rend.TextureUpscale;
- upscaled_h *= settings.rend.TextureUpscale;
+ if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
+ // Alpha channel formats. Palettes with alpha are already handled
+ has_alpha = true;
+ UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
+ pb32.steal_data(tmp_buf);
+ upscaled_w *= settings.rend.TextureUpscale;
+ upscaled_h *= settings.rend.TextureUpscale;
+ }
}
temp_tex_buffer = pb32.data();
}
else if (texconv != NULL)
{
- pb16.init(w, h);
-
- texconv(&pb16,(u8*)&vram[sa],stride,h);
+ if (mipmapped)
+ {
+ pb16.init(w, h, true);
+ for (int i = 0; i <= tsp.TexU + 3; i++)
+ {
+ pb16.set_mipmap(i);
+ u32 vram_addr;
+ if (tcw.VQ_Comp)
+ {
+ vram_addr = sa_tex + VQMipPoint[i];
+ if (i == 0)
+ {
+ PixelBuffer pb0;
+ pb0.init(2, 2 ,false);
+ texconv(&pb0, (u8*)&vram[vram_addr], 2, 2);
+ *pb16.data() = *pb0.data(1, 1);
+ continue;
+ }
+ }
+ else
+ vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8;
+ texconv(&pb16, (u8*)&vram[vram_addr], 1 << i, 1 << i);
+ }
+ pb16.set_mipmap(0);
+ }
+ else
+ {
+ pb16.init(w, h);
+ texconv(&pb16,(u8*)&vram[sa],stride,h);
+ }
temp_tex_buffer = pb16.data();
}
else
@@ -692,6 +708,7 @@ void BaseTextureCacheData::Update()
pb16.init(w, h);
memset(pb16.data(), 0x80, w * h * 2);
temp_tex_buffer = pb16.data();
+ mipmapped = false;
}
// Restore the original texture height if it was constrained to VRAM limits above
h = original_h;
@@ -699,7 +716,7 @@ void BaseTextureCacheData::Update()
//lock the texture to detect changes in it
lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);
- UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer);
+ UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer, mipmapped);
if (settings.rend.DumpTextures)
{
ComputeHash();
@@ -713,7 +730,7 @@ void BaseTextureCacheData::CheckCustomTexture()
if (custom_load_in_progress == 0 && custom_image_data != NULL)
{
tex_type = TextureType::_8888;
- UploadToGPU(custom_width, custom_height, custom_image_data);
+ UploadToGPU(custom_width, custom_height, custom_image_data, false);
delete [] custom_image_data;
custom_image_data = NULL;
}
diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h
index 41021af1d..fae7e3db7 100644
--- a/core/rend/TexCache.h
+++ b/core/rend/TexCache.h
@@ -17,32 +17,46 @@ extern u32 pal_hash_256[4];
extern u32 pal_hash_16[64];
extern bool KillTex;
-extern u32 detwiddle[2][8][1024];
+extern u32 detwiddle[1024];
 template<class pixel_type>
class PixelBuffer
{
- pixel_type* p_buffer_start;
- pixel_type* p_current_line;
- pixel_type* p_current_pixel;
+ pixel_type* p_buffer_start = nullptr;
+ pixel_type* p_current_mipmap = nullptr;
+ pixel_type* p_current_line = nullptr;
+ pixel_type* p_current_pixel = nullptr;
u32 pixels_per_line = 0;
public:
- PixelBuffer()
- {
- p_buffer_start = p_current_line = p_current_pixel = NULL;
- }
-
~PixelBuffer()
{
deinit();
}
+ void init(u32 width, u32 height, bool mipmapped)
+ {
+ deinit();
+ size_t size = width * height * sizeof(pixel_type);
+ if (mipmapped)
+ {
+ do
+ {
+ width /= 2;
+ height /= 2;
+ size += width * height * sizeof(pixel_type);
+ }
+ while (width != 0 && height != 0);
+ }
+ p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(size);
+ this->pixels_per_line = 1;
+ }
+
void init(u32 width, u32 height)
{
deinit();
- p_buffer_start = p_current_line = p_current_pixel = (pixel_type *)malloc(width * height * sizeof(pixel_type));
+ p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(width * height * sizeof(pixel_type));
this->pixels_per_line = width;
}
@@ -51,47 +65,56 @@ public:
if (p_buffer_start != NULL)
{
free(p_buffer_start);
- p_buffer_start = p_current_line = p_current_pixel = NULL;
+ p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = NULL;
}
}
void steal_data(PixelBuffer &buffer)
{
deinit();
- p_buffer_start = p_current_line = p_current_pixel = buffer.p_buffer_start;
+ p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = buffer.p_buffer_start;
pixels_per_line = buffer.pixels_per_line;
- buffer.p_buffer_start = buffer.p_current_line = buffer.p_current_pixel = NULL;
+		buffer.p_buffer_start = buffer.p_current_mipmap = buffer.p_current_line = buffer.p_current_pixel = NULL;
+ }
+
+ void set_mipmap(int level)
+ {
+ size_t offset = 0;
+ for (int i = 0; i < level; i++)
+ offset += (1 << (2 * i));
+ p_current_mipmap = p_current_line = p_current_pixel = p_buffer_start + offset;
+ pixels_per_line = 1 << level;
}
__forceinline pixel_type *data(u32 x = 0, u32 y = 0)
{
- return p_buffer_start + pixels_per_line * y + x;
+ return p_current_mipmap + pixels_per_line * y + x;
}
- __forceinline void prel(u32 x,pixel_type value)
+ __forceinline void prel(u32 x, pixel_type value)
{
- p_current_pixel[x]=value;
+ p_current_pixel[x] = value;
}
- __forceinline void prel(u32 x,u32 y,pixel_type value)
+ __forceinline void prel(u32 x, u32 y, pixel_type value)
{
- p_current_pixel[y*pixels_per_line+x]=value;
+ p_current_pixel[y * pixels_per_line + x] = value;
}
__forceinline void rmovex(u32 value)
{
- p_current_pixel+=value;
+ p_current_pixel += value;
}
__forceinline void rmovey(u32 value)
{
- p_current_line+=pixels_per_line*value;
- p_current_pixel=p_current_line;
+ p_current_line += pixels_per_line * value;
+ p_current_pixel = p_current_line;
}
- __forceinline void amove(u32 x_m,u32 y_m)
+ __forceinline void amove(u32 x_m, u32 y_m)
{
//p_current_pixel=p_buffer_start;
- p_current_line=p_buffer_start+pixels_per_line*y_m;
- p_current_pixel=p_current_line + x_m;
+ p_current_line = p_current_mipmap + pixels_per_line * y_m;
+ p_current_pixel = p_current_line + x_m;
}
};
@@ -145,8 +168,6 @@ __forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv)
return PixelPacker::packRGB(clamp(0,255,R),clamp(0,255,G),clamp(0,255,B));
}
-#define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y])
-
//pixel packers !
struct pp_565
{
@@ -496,24 +517,23 @@ void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height)
}
}
+static inline u32 get_tw_texel_position(u32 x, u32 y)
+{
+ return detwiddle[y] | detwiddle[x] << 1;
+}
+
template
void texture_TW(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height)
{
pb->amove(0,0);
- const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp;
+ const u32 divider = PixelConvertor::xpp * PixelConvertor::ypp;
- unsigned long bcx_,bcy_;
- bcx_=bitscanrev(Width);
- bcy_=bitscanrev(Height);
- const u32 bcx=bcx_-3;
- const u32 bcy=bcy_-3;
-
- for (u32 y=0;yrmovex(PixelConvertor::xpp);
@@ -528,18 +548,14 @@ void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height)
p_in+=256*4*2;
pb->amove(0,0);
- const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp;
- unsigned long bcx_,bcy_;
- bcx_=bitscanrev(Width);
- bcy_=bitscanrev(Height);
- const u32 bcx=bcx_-3;
- const u32 bcy=bcy_-3;
+ Height /= PixelConvertor::ypp;
+ Width /= PixelConvertor::xpp;
- for (u32 y=0;yrmovex(PixelConvertor::xpp);
@@ -670,6 +686,11 @@ struct BaseTextureCacheData
return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8;
}
+ bool IsMipmapped()
+ {
+ return tcw.MipMapped != 0 && tcw.ScanOrder == 0;
+ }
+
const char* GetPixelFormatName()
{
switch (tcw.PixelFmt)
@@ -688,7 +709,7 @@ struct BaseTextureCacheData
void Create();
void ComputeHash();
void Update();
- virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) = 0;
+ virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) = 0;
virtual bool Force32BitTexture(TextureType type) const { return false; }
void CheckCustomTexture();
//true if : dirty or paletted texture and hashes don't match
diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp
index 29de1697c..89ec73fe9 100644
--- a/core/rend/gl4/gles.cpp
+++ b/core/rend/gl4/gles.cpp
@@ -501,8 +501,6 @@ static bool gl_create_resources()
// Assume the resources have already been created
return true;
- findGLVersion();
-
//create vao
glGenVertexArrays(1, &gl4.vbo.main_vao);
glGenVertexArrays(1, &gl4.vbo.modvol_vao);
@@ -538,11 +536,8 @@ extern void gl4CreateTextures(int width, int height);
static bool gles_init()
{
- int major = 0;
- int minor = 0;
- glGetIntegerv(GL_MAJOR_VERSION, &major);
- glGetIntegerv(GL_MINOR_VERSION, &minor);
- if (major < 4 || (major == 4 && minor < 3))
+ findGLVersion();
+ if (gl.gl_major < 4 || (gl.gl_major == 4 && gl.gl_minor < 3))
{
WARN_LOG(RENDERER, "Warning: OpenGL version doesn't support per-pixel sorting.");
return false;
diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp
index 5e21fc105..af7e5d143 100644
--- a/core/rend/gles/gles.cpp
+++ b/core/rend/gles/gles.cpp
@@ -469,6 +469,7 @@ void findGLVersion()
{
gl.index_type = GL_UNSIGNED_INT;
gl.gl_major = theGLContext.GetMajorVersion();
+ gl.gl_minor = theGLContext.GetMinorVersion();
gl.is_gles = theGLContext.IsGLES();
if (gl.is_gles)
{
diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h
index f6451ba66..2da72ad96 100755
--- a/core/rend/gles/gles.h
+++ b/core/rend/gles/gles.h
@@ -95,6 +95,7 @@ struct gl_ctx
const char *gl_version;
const char *glsl_version_header;
int gl_major;
+ int gl_minor;
bool is_gles;
GLuint fog_image_format;
GLenum index_type;
@@ -200,7 +201,7 @@ struct TextureCacheData : BaseTextureCacheData
GLuint texID; //gl texture
u16* pData;
virtual std::string GetId() override { return std::to_string(texID); }
- virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) override;
+ virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) override;
virtual bool Delete() override;
};
diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp
index d8825d432..4622e81ba 100644
--- a/core/rend/gles/gltex.cpp
+++ b/core/rend/gles/gltex.cpp
@@ -73,7 +73,7 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) {
free(rows);
}
-void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer)
+void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped)
{
if (texID != 0)
{
@@ -100,9 +100,66 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer)
die("Unsupported texture type");
break;
}
- glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer);
- if (tcw.MipMapped && settings.rend.UseMipmaps)
- glGenerateMipmap(GL_TEXTURE_2D);
+ if (mipmapped)
+ {
+ int mipmapLevels = 0;
+ int dim = width;
+ while (dim != 0)
+ {
+ mipmapLevels++;
+ dim >>= 1;
+ }
+#ifndef GLES2
+ // Open GL 4.2 or GLES 3.0 min
+ if (gl.gl_major > 4 || (gl.gl_major == 4 && gl.gl_minor >= 2)
+ || (gl.is_gles && gl.gl_major >= 3))
+ {
+ GLuint internalFormat;
+ switch (tex_type)
+ {
+ case TextureType::_5551:
+ internalFormat = GL_RGB5_A1;
+ break;
+ case TextureType::_565:
+ internalFormat = GL_RGB565;
+ break;
+ case TextureType::_4444:
+ internalFormat = GL_RGBA4;
+ break;
+ case TextureType::_8888:
+ internalFormat = GL_RGBA8;
+ break;
+ }
+ if (Updates == 1)
+ {
+ glTexStorage2D(GL_TEXTURE_2D, mipmapLevels, internalFormat, width, height);
+ glCheck();
+ }
+ for (int i = 0; i < mipmapLevels; i++)
+ {
+ glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer);
+ temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
+ }
+ }
+ else
+#endif
+ {
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, mipmapLevels - 1);
+ for (int i = 0; i < mipmapLevels; i++)
+ {
+ glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer);
+ temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
+ }
+ }
+ }
+ else
+ {
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+ glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer);
+ }
+ glCheck();
}
else {
#if FEAT_HAS_SOFTREND
diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp
index bcc0607f0..68a859592 100644
--- a/core/rend/vulkan/oit/oit_renderer.cpp
+++ b/core/rend/vulkan/oit/oit_renderer.cpp
@@ -65,7 +65,7 @@ public:
vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
vjoyTexture->SetDevice(GetContext()->GetDevice());
vjoyTexture->SetCommandBuffer(texCommandPool.Allocate());
- vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data);
+ vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false);
vjoyTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
delete [] image_data;
@@ -135,7 +135,7 @@ public:
curTexture->SetDevice(GetContext()->GetDevice());
}
curTexture->SetCommandBuffer(texCommandPool.Allocate());
- curTexture->UploadToGPU(width, height, (u8*)pb.data());
+ curTexture->UploadToGPU(width, height, (u8*)pb.data(), false);
curTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
@@ -287,7 +287,7 @@ private:
MakeFogTexture(texData);
fogTexture->SetCommandBuffer(texCommandPool.Allocate());
- fogTexture->UploadToGPU(128, 2, texData);
+ fogTexture->UploadToGPU(128, 2, texData, false);
fogTexture->SetCommandBuffer(nullptr);
}
diff --git a/core/rend/vulkan/texture.cpp b/core/rend/vulkan/texture.cpp
index 2e4038632..fd3c3cbf8 100644
--- a/core/rend/vulkan/texture.cpp
+++ b/core/rend/vulkan/texture.cpp
@@ -143,7 +143,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk:
commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, nullptr, nullptr, imageMemoryBarrier);
}
-void Texture::UploadToGPU(int width, int height, u8 *data)
+void Texture::UploadToGPU(int width, int height, u8 *data, bool mipmapped)
{
vk::Format format;
u32 dataSize = width * height * 2;
@@ -167,20 +167,31 @@ void Texture::UploadToGPU(int width, int height, u8 *data)
dataSize /= 2;
break;
}
+ if (mipmapped)
+ {
+ int w = width / 2;
+ u32 size = dataSize / 4;
+ while (w)
+ {
+ dataSize += size;
+ size /= 4;
+ w /= 2;
+ }
+ }
bool isNew = true;
if (width != extent.width || height != extent.height || format != this->format)
- Init(width, height, format);
+ Init(width, height, format, dataSize);
else
isNew = false;
SetImage(dataSize, data, isNew);
}
-void Texture::Init(u32 width, u32 height, vk::Format format)
+void Texture::Init(u32 width, u32 height, vk::Format format, u32 dataSize)
{
this->extent = vk::Extent2D(width, height);
this->format = format;
mipmapLevels = 1;
- if (tcw.MipMapped && settings.rend.UseMipmaps)
+ if (IsMipmapped() && settings.rend.UseMipmaps)
mipmapLevels += floor(log2(std::max(width, height)));
vk::FormatProperties formatProperties = physicalDevice.getFormatProperties(format);
@@ -195,7 +206,7 @@ void Texture::Init(u32 width, u32 height, vk::Format format)
vk::ImageUsageFlags usageFlags = vk::ImageUsageFlagBits::eSampled;
if (needsStaging)
{
-		stagingBufferData = std::unique_ptr<BufferData>(new BufferData(extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc));
+		stagingBufferData = std::unique_ptr<BufferData>(new BufferData(dataSize, vk::BufferUsageFlagBits::eTransferSrc));
usageFlags |= vk::ImageUsageFlagBits::eTransferDst;
initialLayout = vk::ImageLayout::eUndefined;
requirements = vk::MemoryPropertyFlagBits::eDeviceLocal;
@@ -206,8 +217,6 @@ void Texture::Init(u32 width, u32 height, vk::Format format)
initialLayout = vk::ImageLayout::ePreinitialized;
requirements = vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible;
}
- if (mipmapLevels > 1)
- usageFlags |= vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst;
CreateImage(imageTiling, usageFlags, initialLayout, requirements, vk::ImageAspectFlagBits::eColor);
}
@@ -252,77 +261,34 @@ void Texture::SetImage(u32 srcSize, void *srcData, bool isNew)
// Since we're going to blit to the texture image, set its layout to eTransferDstOptimal
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, isNew ? vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal,
vk::ImageLayout::eTransferDstOptimal);
- vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1));
- commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
if (mipmapLevels > 1)
- GenerateMipmaps();
+ {
+ vk::DeviceSize bufferOffset = 0;
+ for (int i = 0; i < mipmapLevels; i++)
+ {
+ vk::BufferImageCopy copyRegion(bufferOffset, 1 << i, 1 << i, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, mipmapLevels - i - 1, 0, 1),
+ vk::Offset3D(0, 0, 0), vk::Extent3D(1 << i, 1 << i, 1));
+ commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
+ bufferOffset += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
+ }
+ }
else
- // Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY
- setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
+ {
+ vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
+ vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1));
+ commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
+ }
+ // Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY
+ setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
}
else
{
- if (mipmapLevels > 1)
- GenerateMipmaps();
- else
- // If we can use the linear tiled image as a texture, just do it
- setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal);
+ // If we can use the linear tiled image as a texture, just do it
+ setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal);
}
commandBuffer.end();
}
-void Texture::GenerateMipmaps()
-{
- u32 mipWidth = extent.width;
- u32 mipHeight = extent.height;
- vk::ImageMemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead,
- vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
- *image, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
-
- for (int i = 1; i < mipmapLevels; i++)
- {
- // Transition previous mipmap level from dst optimal/preinit to src optimal
- barrier.subresourceRange.baseMipLevel = i - 1;
- if (i == 1 && !needsStaging)
- {
- barrier.oldLayout = vk::ImageLayout::ePreinitialized;
- barrier.srcAccessMask = vk::AccessFlagBits::eHostWrite;
- }
- else
- {
- barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal;
- barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
- }
- barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal;
- barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead;
- commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, nullptr, nullptr, barrier);
-
- // Blit previous mipmap level on current
- vk::ImageBlit blit(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i - 1, 0, 1),
- { { vk::Offset3D(0, 0, 0), vk::Offset3D(mipWidth, mipHeight, 1) } },
- vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i, 0, 1),
- { { vk::Offset3D(0, 0, 0), vk::Offset3D(std::max(mipWidth / 2, 1u), std::max(mipHeight / 2, 1u), 1) } });
- commandBuffer.blitImage(*image, vk::ImageLayout::eTransferSrcOptimal, *image, vk::ImageLayout::eTransferDstOptimal, 1, &blit, vk::Filter::eLinear);
-
- // Transition previous mipmap level from src optimal to shader read-only optimal
- barrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal;
- barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
- barrier.srcAccessMask = vk::AccessFlagBits::eTransferRead;
- barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
- commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier);
-
- mipWidth = std::max(mipWidth / 2, 1u);
- mipHeight = std::max(mipHeight / 2, 1u);
- }
- // Transition last mipmap level from dst optimal to shader read-only optimal
- barrier.subresourceRange.baseMipLevel = mipmapLevels - 1;
- barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal;
- barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
- barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
- barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
- commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier);
-}
-
void FramebufferAttachment::Init(u32 width, u32 height, vk::Format format, vk::ImageUsageFlags usage)
{
this->format = format;
diff --git a/core/rend/vulkan/texture.h b/core/rend/vulkan/texture.h
index 4dfc76961..ec3b30cdb 100644
--- a/core/rend/vulkan/texture.h
+++ b/core/rend/vulkan/texture.h
@@ -30,7 +30,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk:
struct Texture : BaseTextureCacheData
{
- void UploadToGPU(int width, int height, u8 *data) override;
+ void UploadToGPU(int width, int height, u8 *data, bool mipmapped) override;
u64 GetIntId() { return (u64)reinterpret_cast(this); }
std::string GetId() override { char s[20]; sprintf(s, "%p", this); return s; }
bool IsNew() const { return !image.get(); }
@@ -43,11 +43,10 @@ struct Texture : BaseTextureCacheData
void SetDevice(vk::Device device) { this->device = device; }
private:
- void Init(u32 width, u32 height, vk::Format format);
+	void Init(u32 width, u32 height, vk::Format format, u32 dataSize);
void SetImage(u32 size, void *data, bool isNew);
void CreateImage(vk::ImageTiling tiling, vk::ImageUsageFlags usage, vk::ImageLayout initialLayout,
vk::MemoryPropertyFlags memoryProperties, vk::ImageAspectFlags aspectMask);
- void GenerateMipmaps();
vk::Format format = vk::Format::eUndefined;
vk::Extent2D extent;
diff --git a/core/rend/vulkan/vmu.cpp b/core/rend/vulkan/vmu.cpp
index 63b8f0211..71898e4e3 100644
--- a/core/rend/vulkan/vmu.cpp
+++ b/core/rend/vulkan/vmu.cpp
@@ -51,7 +51,7 @@ const std::vector* VulkanVMUs::PrepareVMUs(vk::CommandP
VulkanContext::Instance()->GetDevice().allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool, vk::CommandBufferLevel::ePrimary, 1))
.front()));
texture->SetCommandBuffer(*commandBuffers[context->GetCurrentImageIndex()].back());
- texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i]);
+ texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i], false);
texture->SetCommandBuffer(nullptr);
vmu_lcd_changed[i] = false;
}
diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp
index f94ee339c..f80c0d8c2 100644
--- a/core/rend/vulkan/vulkan_renderer.cpp
+++ b/core/rend/vulkan/vulkan_renderer.cpp
@@ -61,7 +61,7 @@ public:
vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
vjoyTexture->SetDevice(GetContext()->GetDevice());
vjoyTexture->SetCommandBuffer(texCommandPool.Allocate());
- vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data);
+ vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false);
vjoyTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
delete [] image_data;
@@ -122,7 +122,7 @@ public:
curTexture->SetDevice(GetContext()->GetDevice());
}
curTexture->SetCommandBuffer(texCommandPool.Allocate());
- curTexture->UploadToGPU(width, height, (u8*)pb.data());
+ curTexture->UploadToGPU(width, height, (u8*)pb.data(), false);
curTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
@@ -275,7 +275,7 @@ private:
MakeFogTexture(texData);
fogTexture->SetCommandBuffer(texCommandPool.Allocate());
- fogTexture->UploadToGPU(128, 2, texData);
+ fogTexture->UploadToGPU(128, 2, texData, false);
fogTexture->SetCommandBuffer(nullptr);
}
diff --git a/core/stdclass.h b/core/stdclass.h
index f3e5a85a7..912d667c7 100644
--- a/core/stdclass.h
+++ b/core/stdclass.h
@@ -118,6 +118,9 @@ public :
pthread_mutex_unlock(&mutx);
#endif
}
+ // std::BasicLockable so we can use std::lock_guard
+ void lock() { Lock(); }
+ void unlock() { Unlock(); }
};
#if !defined(TARGET_IPHONE)
diff --git a/core/wsi/gl_context.cpp b/core/wsi/gl_context.cpp
index b36d31ae1..b0b1fc808 100644
--- a/core/wsi/gl_context.cpp
+++ b/core/wsi/gl_context.cpp
@@ -29,6 +29,10 @@ void GLGraphicsContext::findGLVersion()
glGetIntegerv(GL_MAJOR_VERSION, &majorVersion);
if (glGetError() == GL_INVALID_ENUM)
majorVersion = 2;
+ else
+ {
+ glGetIntegerv(GL_MINOR_VERSION, &minorVersion);
+ }
const char *version = (const char *)glGetString(GL_VERSION);
isGLES = !strncmp(version, "OpenGL ES", 9);
INFO_LOG(RENDERER, "OpenGL version: %s", version);
diff --git a/core/wsi/gl_context.h b/core/wsi/gl_context.h
index eda891299..0faec3413 100644
--- a/core/wsi/gl_context.h
+++ b/core/wsi/gl_context.h
@@ -29,6 +29,7 @@ class GLGraphicsContext
{
public:
int GetMajorVersion() const { return majorVersion; }
+ int GetMinorVersion() const { return minorVersion; }
bool IsGLES() const { return isGLES; }
protected:
@@ -38,6 +39,7 @@ protected:
private:
int majorVersion = 0;
+ int minorVersion = 0;
bool isGLES = false;
};