Upload all texture mipmap levels to the GPU

Implemented for the GL, GL4, Vulkan and OIT renderers.
Don't auto-generate mipmaps in Vulkan.
Use a simpler/smaller detwiddle table.
Use std::lock_guard with cMutex.
This commit is contained in:
Flyinghead 2020-02-07 16:55:32 +01:00
parent 49c6a55f24
commit 9826afa063
15 changed files with 362 additions and 295 deletions

View File

@ -19,6 +19,7 @@
along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/
#include <unordered_set>
#include <mutex>
#include "build.h"
#include "vmem32.h"
#include "_vmem.h"
@ -267,20 +268,20 @@ static u32 vmem32_map_mmu(u32 address, bool write)
u32 end = start + page_size;
const vector<vram_lock>& blocks = vram_blocks[start / VRAM_PROT_SEGMENT];
vramlist_lock.Lock();
for (int i = blocks.size() - 1; i >= 0; i--)
{
if (blocks[i].start < end && blocks[i].end >= start)
std::lock_guard<cMutex> lock(vramlist_lock);
for (int i = blocks.size() - 1; i >= 0; i--)
{
u32 prot_start = max(start, blocks[i].start);
u32 prot_size = min(end, blocks[i].end + 1) - prot_start;
prot_size += prot_start % PAGE_SIZE;
prot_start &= ~PAGE_MASK;
vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size);
if (blocks[i].start < end && blocks[i].end >= start)
{
u32 prot_start = max(start, blocks[i].start);
u32 prot_size = min(end, blocks[i].end + 1) - prot_start;
prot_size += prot_start % PAGE_SIZE;
prot_start &= ~PAGE_MASK;
vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size);
}
}
}
vramlist_lock.Unlock();
}
else if (offset >= MAP_RAM_START_OFFSET && offset < MAP_RAM_START_OFFSET + RAM_SIZE)
{

View File

@ -1,4 +1,5 @@
#include <algorithm>
#include <mutex>
#ifndef TARGET_NO_OPENMP
#include <omp.h>
#endif
@ -22,60 +23,23 @@ u32 palette32_ram[1024];
u32 pal_hash_256[4];
u32 pal_hash_16[64];
u32 detwiddle[2][8][1024];
//input : address in the yyyyyxxxxx format
//output : address in the xyxyxyxy format
//U : x resolution , V : y resolution
//twiddle works on 64b words
u32 detwiddle[1024];
static u32 twiddle_slow(u32 x,u32 y,u32 x_sz,u32 y_sz)
void BuildTwiddleTable()
{
u32 rv=0;//low 2 bits are directly passed -> needs some misc stuff to work.However
//Pvr internally maps the 64b banks "as if" they were twiddled :p
u32 sh=0;
x_sz>>=1;
y_sz>>=1;
while(x_sz!=0 || y_sz!=0)
{
if (y_sz)
{
u32 temp=y&1;
rv|=temp<<sh;
y_sz>>=1;
y>>=1;
sh++;
}
if (x_sz)
{
u32 temp=x&1;
rv|=temp<<sh;
x_sz>>=1;
x>>=1;
sh++;
}
}
return rv;
for (u32 j = 0; j < ARRAY_SIZE(detwiddle); j++)
{
u32 detwiddled = 0;
for (int i = 0; i < 10; i++)
{
u32 shift = 1 << i;
detwiddled |= ((j & shift) << i);
}
detwiddle[j] = detwiddled;
}
}
static void BuildTwiddleTables()
{
for (u32 s=0;s<8;s++)
{
u32 x_sz=1024;
u32 y_sz=8<<s;
for (u32 i=0;i<x_sz;i++)
{
detwiddle[0][s][i]=twiddle_slow(i,0,x_sz,y_sz);
detwiddle[1][s][i]=twiddle_slow(0,i,y_sz,x_sz);
}
}
}
static OnLoad btt(&BuildTwiddleTables);
static OnLoad btt(&BuildTwiddleTable);
void palette_update()
{
@ -196,12 +160,10 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user
block->type=64;
{
vramlist_lock.Lock();
std::lock_guard<cMutex> lock(vramlist_lock);
// This also protects vram if needed
vramlock_list_add(block);
vramlist_lock.Unlock();
}
return block;
@ -216,7 +178,7 @@ bool VramLockedWriteOffset(size_t offset)
vector<vram_block *>& list = VramLocks[addr_hash];
{
vramlist_lock.Lock();
std::lock_guard<cMutex> lock(vramlist_lock);
for (size_t i = 0; i < list.size(); i++)
{
@ -235,8 +197,6 @@ bool VramLockedWriteOffset(size_t offset)
list.clear();
_vmem_unprotect_vram((u32)(offset & ~PAGE_MASK), PAGE_SIZE);
vramlist_lock.Unlock();
}
return true;
@ -254,9 +214,8 @@ bool VramLockedWrite(u8* address)
//also frees the handle
// Unlock a vram block (and free its handle) under the vram list mutex.
void libCore_vramlock_Unlock_block(vram_block* block)
{
	// RAII guard: the merged text also kept the legacy Lock()/Unlock() calls,
	// which would lock the mutex twice and unlock it after the guard released it.
	std::lock_guard<cMutex> lock(vramlist_lock);
	libCore_vramlock_Unlock_block_wb(block);
}
void libCore_vramlock_Unlock_block_wb(vram_block* block)
@ -409,8 +368,11 @@ static const PvrTexInfo format[8] =
{"ns/1555", 0}, // Not supported (1555)
};
static const u32 MipPoint[8] =
static const u32 VQMipPoint[11] =
{
0x00000,//1
0x00001,//2
0x00002,//4
0x00006,//8
0x00016,//16
0x00056,//32
@ -420,6 +382,20 @@ static const u32 MipPoint[8] =
0x05556,//512
0x15556//1024
};
// Texel offset of each mip level's data for non-VQ mipmapped textures,
// indexed by log2 of the texture dimension. Levels are stored smallest-first:
// entry i = 3 + sum of 4^j for j < i. The fixed +3 base skips a small header
// before the 1x1 level (presumably padding/constant-color data — confirm
// against PVR texture layout docs).
static const u32 OtherMipPoint[11] =
{
0x00003,//1
0x00004,//2
0x00008,//4
0x00018,//8
0x00058,//16
0x00158,//32
0x00558,//64
0x01558,//128
0x05558,//256
0x15558,//512
0x55558//1024
};
static const TextureType PAL_TYPE[4] = {
TextureType::_5551, TextureType::_565, TextureType::_4444, TextureType::_8888
@ -496,71 +472,50 @@ void BaseTextureCacheData::Create()
else if (tex->bpp == 8)
palette_index = (tcw.PalSelect >> 4) << 8;
//VQ table (if VQ tex)
if (tcw.VQ_Comp)
vq_codebook = sa;
//Convert a pvr texture into OpenGL
switch (tcw.PixelFmt)
if (tcw.ScanOrder && (tex->PL || tex->PL32))
{
//Texture is stored 'planar' in memory, no deswizzle is needed
//verify(tcw.VQ_Comp==0);
if (tcw.VQ_Comp != 0)
WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
case Pixel1555: //0 1555 value: 1 bit; RGB values: 5 bits each
case PixelReserved: //7 Reserved Regarded as 1555
case Pixel565: //1 565 R value: 5 bits; G value: 6 bits; B value: 5 bits
case Pixel4444: //2 4444 value: 4 bits; RGB values: 4 bits each
case PixelYUV: //3 YUV422 32 bits per 2 pixels; YUYV values: 8 bits each
case PixelBumpMap: //4 Bump Map 16 bits/pixel; S value: 8 bits; R value: 8 bits
case PixelPal4: //5 4 BPP Palette Palette texture with 4 bits/pixel
case PixelPal8: //6 8 BPP Palette Palette texture with 8 bits/pixel
if (tcw.ScanOrder && (tex->PL || tex->PL32))
//Planar textures support stride selection, mostly used for non power of 2 textures (videos)
int stride = w;
if (tcw.StrideSel)
stride = (TEXT_CONTROL & 31) * 32;
//Call the format specific conversion code
texconv = tex->PL;
texconv32 = tex->PL32;
//calculate the size, in bytes, for the locking
size = stride * h * tex->bpp / 8;
}
else
{
// Quake 3 Arena uses one
if (tcw.MipMapped)
// Mipmapped texture must be square and TexV is ignored
h = w;
if (tcw.VQ_Comp)
{
//Texture is stored 'planar' in memory, no deswizzle is needed
//verify(tcw.VQ_Comp==0);
if (tcw.VQ_Comp != 0)
WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
//Planar textures support stride selection, mostly used for non power of 2 textures (videos)
int stride = w;
if (tcw.StrideSel)
stride = (TEXT_CONTROL & 31) * 32;
//Call the format specific conversion code
texconv = tex->PL;
texconv32 = tex->PL32;
//calculate the size, in bytes, for the locking
size = stride * h * tex->bpp / 8;
verify(tex->VQ != NULL || tex->VQ32 != NULL);
vq_codebook = sa;
if (tcw.MipMapped)
sa += VQMipPoint[tsp.TexU + 3];
texconv = tex->VQ;
texconv32 = tex->VQ32;
size = w * h / 8;
}
else
{
// Quake 3 Arena uses one. Not sure if valid but no need to crash
//verify(w == h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN*
if (tcw.VQ_Comp)
{
verify(tex->VQ != NULL || tex->VQ32 != NULL);
vq_codebook = sa;
if (tcw.MipMapped)
sa += MipPoint[tsp.TexU];
texconv = tex->VQ;
texconv32 = tex->VQ32;
size = w * h / 8;
}
else
{
verify(tex->TW != NULL || tex->TW32 != NULL);
if (tcw.MipMapped)
sa += MipPoint[tsp.TexU] * tex->bpp / 2;
texconv = tex->TW;
texconv32 = tex->TW32;
size = w * h * tex->bpp / 8;
}
verify(tex->TW != NULL || tex->TW32 != NULL);
if (tcw.MipMapped)
sa += OtherMipPoint[tsp.TexU + 3] * tex->bpp / 8;
texconv = tex->TW;
texconv32 = tex->TW32;
size = w * h * tex->bpp / 8;
}
break;
default:
WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt);
size = w * h * 2;
texconv = NULL;
texconv32 = NULL;
}
}
@ -631,58 +586,119 @@ void BaseTextureCacheData::Update()
PixelBuffer<u32> pb32;
// Figure out if we really need to use a 32-bit pixel buffer
bool textureUpscaling = settings.rend.TextureUpscale > 1
// Don't process textures that are too big
&& w * h <= settings.rend.MaxFilteredTextureSize * settings.rend.MaxFilteredTextureSize
// Don't process YUV textures
&& tcw.PixelFmt != PixelYUV;
bool need_32bit_buffer = true;
if ((settings.rend.TextureUpscale <= 1
|| w * h > settings.rend.MaxFilteredTextureSize
* settings.rend.MaxFilteredTextureSize // Don't process textures that are too big
|| tcw.PixelFmt == PixelYUV) // Don't process YUV textures
if (!textureUpscaling
&& (!IsPaletted() || tex_type != TextureType::_8888)
&& texconv != NULL
&& !Force32BitTexture(tex_type))
need_32bit_buffer = false;
// TODO avoid upscaling/depost. textures that change too often
bool mipmapped = IsMipmapped() && settings.rend.UseMipmaps;
if (texconv32 != NULL && need_32bit_buffer)
{
if (textureUpscaling)
// don't use mipmaps if upscaling
mipmapped = false;
// Force the texture type since that's the only 32-bit one we know
tex_type = TextureType::_8888;
pb32.init(w, h);
texconv32(&pb32, (u8*)&vram[sa], stride, h);
if (mipmapped)
{
pb32.init(w, h, true);
for (int i = 0; i <= tsp.TexU + 3; i++)
{
pb32.set_mipmap(i);
u32 vram_addr;
if (tcw.VQ_Comp)
{
vram_addr = sa_tex + VQMipPoint[i];
if (i == 0)
{
PixelBuffer<u32> pb0;
pb0.init(2, 2 ,false);
texconv32(&pb0, (u8*)&vram[vram_addr], 2, 2);
*pb32.data() = *pb0.data(1, 1);
continue;
}
}
else
vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8;
texconv32(&pb32, (u8*)&vram[vram_addr], 1 << i, 1 << i);
}
pb32.set_mipmap(0);
}
else
{
pb32.init(w, h);
texconv32(&pb32, (u8*)&vram[sa], stride, h);
#ifdef DEPOSTERIZE
{
// Deposterization
PixelBuffer<u32> tmp_buf;
tmp_buf.init(w, h);
{
// Deposterization
PixelBuffer<u32> tmp_buf;
tmp_buf.init(w, h);
DePosterize(pb32.data(), tmp_buf.data(), w, h);
pb32.steal_data(tmp_buf);
}
DePosterize(pb32.data(), tmp_buf.data(), w, h);
pb32.steal_data(tmp_buf);
}
#endif
// xBRZ scaling
if (settings.rend.TextureUpscale > 1)
{
PixelBuffer<u32> tmp_buf;
tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
// xBRZ scaling
if (textureUpscaling)
{
PixelBuffer<u32> tmp_buf;
tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
// Alpha channel formats. Palettes with alpha are already handled
has_alpha = true;
UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
pb32.steal_data(tmp_buf);
upscaled_w *= settings.rend.TextureUpscale;
upscaled_h *= settings.rend.TextureUpscale;
if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
// Alpha channel formats. Palettes with alpha are already handled
has_alpha = true;
UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
pb32.steal_data(tmp_buf);
upscaled_w *= settings.rend.TextureUpscale;
upscaled_h *= settings.rend.TextureUpscale;
}
}
temp_tex_buffer = pb32.data();
}
else if (texconv != NULL)
{
pb16.init(w, h);
texconv(&pb16,(u8*)&vram[sa],stride,h);
if (mipmapped)
{
pb16.init(w, h, true);
for (int i = 0; i <= tsp.TexU + 3; i++)
{
pb16.set_mipmap(i);
u32 vram_addr;
if (tcw.VQ_Comp)
{
vram_addr = sa_tex + VQMipPoint[i];
if (i == 0)
{
PixelBuffer<u16> pb0;
pb0.init(2, 2 ,false);
texconv(&pb0, (u8*)&vram[vram_addr], 2, 2);
*pb16.data() = *pb0.data(1, 1);
continue;
}
}
else
vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8;
texconv(&pb16, (u8*)&vram[vram_addr], 1 << i, 1 << i);
}
pb16.set_mipmap(0);
}
else
{
pb16.init(w, h);
texconv(&pb16,(u8*)&vram[sa],stride,h);
}
temp_tex_buffer = pb16.data();
}
else
@ -692,6 +708,7 @@ void BaseTextureCacheData::Update()
pb16.init(w, h);
memset(pb16.data(), 0x80, w * h * 2);
temp_tex_buffer = pb16.data();
mipmapped = false;
}
// Restore the original texture height if it was constrained to VRAM limits above
h = original_h;
@ -699,7 +716,7 @@ void BaseTextureCacheData::Update()
//lock the texture to detect changes in it
lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);
UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer);
UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer, mipmapped);
if (settings.rend.DumpTextures)
{
ComputeHash();
@ -713,7 +730,7 @@ void BaseTextureCacheData::CheckCustomTexture()
if (custom_load_in_progress == 0 && custom_image_data != NULL)
{
tex_type = TextureType::_8888;
UploadToGPU(custom_width, custom_height, custom_image_data);
UploadToGPU(custom_width, custom_height, custom_image_data, false);
delete [] custom_image_data;
custom_image_data = NULL;
}

View File

@ -17,32 +17,46 @@ extern u32 pal_hash_256[4];
extern u32 pal_hash_16[64];
extern bool KillTex;
extern u32 detwiddle[2][8][1024];
extern u32 detwiddle[1024];
template<class pixel_type>
class PixelBuffer
{
pixel_type* p_buffer_start;
pixel_type* p_current_line;
pixel_type* p_current_pixel;
pixel_type* p_buffer_start = nullptr;
pixel_type* p_current_mipmap = nullptr;
pixel_type* p_current_line = nullptr;
pixel_type* p_current_pixel = nullptr;
u32 pixels_per_line = 0;
public:
// Default constructor: the pointer members already default to nullptr via
// their in-class initializers, so this explicit reset is redundant but harmless.
PixelBuffer()
{
	p_buffer_start = p_current_line = p_current_pixel = NULL;
}
// Destructor frees the owned pixel storage (RAII).
~PixelBuffer()
{
	deinit();
}
// Allocate storage for a width x height image; if 'mipmapped', additionally
// reserve room for every smaller level down to 1x1. Levels are stored
// smallest-first in the buffer (see set_mipmap for the offset scheme).
void init(u32 width, u32 height, bool mipmapped)
{
	deinit();
	size_t size = width * height * sizeof(pixel_type);
	if (mipmapped)
	{
		do
		{
			width /= 2;
			height /= 2;
			// Accumulate the size of each smaller level (the 0x0 iteration adds nothing).
			size += width * height * sizeof(pixel_type);
		}
		while (width != 0 && height != 0);
	}
	p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(size);
	// NOTE(review): pixels_per_line starts at 1 here — callers appear expected
	// to call set_mipmap() to select a level/line width before writing; confirm.
	this->pixels_per_line = 1;
}
// Allocate storage for a single width x height image (no mip levels).
// The merged text performed two consecutive malloc assignments (old + new
// diff lines), leaking the first allocation; only one allocation is needed.
void init(u32 width, u32 height)
{
	deinit();
	p_buffer_start = p_current_line = p_current_pixel = p_current_mipmap = (pixel_type *)malloc(width * height * sizeof(pixel_type));
	this->pixels_per_line = width;
}
@ -51,47 +65,56 @@ public:
if (p_buffer_start != NULL)
{
free(p_buffer_start);
p_buffer_start = p_current_line = p_current_pixel = NULL;
p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = NULL;
}
}
// Take ownership of another buffer's storage; the donor is left empty.
void steal_data(PixelBuffer &buffer)
{
	deinit();
	p_buffer_start = p_current_mipmap = p_current_line = p_current_pixel = buffer.p_buffer_start;
	pixels_per_line = buffer.pixels_per_line;
	// Null out the *donor's* pointers. The original nulled this->p_current_mipmap
	// by mistake (missing "buffer." qualifier), which both left the donor's
	// mipmap pointer dangling and broke this buffer's data()/amove() cursor.
	buffer.p_buffer_start = buffer.p_current_mipmap = buffer.p_current_line = buffer.p_current_pixel = NULL;
}
// Point the write cursors at the given mip level. Levels are stored
// smallest-first: level 0 is the 1x1 mip at offset 0, level n is
// 2^n x 2^n texels at offset sum of 4^i for i < n.
void set_mipmap(int level)
{
	size_t offset = 0;
	for (int i = 0; i < level; i++)
		offset += (1 << (2 * i));
	p_current_mipmap = p_current_line = p_current_pixel = p_buffer_start + offset;
	// Level n is square with side 2^n.
	pixels_per_line = 1 << level;
}
// Pointer to texel (x, y) of the currently selected mip level.
// The merged text kept the stale pre-mipmap "return p_buffer_start + ..."
// line before this one, making the mip-aware return unreachable.
__forceinline pixel_type *data(u32 x = 0, u32 y = 0)
{
	return p_current_mipmap + pixels_per_line * y + x;
}
// Write 'value' at offset x from the current pixel cursor.
// (The merged text contained two consecutive signature lines — old and new
// diff sides — which does not compile; one signature is kept.)
__forceinline void prel(u32 x, pixel_type value)
{
	p_current_pixel[x] = value;
}
// Write 'value' at (x, y) relative to the current pixel cursor.
// (The merged text contained two consecutive signature lines and two body
// lines — old and new diff sides — which does not compile; deduplicated.)
__forceinline void prel(u32 x, u32 y, pixel_type value)
{
	p_current_pixel[y * pixels_per_line + x] = value;
}
// Advance the pixel cursor by 'value' texels within the current line.
// The merged text kept both the old and new diff lines of the same statement,
// advancing the cursor twice per call; only one advance is correct.
__forceinline void rmovex(u32 value)
{
	p_current_pixel += value;
}
// Advance the line cursor by 'value' lines and reset the pixel cursor to
// the start of that line.
// The merged text kept both the old and new diff lines of each statement,
// advancing the line cursor twice per call; deduplicated to a single advance.
__forceinline void rmovey(u32 value)
{
	p_current_line += pixels_per_line * value;
	p_current_pixel = p_current_line;
}
// Absolute move: place the line and pixel cursors at (x_m, y_m) within the
// currently selected mip level.
// (The merged text contained duplicate signature lines — a compile error —
// and the stale pre-mipmap statements based at p_buffer_start; only the
// mip-aware version is kept.)
__forceinline void amove(u32 x_m, u32 y_m)
{
	p_current_line = p_current_mipmap + pixels_per_line * y_m;
	p_current_pixel = p_current_line + x_m;
}
};
@ -145,8 +168,6 @@ __forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv)
return PixelPacker::packRGB(clamp(0,255,R),clamp(0,255,G),clamp(0,255,B));
}
#define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y])
//pixel packers !
struct pp_565
{
@ -496,24 +517,23 @@ void texture_PL(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height)
}
}
// Convert (x, y) texel coordinates to the twiddled (Morton/Z-order) texel
// index: the y bits occupy even bit positions, the x bits odd positions.
static inline u32 get_tw_texel_position(u32 x, u32 y)
{
	return detwiddle[y] | detwiddle[x] << 1;
}
template<class PixelConvertor, class pixel_type>
void texture_TW(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height)
{
pb->amove(0,0);
const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp;
const u32 divider = PixelConvertor::xpp * PixelConvertor::ypp;
unsigned long bcx_,bcy_;
bcx_=bitscanrev(Width);
bcy_=bitscanrev(Height);
const u32 bcx=bcx_-3;
const u32 bcy=bcy_-3;
for (u32 y=0;y<Height;y+=PixelConvertor::ypp)
for (u32 y = 0; y < Height; y += PixelConvertor::ypp)
{
for (u32 x=0;x<Width;x+=PixelConvertor::xpp)
for (u32 x = 0; x < Width; x += PixelConvertor::xpp)
{
u8* p = &p_in[(twop(x,y,bcx,bcy)/divider)<<3];
u8* p = &p_in[get_tw_texel_position(x, y) / divider * 8];
PixelConvertor::Convert(pb,p);
pb->rmovex(PixelConvertor::xpp);
@ -528,18 +548,14 @@ void texture_VQ(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height)
p_in+=256*4*2;
pb->amove(0,0);
const u32 divider=PixelConvertor::xpp*PixelConvertor::ypp;
unsigned long bcx_,bcy_;
bcx_=bitscanrev(Width);
bcy_=bitscanrev(Height);
const u32 bcx=bcx_-3;
const u32 bcy=bcy_-3;
Height /= PixelConvertor::ypp;
Width /= PixelConvertor::xpp;
for (u32 y=0;y<Height;y+=PixelConvertor::ypp)
for (u32 y = 0; y < Height; y++)
{
for (u32 x=0;x<Width;x+=PixelConvertor::xpp)
for (u32 x = 0; x < Width; x++)
{
u8 p = p_in[twop(x,y,bcx,bcy)/divider];
u8 p = p_in[get_tw_texel_position(x, y)];
PixelConvertor::Convert(pb,&vq_codebook[p*8]);
pb->rmovex(PixelConvertor::xpp);
@ -670,6 +686,11 @@ struct BaseTextureCacheData
return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8;
}
// True when the texture control word requests mipmaps; scan-ordered
// (non-twiddled) textures are treated as not mipmapped.
bool IsMipmapped()
{
	return tcw.MipMapped != 0 && tcw.ScanOrder == 0;
}
const char* GetPixelFormatName()
{
switch (tcw.PixelFmt)
@ -688,7 +709,7 @@ struct BaseTextureCacheData
void Create();
void ComputeHash();
void Update();
virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) = 0;
virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) = 0;
virtual bool Force32BitTexture(TextureType type) const { return false; }
void CheckCustomTexture();
//true if : dirty or paletted texture and hashes don't match

View File

@ -501,8 +501,6 @@ static bool gl_create_resources()
// Assume the resources have already been created
return true;
findGLVersion();
//create vao
glGenVertexArrays(1, &gl4.vbo.main_vao);
glGenVertexArrays(1, &gl4.vbo.modvol_vao);
@ -538,11 +536,8 @@ extern void gl4CreateTextures(int width, int height);
static bool gles_init()
{
int major = 0;
int minor = 0;
glGetIntegerv(GL_MAJOR_VERSION, &major);
glGetIntegerv(GL_MINOR_VERSION, &minor);
if (major < 4 || (major == 4 && minor < 3))
findGLVersion();
if (gl.gl_major < 4 || (gl.gl_major == 4 && gl.gl_minor < 3))
{
WARN_LOG(RENDERER, "Warning: OpenGL version doesn't support per-pixel sorting.");
return false;

View File

@ -469,6 +469,7 @@ void findGLVersion()
{
gl.index_type = GL_UNSIGNED_INT;
gl.gl_major = theGLContext.GetMajorVersion();
gl.gl_minor = theGLContext.GetMinorVersion();
gl.is_gles = theGLContext.IsGLES();
if (gl.is_gles)
{

View File

@ -95,6 +95,7 @@ struct gl_ctx
const char *gl_version;
const char *glsl_version_header;
int gl_major;
int gl_minor;
bool is_gles;
GLuint fog_image_format;
GLenum index_type;
@ -200,7 +201,7 @@ struct TextureCacheData : BaseTextureCacheData
GLuint texID; //gl texture
u16* pData;
virtual std::string GetId() override { return std::to_string(texID); }
virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) override;
virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped) override;
virtual bool Delete() override;
};

View File

@ -73,7 +73,7 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) {
free(rows);
}
void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer)
void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, bool mipmapped)
{
if (texID != 0)
{
@ -100,9 +100,66 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer)
die("Unsupported texture type");
break;
}
glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer);
if (tcw.MipMapped && settings.rend.UseMipmaps)
glGenerateMipmap(GL_TEXTURE_2D);
if (mipmapped)
{
int mipmapLevels = 0;
int dim = width;
while (dim != 0)
{
mipmapLevels++;
dim >>= 1;
}
#ifndef GLES2
// Open GL 4.2 or GLES 3.0 min
if (gl.gl_major > 4 || (gl.gl_major == 4 && gl.gl_minor >= 2)
|| (gl.is_gles && gl.gl_major >= 3))
{
GLuint internalFormat;
switch (tex_type)
{
case TextureType::_5551:
internalFormat = GL_RGB5_A1;
break;
case TextureType::_565:
internalFormat = GL_RGB565;
break;
case TextureType::_4444:
internalFormat = GL_RGBA4;
break;
case TextureType::_8888:
internalFormat = GL_RGBA8;
break;
}
if (Updates == 1)
{
glTexStorage2D(GL_TEXTURE_2D, mipmapLevels, internalFormat, width, height);
glCheck();
}
for (int i = 0; i < mipmapLevels; i++)
{
glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer);
temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
}
}
else
#endif
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, mipmapLevels - 1);
for (int i = 0; i < mipmapLevels; i++)
{
glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer);
temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
}
}
}
else
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer);
}
glCheck();
}
else {
#if FEAT_HAS_SOFTREND

View File

@ -65,7 +65,7 @@ public:
vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
vjoyTexture->SetDevice(GetContext()->GetDevice());
vjoyTexture->SetCommandBuffer(texCommandPool.Allocate());
vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data);
vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false);
vjoyTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
delete [] image_data;
@ -135,7 +135,7 @@ public:
curTexture->SetDevice(GetContext()->GetDevice());
}
curTexture->SetCommandBuffer(texCommandPool.Allocate());
curTexture->UploadToGPU(width, height, (u8*)pb.data());
curTexture->UploadToGPU(width, height, (u8*)pb.data(), false);
curTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
@ -287,7 +287,7 @@ private:
MakeFogTexture(texData);
fogTexture->SetCommandBuffer(texCommandPool.Allocate());
fogTexture->UploadToGPU(128, 2, texData);
fogTexture->UploadToGPU(128, 2, texData, false);
fogTexture->SetCommandBuffer(nullptr);
}

View File

@ -143,7 +143,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk:
commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, nullptr, nullptr, imageMemoryBarrier);
}
void Texture::UploadToGPU(int width, int height, u8 *data)
void Texture::UploadToGPU(int width, int height, u8 *data, bool mipmapped)
{
vk::Format format;
u32 dataSize = width * height * 2;
@ -167,20 +167,31 @@ void Texture::UploadToGPU(int width, int height, u8 *data)
dataSize /= 2;
break;
}
if (mipmapped)
{
int w = width / 2;
u32 size = dataSize / 4;
while (w)
{
dataSize += size;
size /= 4;
w /= 2;
}
}
bool isNew = true;
if (width != extent.width || height != extent.height || format != this->format)
Init(width, height, format);
Init(width, height, format, dataSize);
else
isNew = false;
SetImage(dataSize, data, isNew);
}
void Texture::Init(u32 width, u32 height, vk::Format format)
void Texture::Init(u32 width, u32 height, vk::Format format, u32 dataSize)
{
this->extent = vk::Extent2D(width, height);
this->format = format;
mipmapLevels = 1;
if (tcw.MipMapped && settings.rend.UseMipmaps)
if (IsMipmapped() && settings.rend.UseMipmaps)
mipmapLevels += floor(log2(std::max(width, height)));
vk::FormatProperties formatProperties = physicalDevice.getFormatProperties(format);
@ -195,7 +206,7 @@ void Texture::Init(u32 width, u32 height, vk::Format format)
vk::ImageUsageFlags usageFlags = vk::ImageUsageFlagBits::eSampled;
if (needsStaging)
{
stagingBufferData = std::unique_ptr<BufferData>(new BufferData(extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc));
stagingBufferData = std::unique_ptr<BufferData>(new BufferData(dataSize, vk::BufferUsageFlagBits::eTransferSrc));
usageFlags |= vk::ImageUsageFlagBits::eTransferDst;
initialLayout = vk::ImageLayout::eUndefined;
requirements = vk::MemoryPropertyFlagBits::eDeviceLocal;
@ -206,8 +217,6 @@ void Texture::Init(u32 width, u32 height, vk::Format format)
initialLayout = vk::ImageLayout::ePreinitialized;
requirements = vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible;
}
if (mipmapLevels > 1)
usageFlags |= vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst;
CreateImage(imageTiling, usageFlags, initialLayout, requirements, vk::ImageAspectFlagBits::eColor);
}
@ -252,77 +261,34 @@ void Texture::SetImage(u32 srcSize, void *srcData, bool isNew)
// Since we're going to blit to the texture image, set its layout to eTransferDstOptimal
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, isNew ? vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal,
vk::ImageLayout::eTransferDstOptimal);
vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1));
commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
if (mipmapLevels > 1)
GenerateMipmaps();
{
vk::DeviceSize bufferOffset = 0;
for (int i = 0; i < mipmapLevels; i++)
{
vk::BufferImageCopy copyRegion(bufferOffset, 1 << i, 1 << i, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, mipmapLevels - i - 1, 0, 1),
vk::Offset3D(0, 0, 0), vk::Extent3D(1 << i, 1 << i, 1));
commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
bufferOffset += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2);
}
}
else
// Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
{
vk::BufferImageCopy copyRegion(0, extent.width, extent.height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
vk::Offset3D(0, 0, 0), vk::Extent3D(extent, 1));
commandBuffer.copyBufferToImage(stagingBufferData->buffer.get(), image.get(), vk::ImageLayout::eTransferDstOptimal, copyRegion);
}
// Set the layout for the texture image from eTransferDstOptimal to SHADER_READ_ONLY
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
}
else
{
if (mipmapLevels > 1)
GenerateMipmaps();
else
// If we can use the linear tiled image as a texture, just do it
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal);
// If we can use the linear tiled image as a texture, just do it
setImageLayout(commandBuffer, image.get(), format, mipmapLevels, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal);
}
commandBuffer.end();
}
// Generate the full mip chain on the GPU: each level is blitted (with linear
// filtering) onto the next half-sized level, with image-memory barriers
// transitioning each level's layout as it is produced and consumed.
// On return every mip level is in eShaderReadOnlyOptimal.
void Texture::GenerateMipmaps()
{
	u32 mipWidth = extent.width;
	u32 mipHeight = extent.height;

	// Reusable single-level barrier; fields are adjusted per iteration.
	vk::ImageMemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead,
			vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
			*image, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

	for (int i = 1; i < mipmapLevels; i++)
	{
		// Transition previous mipmap level from dst optimal/preinit to src optimal
		barrier.subresourceRange.baseMipLevel = i - 1;
		if (i == 1 && !needsStaging)
		{
			// Level 0 was written by the host (no staging copy), so it is still
			// in ePreinitialized with host-write access to flush.
			barrier.oldLayout = vk::ImageLayout::ePreinitialized;
			barrier.srcAccessMask = vk::AccessFlagBits::eHostWrite;
		}
		else
		{
			barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal;
			barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
		}
		barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal;
		barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead;
		commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, nullptr, nullptr, barrier);

		// Blit previous mipmap level on current
		vk::ImageBlit blit(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i - 1, 0, 1),
				{ { vk::Offset3D(0, 0, 0), vk::Offset3D(mipWidth, mipHeight, 1) } },
				vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, i, 0, 1),
				{ { vk::Offset3D(0, 0, 0), vk::Offset3D(std::max(mipWidth / 2, 1u), std::max(mipHeight / 2, 1u), 1) } });
		commandBuffer.blitImage(*image, vk::ImageLayout::eTransferSrcOptimal, *image, vk::ImageLayout::eTransferDstOptimal, 1, &blit, vk::Filter::eLinear);

		// Transition previous mipmap level from src optimal to shader read-only optimal
		barrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal;
		barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
		barrier.srcAccessMask = vk::AccessFlagBits::eTransferRead;
		barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
		commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier);

		mipWidth = std::max(mipWidth / 2, 1u);
		mipHeight = std::max(mipHeight / 2, 1u);
	}

	// Transition last mipmap level from dst optimal to shader read-only optimal
	barrier.subresourceRange.baseMipLevel = mipmapLevels - 1;
	barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal;
	barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
	barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
	barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
	commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eFragmentShader, {}, nullptr, nullptr, barrier);
}
void FramebufferAttachment::Init(u32 width, u32 height, vk::Format format, vk::ImageUsageFlags usage)
{
this->format = format;

View File

@ -30,7 +30,7 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk:
struct Texture : BaseTextureCacheData
{
void UploadToGPU(int width, int height, u8 *data) override;
void UploadToGPU(int width, int height, u8 *data, bool mipmapped) override;
u64 GetIntId() { return (u64)reinterpret_cast<uintptr_t>(this); }
std::string GetId() override { char s[20]; sprintf(s, "%p", this); return s; }
bool IsNew() const { return !image.get(); }
@ -43,11 +43,10 @@ struct Texture : BaseTextureCacheData
void SetDevice(vk::Device device) { this->device = device; }
private:
void Init(u32 width, u32 height, vk::Format format);
void Init(u32 width, u32 height, vk::Format format ,u32 dataSize);
void SetImage(u32 size, void *data, bool isNew);
void CreateImage(vk::ImageTiling tiling, vk::ImageUsageFlags usage, vk::ImageLayout initialLayout,
vk::MemoryPropertyFlags memoryProperties, vk::ImageAspectFlags aspectMask);
void GenerateMipmaps();
vk::Format format = vk::Format::eUndefined;
vk::Extent2D extent;

View File

@ -51,7 +51,7 @@ const std::vector<vk::UniqueCommandBuffer>* VulkanVMUs::PrepareVMUs(vk::CommandP
VulkanContext::Instance()->GetDevice().allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool, vk::CommandBufferLevel::ePrimary, 1))
.front()));
texture->SetCommandBuffer(*commandBuffers[context->GetCurrentImageIndex()].back());
texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i]);
texture->UploadToGPU(48, 32, (u8*)vmu_lcd_data[i], false);
texture->SetCommandBuffer(nullptr);
vmu_lcd_changed[i] = false;
}

View File

@ -61,7 +61,7 @@ public:
vjoyTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
vjoyTexture->SetDevice(GetContext()->GetDevice());
vjoyTexture->SetCommandBuffer(texCommandPool.Allocate());
vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data);
vjoyTexture->UploadToGPU(OSD_TEX_W, OSD_TEX_H, image_data, false);
vjoyTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
delete [] image_data;
@ -122,7 +122,7 @@ public:
curTexture->SetDevice(GetContext()->GetDevice());
}
curTexture->SetCommandBuffer(texCommandPool.Allocate());
curTexture->UploadToGPU(width, height, (u8*)pb.data());
curTexture->UploadToGPU(width, height, (u8*)pb.data(), false);
curTexture->SetCommandBuffer(nullptr);
texCommandPool.EndFrame();
@ -275,7 +275,7 @@ private:
MakeFogTexture(texData);
fogTexture->SetCommandBuffer(texCommandPool.Allocate());
fogTexture->UploadToGPU(128, 2, texData);
fogTexture->UploadToGPU(128, 2, texData, false);
fogTexture->SetCommandBuffer(nullptr);
}

View File

@ -118,6 +118,9 @@ public :
pthread_mutex_unlock(&mutx);
#endif
}
// Lowercase aliases satisfying the std::BasicLockable named requirement,
// so cMutex can be used with std::lock_guard / std::unique_lock.
void lock() { Lock(); }
void unlock() { Unlock(); }
};
#if !defined(TARGET_IPHONE)

View File

@ -29,6 +29,10 @@ void GLGraphicsContext::findGLVersion()
glGetIntegerv(GL_MAJOR_VERSION, &majorVersion);
if (glGetError() == GL_INVALID_ENUM)
majorVersion = 2;
else
{
glGetIntegerv(GL_MINOR_VERSION, &minorVersion);
}
const char *version = (const char *)glGetString(GL_VERSION);
isGLES = !strncmp(version, "OpenGL ES", 9);
INFO_LOG(RENDERER, "OpenGL version: %s", version);

View File

@ -29,6 +29,7 @@ class GLGraphicsContext
{
public:
int GetMajorVersion() const { return majorVersion; }
int GetMinorVersion() const { return minorVersion; }
bool IsGLES() const { return isGLES; }
protected:
@ -38,6 +39,7 @@ protected:
private:
int majorVersion = 0;
int minorVersion = 0;
bool isGLES = false;
};