// pcsx2/plugins/GSdx/GSTextureOGL.cpp
//
// 608 lines
// 16 KiB
// C++

/*
* Copyright (C) 2011-2011 Gregory hainaut
* Copyright (C) 2007-2009 Gabest
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include <limits.h>
#include "GSTextureOGL.h"
#include "GLState.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern uint32 g_texture_upload_byte;
#endif
// FIXME OGL4: investigate, only 1 unpack buffer always bound
// Ring of pixel-unpack buffer objects (PBO) used to stream texture uploads.
//
// Typical sequence (see GSTextureOGL::Update):
//   Map(size) -> write the pixels -> Unmap() -> texture upload reading at
//   Offset() -> UnbindPbo() -> EndTransfer()
//
// Two code paths exist, selected once in Init():
//  * m_texture_storage == true : every PBO is created with gl_BufferStorage and
//    mapped once; Map() only hands out a pointer into that mapping.
//  * m_texture_storage == false: every PBO is created with gl_BufferData and
//    Map()/Unmap() map and unmap the requested range for each transfer.
namespace PboPool {

	GLuint m_pool[PBO_POOL_SIZE];           // GL names of the buffers
	uint32 m_offset[PBO_POOL_SIZE];         // Next free byte offset inside each buffer
	char*  m_map[PBO_POOL_SIZE];            // Mapping of each buffer (NULL when not kept mapped)
	uint32 m_current_pbo = 0;               // Index of the buffer currently in use
	uint32 m_size;                          // Byte size requested by the last Map() call
	bool   m_texture_storage;               // True when the buffer-storage path is used
	const uint32 m_pbo_size = 4*1024*1024;  // Byte size of every buffer of the pool

#ifndef ENABLE_GLES
	// Option for buffer storage
	// Note there is a barrier (but maybe coherent is faster)
	// XXX: are coherent and barrier really needed?
	// As far as I understand glTexSubImage2D is a client-server transfer so no need to make
	// the value visible to the server
	const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
	const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
	const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
#endif

	// Create and allocate every buffer of the pool.
	// The buffer-storage path is used only when the "ogl_texture_storage" option
	// is 1 and GL_ARB_buffer_storage was found.
	void Init() {
		gl_GenBuffers(countof(m_pool), m_pool);

		m_texture_storage = ((theApp.GetConfig("ogl_texture_storage", 0) == 1) && GLLoader::found_GL_ARB_buffer_storage);

		// Each iteration sets up the current buffer then advances to the next one,
		// so the whole pool is visited once.
		for (size_t i = 0; i < countof(m_pool); i++) {
			BindPbo();

			if (m_texture_storage) {
#ifndef ENABLE_GLES
				gl_BufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags);
				m_map[m_current_pbo] = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
#endif
			} else {
				gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_COPY);
				m_map[m_current_pbo] = NULL;
			}

			NextPbo();
		}
		UnbindPbo();
	}

	// Reserve `size` bytes in the pool and return a pointer where the caller can
	// write them. Moves to the next buffer when the current one cannot hold the
	// request. The selected buffer is left bound to GL_PIXEL_UNPACK_BUFFER.
	// In the non-storage path the pointer is whatever gl_MapBufferRange returned.
	char* Map(uint32 size) {
		char* map;
		m_size = size;

		if (m_size > m_pbo_size) {
			// Only reported: the transfer is still attempted afterwards.
			fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size);
		}

		if (m_texture_storage) {
			if (m_offset[m_current_pbo] + m_size >= m_pbo_size) {
				NextPbo();
			}

			// Note: texsubimage will access currently bound buffer
			// Pbo ready let's get a pointer
			BindPbo();

			map = m_map[m_current_pbo] + m_offset[m_current_pbo];
		} else {
			GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_RANGE_BIT;

			if (m_offset[m_current_pbo] + m_size >= m_pbo_size) {
				NextPbo();

				// A fresh buffer is started: invalidate it entirely instead of the range only
				flags &= ~GL_MAP_INVALIDATE_RANGE_BIT;
				flags |= GL_MAP_INVALIDATE_BUFFER_BIT;
			}

			// Pbo ready let's get a pointer
			BindPbo();

			// Be sure the map is aligned
			map = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size, flags);
		}

		return map;
	}

	// Used to unmap the buffer when context was detached.
	// Only the bookkeeping is reset here (no GL call): every mapping pointer is
	// dropped and every buffer is marked as empty.
	void UnmapAll() {
		for (size_t i = 0; i < countof(m_pool); i++) {
			m_map[i] = NULL;
			// Reset the offset of *this* buffer (the original code reset the
			// current buffer on every iteration and left the others untouched).
			m_offset[i] = 0;
		}
	}

	// Finish the CPU write started by Map().
	void Unmap() {
		if (m_texture_storage) {
			// As far as I understand glTexSubImage2D is a client-server transfer so no need to make
			// the value visible to the server
			//gl_MemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
		} else {
			gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
		}
	}

	// Byte offset, inside the current buffer, of the data handed out by the last Map().
	uint32 Offset() {
		return m_offset[m_current_pbo];
	}

	// Release every buffer of the pool.
	void Destroy() {
		if (m_texture_storage)
			UnmapAll();
		gl_DeleteBuffers(countof(m_pool), m_pool);
	}

	// Bind the current buffer as the pixel unpack buffer.
	void BindPbo() {
		gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[m_current_pbo]);
	}

	// Advance to the next buffer of the ring.
	// NOTE(review): the mask only wraps correctly when the pool size is a power of two - confirm PBO_POOL_SIZE.
	void NextPbo() {
		m_current_pbo = (m_current_pbo + 1) & (countof(m_pool)-1);
		// Mark new PBO as free
		m_offset[m_current_pbo] = 0;
	}

	// Leave no buffer bound to the pixel unpack target.
	void UnbindPbo() {
		gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
	}

	// Consume the range handed out by the last Map(): the offset of the current
	// buffer is moved past it and rounded up to a multiple of 64 bytes.
	void EndTransfer() {
		// Note: keep offset aligned for SSE/AVX
		m_offset[m_current_pbo] = (m_offset[m_current_pbo] + m_size + 63) & ~0x3F;
	}
}
// FIXME: check whether it is possible to always use the following setup by default:
// glPixelStorei(GL_PACK_ALIGNMENT, 1);
// glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
// Create a texture object of the requested type, size and GL internal format.
//
// type     : one of the GSTexture types (Offscreen, Texture, RenderTarget,
//            DepthStencil, Backbuffer).
// w, h     : requested size; each dimension is clamped to at least 1.
// format   : GL internal format. It selects the client-side transfer
//            parameters (m_int_format / m_int_type / m_int_alignment /
//            m_int_shift) used by Update() and Map().
// fbo_read : framebuffer object used when the texture content is read back.
GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
	: m_pbo_id(0),
	m_pbo_size(0)
{
	// m_size.x = w;
	// m_size.y = h;
	// FIXME
	m_size.x = max(1,w);
	m_size.y = max(1,h);
	m_format = format;
	m_type   = type;
	m_fbo_read = fbo_read;
	m_texture_id = 0;
	memset(&m_handles, 0, countof(m_handles) * sizeof(m_handles[0]) );

	// Bunch of constant parameter
	// m_int_shift is the shift that converts a pixel count into a byte count
	// (see the m_pbo_size computation below).
	switch (m_format) {
		case GL_R32I:
			m_int_format    = GL_RED_INTEGER;
			m_int_type      = GL_INT;
			m_int_alignment = 4;
			m_int_shift     = 2;
			break;
		case GL_R16UI:
			m_int_format    = GL_RED_INTEGER;
			m_int_type      = GL_UNSIGNED_SHORT;
			m_int_alignment = 2;
			m_int_shift     = 1;
			break;
		case GL_RGBA8:
			m_int_format    = GL_RGBA;
			m_int_type      = GL_UNSIGNED_BYTE;
			m_int_alignment = 4;
			m_int_shift     = 2;
			break;
		case GL_R8:
			m_int_format    = GL_RED;
			m_int_type      = GL_UNSIGNED_BYTE;
			m_int_alignment = 1;
			m_int_shift     = 0;
			break;
		case 0:
		case GL_DEPTH32F_STENCIL8:
			// Backbuffer & dss aren't important
			m_int_format    = 0;
			m_int_type      = 0;
			m_int_alignment = 0;
			m_int_shift     = 0;
			break;
		default:
			ASSERT(0);
			// Unknown format: when the assertion is compiled out the members
			// must still hold defined values, so use the neutral ones.
			m_int_format    = 0;
			m_int_type      = 0;
			m_int_alignment = 0;
			m_int_shift     = 0;
			break;
	}

	// Generate the buffer
	switch (m_type) {
		case GSTexture::Offscreen:
			//FIXME I not sure we need a pixel buffer object. It seems more a texture
			// gl_GenBuffers(1, &m_texture_id);
			// ASSERT(0);
			// Intentional fallthrough: an offscreen texture also needs a texture name
		case GSTexture::Texture:
		case GSTexture::RenderTarget:
		case GSTexture::DepthStencil:
			gl_CreateTextures(GL_TEXTURE_2D, 1, &m_texture_id);
			break;
		case GSTexture::Backbuffer:
			break;
		default:
			break;
	}

	// Allocate the buffer
	switch (m_type) {
		case GSTexture::Offscreen:
			// Extra buffer to handle various pixel transfer
			gl_GenBuffers(1, &m_pbo_id);

			// Allocate a pbo with the texture
			m_pbo_size = (m_size.x * m_size.y) << m_int_shift;

			gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id);
			gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_READ);
			gl_BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
			// Intentional fallthrough: the texture storage is allocated too

		case GSTexture::DepthStencil:
		case GSTexture::RenderTarget:
		case GSTexture::Texture:
			gl_TextureStorage2D(m_texture_id, 1+GL_TEX_LEVEL_0, m_format, m_size.x, m_size.y);
			break;
		default: break;
	}
}
// Release the GL objects owned by the texture and make sure the cached GL
// state does not keep referring to the texture name being deleted.
GSTextureOGL::~GSTextureOGL()
{
	// Drop the texture from the cached render target / depth-stencil slots
	if (GLState::rt == m_texture_id) {
		GLState::rt = 0;
	}
	if (GLState::ds == m_texture_id) {
		GLState::ds = 0;
	}

	// Drop the texture from the two cached texture units
	for (int unit = 0; unit < 2; unit++) {
		if (GLState::tex_unit[unit] == m_texture_id) {
			GLState::tex_unit[unit] = 0;
		}
	}

	gl_DeleteBuffers(1, &m_pbo_id);
	glDeleteTextures(1, &m_texture_id);
}
// Upload the pixels of rectangle `r` into the texture through the PBO pool.
//
// r     : destination rectangle inside the texture.
// data  : source pixels, r.height() rows of `pitch` bytes each.
// pitch : size in bytes of one source row.
//
// Returns true when the upload was issued, false when no upload memory could
// be obtained from the PBO pool.
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
{
	ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen);

	// Note: reduce noise for gl retracers
	// It might introduce bug after an emulator pause so always set it in standard mode
	if (GLLoader::in_replayer) {
		static uint32 unpack_alignment = 0;
		if (unpack_alignment != m_int_alignment) {
			unpack_alignment = m_int_alignment;
			glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
		}
	} else {
		glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
	}

	char* src = (char*)data;
	char* map = PboPool::Map(r.height() * pitch);

	// The mapping comes from the driver and can fail. Copying into a NULL
	// pointer would crash, so give up on this upload instead.
	// Map() left the PBO bound; nothing was mapped so there is nothing to unmap.
	if (map == NULL) {
		fprintf(stderr, "bad mapping of the upload pbo\n");
		PboPool::UnbindPbo();
		return false;
	}

#ifdef ENABLE_OGL_DEBUG_MEM_BW
	// Note: pitch is the line size that will be copied into the PBO
	// pitch >> m_int_shift is the line size that will be actually dma-ed into the GPU
	g_texture_upload_byte += pitch * r.height();
#endif

	memcpy(map, src, pitch*r.height());

	PboPool::Unmap();

	// Note: reduce noise for gl retracers
	// It might introduce bug after an emulator pause so always set it in standard mode
	// pitch is in bytes whereas GL_UNPACK_ROW_LENGTH is in pixels
	if (GLLoader::in_replayer) {
		static int unpack_row_length = 0;
		if (unpack_row_length != (pitch >> m_int_shift)) {
			unpack_row_length = pitch >> m_int_shift;
			glPixelStorei(GL_UNPACK_ROW_LENGTH, unpack_row_length);
		}
	} else {
		glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
	}

	// With a PBO bound, the last argument is a byte offset inside the PBO and not a client pointer
	gl_TextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());

	// Normally only affect TexSubImage call. (i.e. only the previous line)
	//glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

	// FIXME OGL4: investigate, only 1 unpack buffer always bound
	PboPool::UnbindPbo();

	// Consume the PBO range only once the upload has been issued
	PboPool::EndTransfer();

	return true;

	// For reference, standard upload without pbo (Used to crash on FGLRX)
#if 0
	// pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel
	glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
	glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);

	glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);

	// FIXME useful?
	glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior

	return true;
#endif
}
// Return the texture/sampler handle associated with `sampler_id`.
// The handle is created and made resident the first time it is requested for
// a given sampler, then served from m_handles. Nothing is created when
// ENABLE_GLES is defined.
GLuint64 GSTextureOGL::GetHandle(GLuint sampler_id)
{
	ASSERT(sampler_id < 12);

#ifndef ENABLE_GLES
	if (m_handles[sampler_id] == 0) {
		// First request for this sampler: build the handle and cache it
		m_handles[sampler_id] = gl_GetTextureSamplerHandleARB(m_texture_id, sampler_id);
		gl_MakeTextureHandleResidentARB(m_handles[sampler_id]);
	}
#endif

	return m_handles[sampler_id];
}
// Read the whole texture back and expose it to the CPU.
//
// Only Offscreen textures are supported; any other type returns false.
// On success m.bits points to the mapped pack PBO, m.pitch is the size of a
// row in bytes, and the PBO stays bound and mapped until Unmap() is called.
// `r` is currently ignored: the full texture is always read.
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* r)
{
	// LOTS OF CRAP CODE!!!! PLEASE FIX ME !!!
	if (m_type != GSTexture::Offscreen)
		return false;

	// The function allow to modify the texture from the CPU
	// Set m.bits <- pointer to the data
	// Set m.pitch <- size of a row
	// I think in opengl we need to copy back the data to the RAM: glReadPixels — read a block of pixels from the frame buffer
	//
	// gl_MapBuffer — map a buffer object's data store
	// Can be used on GL_PIXEL_UNPACK_BUFFER or GL_TEXTURE_BUFFER

	// Attach the texture to the dedicated read framebuffer so that the other
	// framebuffer bindings are left alone
	gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
	gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);
	glReadBuffer(GL_COLOR_ATTACHMENT0);

	// FIXME It might be possible to only read a subrange of the texture based on r object
	// With the pack PBO bound, glReadPixels writes into the PBO (offset 0)
	gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id);
	glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment);
	glReadPixels(0, 0, m_size.x, m_size.y, m_int_format, m_int_type, 0);
	m.pitch = m_size.x << m_int_shift;
	gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);

	// Give access from the CPU
	m.bits = (uint8*) gl_MapBufferRange(GL_PIXEL_PACK_BUFFER, 0, m_pbo_size, GL_MAP_READ_BIT);

	if ( !m.bits ) {
		// Mapping failed: do not leave the pack PBO bound
		fprintf(stderr, "bad mapping of the pbo\n");
		gl_BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
		return false;
	}

	return true;

#if 0
	if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING)
	{
		D3D11_MAPPED_SUBRESOURCE map;

		if(SUCCEEDED(m_ctx->Map(m_texture, 0, D3D11_MAP_READ_WRITE, 0, &map)))
		{
			m.bits = (uint8*)map.pData;
			m.pitch = (int)map.RowPitch;

			return true;
		}
	}

	return false;
#endif
}
// Counterpart of Map(): unmap the pack PBO and unbind it.
// Does nothing for textures that are not Offscreen.
void GSTextureOGL::Unmap()
{
	if (m_type != GSTexture::Offscreen)
		return;

	gl_UnmapBuffer(GL_PIXEL_PACK_BUFFER);
	gl_BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
// BMP header structures, only defined here when _WINDOWS is not defined
// (presumably the Windows headers already provide them - confirm).
// They are written to disk as raw bytes by GSTextureOGL::Save, hence the
// 1-byte packing: no padding may be inserted between the fields.
#ifndef _WINDOWS
#pragma pack(push, 1)
// File header: first block written to the file.
struct BITMAPFILEHEADER
{
uint16 bfType; // Signature; Save stores the bytes 0x42 0x4d ("BM")
uint32 bfSize; // Save stores bfOffBits + biSizeImage
uint16 bfReserved1; // Set to 0 by Save
uint16 bfReserved2; // Set to 0 by Save
uint32 bfOffBits; // Save stores the combined size of both headers
};
// Info header: written right after the file header.
struct BITMAPINFOHEADER
{
uint32 biSize; // Save stores sizeof(BITMAPINFOHEADER)
int32 biWidth; // Save stores the texture width
int32 biHeight; // Save stores the texture height
uint16 biPlanes; // Save stores 1
uint16 biBitCount; // Save stores 32 (4 bytes are written per pixel)
uint32 biCompression; // Save stores BI_RGB
uint32 biSizeImage; // Save stores width * height * 4
int32 biXPelsPerMeter; // Left at 0 by Save
int32 biYPelsPerMeter; // Left at 0 by Save
uint32 biClrUsed; // Left at 0 by Save
uint32 biClrImportant; // Left at 0 by Save
};
// Value stored in biCompression by Save
#define BI_RGB 0
#pragma pack(pop)
#endif
void GSTextureOGL::Save(const string& fn, const void* image, uint32 pitch)
{
// Build a BMP file
FILE* fp = fopen(fn.c_str(), "wb");
if (fp == NULL)
return;
BITMAPINFOHEADER bih;
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = m_size.x;
bih.biHeight = m_size.y;
bih.biPlanes = 1;
bih.biBitCount = 32;
bih.biCompression = BI_RGB;
bih.biSizeImage = m_size.x * m_size.y << 2;
BITMAPFILEHEADER bfh;
memset(&bfh, 0, sizeof(bfh));
uint8* bfType = (uint8*)&bfh.bfType;
// bfh.bfType = 'MB';
bfType[0] = 0x42;
bfType[1] = 0x4d;
bfh.bfOffBits = sizeof(bfh) + sizeof(bih);
bfh.bfSize = bfh.bfOffBits + bih.biSizeImage;
bfh.bfReserved1 = bfh.bfReserved2 = 0;
fwrite(&bfh, 1, sizeof(bfh), fp);
fwrite(&bih, 1, sizeof(bih), fp);
uint8* data = (uint8*)image + (m_size.y - 1) * pitch;
for(int h = m_size.y; h > 0; h--, data -= pitch)
{
if (false && IsDss()) {
// Only get the depth and convert it to an integer
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 8) {
float* input = (float*)better_data;
// FIXME how to dump 32 bits value into 8bits component color
GLuint depth_integer = (GLuint)(*input * (float)UINT_MAX);
uint8 r = (depth_integer >> 0) & 0xFF;
uint8 g = (depth_integer >> 8) & 0xFF;
uint8 b = (depth_integer >> 16) & 0xFF;
uint8 a = (depth_integer >> 24) & 0xFF;
fwrite(&r, 1, 1, fp);
fwrite(&g, 1, 1, fp);
fwrite(&b, 1, 1, fp);
fwrite(&a, 1, 1, fp);
}
} else {
// swap red and blue
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 4) {
uint8 red = better_data[2];
better_data[2] = better_data[0];
better_data[0] = red;
fwrite(better_data, 1, 4, fp);
}
}
}
fclose(fp);
}
void GSTextureOGL::SaveRaw(const string& fn, const void* image, uint32 pitch)
{
// Build a raw CSV file
FILE* fp = fopen(fn.c_str(), "w");
if (fp == NULL)
return;
uint32* data = (uint32*)image;
for(int h = m_size.y; h > 0; h--) {
for (int w = m_size.x; w > 0; w--, data += 1) {
if (*data > 0xffffff)
;
else {
fprintf(fp, "%x", *data);
}
if ( w > 1)
fprintf(fp, ",");
}
fprintf(fp, "\n");
}
fclose(fp);
}
bool GSTextureOGL::Save(const string& fn, bool dds)
{
// Collect the texture data
uint32 pitch = 4 * m_size.x;
uint32 buf_size = pitch * m_size.y * 2;// Note *2 for security (depth/stencil)
char* image = (char*)malloc(buf_size);
bool status = true;
// FIXME instead of swapping manually B and R maybe you can request the driver to do it
// for us
if (IsBackbuffer()) {
//glReadBuffer(GL_BACK);
//gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
} else if(IsDss()) {
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0);
glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image);
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
} else if(m_format == GL_R32I) {
#ifndef ENABLE_GLES
gl_GetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image);
SaveRaw(fn, image, pitch);
#endif
// Not supported in Save function
status = false;
} else {
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);
glReadBuffer(GL_COLOR_ATTACHMENT0);
if (m_format == GL_RGBA8)
glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
else if (m_format == GL_R16UI)
{
glReadPixels(0, 0, m_size.x, m_size.y, GL_RED_INTEGER, GL_UNSIGNED_SHORT, image);
// Not supported in Save function
status = false;
}
else if (m_format == GL_R8)
{
glReadPixels(0, 0, m_size.x, m_size.y, GL_RED, GL_UNSIGNED_BYTE, image);
// Not supported in Save function
status = false;
}
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
if (status) Save(fn, image, pitch);
free(image);
return status;
}