2010-06-09 01:37:08 +00:00
|
|
|
// Copyright (C) 2003 Dolphin Project.
|
|
|
|
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
|
|
|
|
// Fast image conversion using Direct3D 9 pixel shaders.
|
|
|
|
// This kind of stuff would be a LOT nicer with OpenCL.
|
|
|
|
|
|
|
|
#include "TextureConverter.h"
|
|
|
|
#include "TextureConversionShader.h"
|
|
|
|
#include "PixelShaderCache.h"
|
|
|
|
#include "VertexShaderManager.h"
|
|
|
|
#include "VertexShaderCache.h"
|
|
|
|
#include "FramebufferManager.h"
|
|
|
|
#include "Globals.h"
|
|
|
|
#include "VideoConfig.h"
|
|
|
|
#include "ImageWrite.h"
|
|
|
|
#include "Render.h"
|
|
|
|
#include "TextureCache.h"
|
|
|
|
#include "Math.h"
|
|
|
|
#include "FileUtil.h"
|
|
|
|
|
|
|
|
namespace TextureConverter
|
|
|
|
{
|
|
|
|
struct TransformBuffer
|
|
|
|
{
|
|
|
|
LPDIRECT3DTEXTURE9 FBTexture;
|
|
|
|
LPDIRECT3DSURFACE9 RenderSurface;
|
|
|
|
LPDIRECT3DSURFACE9 ReadSurface;
|
|
|
|
int Width;
|
|
|
|
int Height;
|
|
|
|
};
|
|
|
|
const u32 NUM_TRANSFORM_BUFFERS = 16;
|
|
|
|
static TransformBuffer TrnBuffers[NUM_TRANSFORM_BUFFERS];
|
|
|
|
static u32 WorkingBuffers = 0;
|
|
|
|
|
|
|
|
static LPDIRECT3DPIXELSHADER9 s_rgbToYuyvProgram = NULL;
|
|
|
|
static LPDIRECT3DPIXELSHADER9 s_yuyvToRgbProgram = NULL;
|
|
|
|
|
|
|
|
// Not all slots are taken - but who cares.
|
|
|
|
const u32 NUM_ENCODING_PROGRAMS = 64;
|
|
|
|
static LPDIRECT3DPIXELSHADER9 s_encodingPrograms[NUM_ENCODING_PROGRAMS];
|
|
|
|
|
|
|
|
void CreateRgbToYuyvProgram()
|
|
|
|
{
|
|
|
|
// Output is BGRA because that is slightly faster than RGBA.
|
|
|
|
char* FProgram = new char[2048];
|
|
|
|
sprintf(FProgram,"uniform float4 blkDims : register(c%d);\n"
|
|
|
|
"uniform float4 textureDims : register(c%d);\n"
|
|
|
|
"uniform sampler samp0 : register(s0);\n"
|
|
|
|
"void main(\n"
|
|
|
|
" out float4 ocol0 : COLOR0,\n"
|
|
|
|
" in float2 uv0 : TEXCOORD0)\n"
|
|
|
|
"{\n"
|
|
|
|
" float2 uv1 = float2((uv0.x + 1.0f)/ blkDims.z, uv0.y / blkDims.w);\n"
|
|
|
|
" float3 c0 = tex2D(samp0, uv0.xy / blkDims.zw).rgb;\n"
|
|
|
|
" float3 c1 = tex2D(samp0, uv1).rgb;\n"
|
|
|
|
" float3 y_const = float3(0.257f,0.504f,0.098f);\n"
|
|
|
|
" float3 u_const = float3(-0.148f,-0.291f,0.439f);\n"
|
|
|
|
" float3 v_const = float3(0.439f,-0.368f,-0.071f);\n"
|
|
|
|
" float4 const3 = float4(0.0625f,0.5f,0.0625f,0.5f);\n"
|
|
|
|
" float3 c01 = (c0 + c1) * 0.5f;\n"
|
|
|
|
" ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
|
|
|
|
"}\n",C_COLORMATRIX,C_COLORMATRIX+1);
|
|
|
|
|
|
|
|
s_rgbToYuyvProgram = D3D::CompileAndCreatePixelShader(FProgram, (int)strlen(FProgram));
|
|
|
|
if (!s_rgbToYuyvProgram) {
|
|
|
|
ERROR_LOG(VIDEO, "Failed to create RGB to YUYV fragment program");
|
|
|
|
}
|
|
|
|
delete [] FProgram;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CreateYuyvToRgbProgram()
|
|
|
|
{
|
|
|
|
char* FProgram = new char[2048];
|
|
|
|
sprintf(FProgram,"uniform float4 blkDims : register(c%d);\n"
|
|
|
|
"uniform float4 textureDims : register(c%d);\n"
|
|
|
|
"uniform sampler samp0 : register(s0);\n"
|
|
|
|
"void main(\n"
|
|
|
|
" out float4 ocol0 : COLOR0,\n"
|
|
|
|
" in float2 uv0 : TEXCOORD0)\n"
|
|
|
|
"{\n"
|
|
|
|
" float4 c0 = tex2D(samp0, uv0 / blkDims.zw).rgba;\n"
|
|
|
|
" float f = step(0.5, frac(uv0.x));\n"
|
|
|
|
" float y = lerp(c0.b, c0.r, f);\n"
|
|
|
|
" float yComp = 1.164f * (y - 0.0625f);\n"
|
|
|
|
" float uComp = c0.g - 0.5f;\n"
|
|
|
|
" float vComp = c0.a - 0.5f;\n"
|
|
|
|
|
|
|
|
" ocol0 = float4(yComp + (1.596f * vComp),\n"
|
|
|
|
" yComp - (0.813f * vComp) - (0.391f * uComp),\n"
|
|
|
|
" yComp + (2.018f * uComp),\n"
|
|
|
|
" 1.0f);\n"
|
|
|
|
"}\n",C_COLORMATRIX,C_COLORMATRIX+1);
|
|
|
|
s_yuyvToRgbProgram = D3D::CompileAndCreatePixelShader(FProgram, (int)strlen(FProgram));
|
|
|
|
if (!s_yuyvToRgbProgram) {
|
|
|
|
ERROR_LOG(VIDEO, "Failed to create YUYV to RGB fragment program");
|
|
|
|
}
|
|
|
|
delete [] FProgram;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the cached encoding pixel shader for the given texture copy format,
// generating and compiling it on first use.
//
// format: index into s_encodingPrograms; must be below NUM_ENCODING_PROGRAMS.
// Returns NULL when the shader source could not be generated or compiled.
// For an out-of-range format a panic alert is raised and slot 0 is returned.
LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format)
{
	// Valid indices are 0 .. NUM_ENCODING_PROGRAMS - 1, so the bound itself
	// must be rejected too; otherwise the array is indexed one past its end.
	if (format >= NUM_ENCODING_PROGRAMS)
	{
		PanicAlert("Unknown texture copy format: 0x%x\n", format);
		return s_encodingPrograms[0];
	}

	if (!s_encodingPrograms[format])
	{
		const char* shader = TextureConversionShader::GenerateEncodingShader(format,API_D3D9);

		// Without source text there is nothing to dump or compile; bail out
		// before strlen() is handed a NULL pointer.
		if (!shader)
		{
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
			return NULL;
		}

#if defined(_DEBUG) || defined(DEBUGFAST)
		// Optionally dump the generated source for inspection.
		if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
			static int counter = 0;
			char szTemp[MAX_PATH];
			sprintf(szTemp, "%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX), counter++);

			SaveData(szTemp, shader);
		}
#endif
		s_encodingPrograms[format] = D3D::CompileAndCreatePixelShader(shader, (int)strlen(shader));
		if (!s_encodingPrograms[format]) {
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
		}
	}
	return s_encodingPrograms[format];
}
|
|
|
|
|
|
|
|
void Init()
|
|
|
|
{
|
|
|
|
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
|
|
|
|
{
|
|
|
|
s_encodingPrograms[i] = NULL;
|
|
|
|
}
|
|
|
|
for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++)
|
|
|
|
{
|
|
|
|
TrnBuffers[i].FBTexture = NULL;
|
|
|
|
TrnBuffers[i].RenderSurface = NULL;
|
|
|
|
TrnBuffers[i].ReadSurface = NULL;
|
|
|
|
TrnBuffers[i].Width = 0;
|
|
|
|
TrnBuffers[i].Height = 0;
|
|
|
|
}
|
|
|
|
CreateRgbToYuyvProgram();
|
|
|
|
CreateYuyvToRgbProgram();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Shutdown()
|
|
|
|
{
|
|
|
|
if(s_rgbToYuyvProgram)
|
|
|
|
s_rgbToYuyvProgram->Release();
|
|
|
|
s_rgbToYuyvProgram = NULL;
|
|
|
|
if(s_yuyvToRgbProgram)
|
|
|
|
s_yuyvToRgbProgram->Release();
|
|
|
|
s_yuyvToRgbProgram=NULL;
|
|
|
|
|
|
|
|
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
|
|
|
|
{
|
|
|
|
if(s_encodingPrograms[i])
|
|
|
|
s_encodingPrograms[i]->Release();
|
|
|
|
s_encodingPrograms[i] = NULL;
|
|
|
|
}
|
|
|
|
for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++)
|
|
|
|
{
|
|
|
|
if(TrnBuffers[i].RenderSurface != NULL)
|
|
|
|
TrnBuffers[i].RenderSurface->Release();
|
|
|
|
TrnBuffers[i].RenderSurface = NULL;
|
|
|
|
|
|
|
|
if(TrnBuffers[i].ReadSurface != NULL)
|
|
|
|
TrnBuffers[i].ReadSurface->Release();
|
|
|
|
TrnBuffers[i].ReadSurface = NULL;
|
|
|
|
|
|
|
|
if(TrnBuffers[i].FBTexture != NULL)
|
|
|
|
TrnBuffers[i].FBTexture->Release();
|
|
|
|
TrnBuffers[i].FBTexture = NULL;
|
|
|
|
|
|
|
|
TrnBuffers[i].Width = 0;
|
|
|
|
TrnBuffers[i].Height = 0;
|
|
|
|
}
|
|
|
|
WorkingBuffers = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Draws srcTexture through the given pixel shader into an off-screen render
// target of dstWidth x dstHeight texels, reads the result back to system
// memory and copies it to destAddr.
//
// shader       - pixel shader used for the draw.
// srcTexture   - texture sampled by the shader.
// sourceRc     - source rectangle handed to the textured-quad draw.
// destAddr     - destination for the encoded bytes (4 bytes per output texel).
// dstWidth/dstHeight - size of the render target in texels.
// readStride   - bytes produced per pass when writing with a stride; only
//                used when toTexture is true.
// toTexture    - when true and readStride differs from the BP write stride,
//                the data is copied in passes, advancing the destination by
//                the write stride each time.
// linearFilter - selects linear instead of point minification filtering.
//
// The function changes render target 0, the depth-stencil surface and the
// viewport and does not restore them. If a required Direct3D call fails the
// error is logged and nothing is written to destAddr.
void EncodeToRamUsingShader(LPDIRECT3DPIXELSHADER9 shader, LPDIRECT3DTEXTURE9 srcTexture, const TargetRectangle& sourceRc,
	u8* destAddr, int dstWidth, int dstHeight, int readStride, bool toTexture, bool linearFilter)
{
	// A missing destination or an empty target cannot be encoded; a zero
	// width would also divide by zero in the strided copy below.
	if (!destAddr || dstWidth <= 0 || dstHeight <= 0)
	{
		ERROR_LOG(VIDEO, "Invalid destination for shader-based texture encode");
		return;
	}

	HRESULT hr;

	// Look for an existing buffer with exactly the requested dimensions.
	u32 index = 0;
	while (index < WorkingBuffers && (TrnBuffers[index].Width != dstWidth || TrnBuffers[index].Height != dstHeight))
		index++;

	if (index >= WorkingBuffers)
	{
		// None found: grow the pool if there is room, otherwise recycle the
		// last entry. Either way index ends up as WorkingBuffers - 1.
		if (WorkingBuffers < NUM_TRANSFORM_BUFFERS)
			WorkingBuffers++;
		if (index >= WorkingBuffers)
			index--;

		TransformBuffer& slot = TrnBuffers[index];

		if (slot.RenderSurface != NULL)
		{
			slot.RenderSurface->Release();
			slot.RenderSurface = NULL;
		}
		if (slot.ReadSurface != NULL)
		{
			slot.ReadSurface->Release();
			slot.ReadSurface = NULL;
		}
		if (slot.FBTexture != NULL)
		{
			slot.FBTexture->Release();
			slot.FBTexture = NULL;
		}

		slot.Width = dstWidth;
		slot.Height = dstHeight;

		hr = D3D::dev->CreateTexture(dstWidth, dstHeight, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8,
			D3DPOOL_DEFAULT, &slot.FBTexture, NULL);
		if (SUCCEEDED(hr))
			hr = slot.FBTexture->GetSurfaceLevel(0, &slot.RenderSurface);
		if (SUCCEEDED(hr))
			hr = D3D::dev->CreateOffscreenPlainSurface(dstWidth, dstHeight, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &slot.ReadSurface, NULL);

		if (FAILED(hr))
		{
			// Drop whatever was created and hand the slot back, so a later
			// call does not match a buffer whose surfaces are NULL.
			if (slot.RenderSurface != NULL)
			{
				slot.RenderSurface->Release();
				slot.RenderSurface = NULL;
			}
			if (slot.ReadSurface != NULL)
			{
				slot.ReadSurface->Release();
				slot.ReadSurface = NULL;
			}
			if (slot.FBTexture != NULL)
			{
				slot.FBTexture->Release();
				slot.FBTexture = NULL;
			}
			slot.Width = 0;
			slot.Height = 0;
			WorkingBuffers = index;

			ERROR_LOG(VIDEO, "Failed to create surfaces for texture encode");
			return;
		}
	}

	LPDIRECT3DSURFACE9 s_texConvReadSurface = TrnBuffers[index].ReadSurface;
	LPDIRECT3DSURFACE9 Rendersurf = TrnBuffers[index].RenderSurface;

	D3D::dev->SetDepthStencilSurface(NULL);
	D3D::dev->SetRenderTarget(0, Rendersurf);

	if (linearFilter)
	{
		D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
	}
	else
	{
		D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
	}

	D3DVIEWPORT9 vp;
	vp.X = 0;
	vp.Y = 0;
	vp.Width = dstWidth;
	vp.Height = dstHeight;
	vp.MinZ = 0.0f;
	vp.MaxZ = 1.0f;
	D3D::dev->SetViewport(&vp);

	RECT SrcRect;
	SrcRect.top = sourceRc.top;
	SrcRect.left = sourceRc.left;
	SrcRect.right = sourceRc.right;
	SrcRect.bottom = sourceRc.bottom;

	RECT DstRect;
	DstRect.top = 0;
	DstRect.left = 0;
	DstRect.right = dstWidth;
	DstRect.bottom = dstHeight;

	// Draw...
	D3D::drawShadedTexQuad(srcTexture,&SrcRect,1,1,dstWidth,dstHeight,shader,VertexShaderCache::GetSimpleVertexShader(0));
	D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);

	// .. and then readback the results.
	// TODO: make this less slow.
	hr = D3D::dev->GetRenderTargetData(Rendersurf,s_texConvReadSurface);
	if (FAILED(hr))
	{
		ERROR_LOG(VIDEO, "Failed to read back encoded texture data");
		return;
	}

	D3DLOCKED_RECT drect;
	hr = s_texConvReadSurface->LockRect(&drect, &DstRect, D3DLOCK_READONLY);
	if (FAILED(hr))
	{
		// drect is not valid here, so there is nothing to copy from.
		ERROR_LOG(VIDEO, "Failed to lock read-back surface");
		return;
	}

	// NOTE(review): both copies below treat the locked data as tightly
	// packed (dstWidth * 4 bytes per row) and ignore drect.Pitch - confirm
	// this holds for the system-memory surface on all drivers.
	int writeStride = bpmem.copyMipMapStrideChannels * 32;

	if (writeStride != readStride && toTexture)
	{
		// writing to a texture of a different size
		int readHeight = readStride / dstWidth;
		int rowsPerPass = readHeight / 4; // 4 bytes per pixel

		if (rowsPerPass > 0)
		{
			int readLoops = dstHeight / rowsPerPass;
			int readDist = dstWidth*readHeight;
			u8 *Source = (u8*)drect.pBits;
			for (int i = 0; i < readLoops; i++)
			{
				memcpy(destAddr,Source,readDist);
				Source += readDist;
				destAddr += writeStride;
			}
		}
		else
		{
			// readStride is too small to cover a single row; the pass
			// count would be a division by zero.
			ERROR_LOG(VIDEO, "Invalid read stride for texture encode");
		}
	}
	else
	{
		memcpy(destAddr,drect.pBits,dstWidth*dstHeight*4);// 4 bytes per pixel
	}

	s_texConvReadSurface->UnlockRect();
}
|
|
|
|
|
|
|
|
// Encodes a region of the EFB (colour or depth) into emulated RAM at
// `address`, in the texture format selected by copyfmt.
//
// address         - emulated address the encoded data is written to.
// bFromZBuffer    - read from the EFB depth texture instead of the colour texture.
// bIsIntensityFmt - affects which copy formats are flagged with _GX_TF_CTF.
// copyfmt         - raw copy format; combined with the flags above into the
//                   final format used to pick the encoding shader.
// bScaleByHalf    - non-zero halves the copied width and height.
// source          - EFB rectangle to copy.
//
// Returns without writing anything if no encoding shader is available or the
// destination address does not map to memory.
void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source)
{
	u32 format = copyfmt;

	if (bFromZBuffer)
	{
		format |= _GX_TF_ZTF;
		if (copyfmt == 11)
			format = GX_TF_Z16;
		else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
			format |= _GX_TF_CTF;
	}
	else if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
	{
		format |= _GX_TF_CTF;
	}

	LPDIRECT3DPIXELSHADER9 texconv_shader = GetOrCreateEncodingShader(format);
	if (!texconv_shader)
		return;

	// Memory_GetPtr is checked for NULL in DecodeToTexture as well; without
	// this check the encoded data would be copied to a NULL pointer.
	u8 *dest_ptr = Memory_GetPtr(address);
	if (!dest_ptr)
	{
		WARN_LOG(VIDEO, "Tried to encode to invalid memory address");
		return;
	}

	LPDIRECT3DTEXTURE9 source_texture = bFromZBuffer ? g_framebufferManager.GetEFBDepthTexture() : g_framebufferManager.GetEFBColorTexture();

	int width = (source.right - source.left) >> bScaleByHalf;
	int height = (source.bottom - source.top) >> bScaleByHalf;

	int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);

	// Invalidate any existing texture covering this memory range.
	// TODO - don't delete the texture if it already exists, just replace the contents.
	TextureCache::InvalidateRange(address, size_in_bytes);

	const int blockWidth = TexDecoder_GetBlockWidthInTexels(format);
	u16 blkW = blockWidth - 1;
	u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
	u16 samples = TextureConversionShader::GetEncodedSampleCount(format);

	// only copy on cache line boundaries
	// extra pixels are copied but not displayed in the resulting texture
	s32 expandedWidth = (width + blkW) & (~blkW);
	s32 expandedHeight = (height + blkH) & (~blkH);

	float MValueX = Renderer::GetTargetScaleX();
	float MValueY = Renderer::GetTargetScaleY();

	// Offset of the target area inside the full-size render target.
	float Xstride = (float)((Renderer::GetFullTargetWidth() - Renderer::GetTargetWidth()) / 2);
	float Ystride = (float)((Renderer::GetFullTargetHeight() - Renderer::GetTargetHeight()) / 2);

	float sampleStride = bScaleByHalf?2.0f:1.0f;

	TextureConversionShader::SetShaderParameters(
		(float)expandedWidth,
		expandedHeight * MValueY,
		source.left * MValueX + Xstride ,
		source.top * MValueY + Ystride,
		sampleStride * MValueX,
		sampleStride * MValueY,
		(float)Renderer::GetFullTargetWidth(),
		(float)Renderer::GetFullTargetHeight());

	TargetRectangle scaledSource;
	scaledSource.top = 0;
	scaledSource.bottom = expandedHeight;
	scaledSource.left = 0;
	scaledSource.right = expandedWidth / samples;

	// Formats whose low nibble is 6 use 64 bytes per block, all others 32.
	int cacheBytes = 32;
	if ((format & 0x0f) == 6)
		cacheBytes = 64;

	int readStride = (expandedWidth * cacheBytes) / blockWidth;

	Renderer::ResetAPIState();
	EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, true, bScaleByHalf > 0);

	// The encode pass replaced the render target and depth surface; put the EFB back.
	D3D::dev->SetRenderTarget(0, g_framebufferManager.GetEFBColorRTSurface());
	D3D::dev->SetDepthStencilSurface(g_framebufferManager.GetEFBDepthRTSurface());
	Renderer::RestoreAPIState();
}
|
|
|
|
|
2010-07-09 20:56:16 +00:00
|
|
|
// Encodes a region of an arbitrary source texture into emulated RAM at
// `address`, marks that memory range as dynamic in the texture cache and
// returns a hash of the bytes written.
//
// address           - emulated address the encoded data is written to.
// source_texture    - texture to encode from.
// SourceW/SourceH   - dimensions of source_texture passed to the shader.
// MValueX/MValueY   - scale factors applied to source coordinates.
// Xstride/Ystride   - offsets added to the scaled source origin.
// bFromZBuffer, bIsIntensityFmt, copyfmt - select the encoding format.
// bScaleByHalf      - non-zero halves the copied width and height.
// source            - source rectangle in unscaled coordinates.
//
// Returns 0 if no encoding shader is available or the destination address
// does not map to memory. Render state is not reset or restored here.
u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture,u32 SourceW, u32 SourceH,float MValueX,float MValueY,float Xstride, float Ystride , bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source)
{
	u32 format = copyfmt;

	if (bFromZBuffer)
	{
		format |= _GX_TF_ZTF;
		if (copyfmt == 11)
			format = GX_TF_Z16;
		else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
			format |= _GX_TF_CTF;
	}
	else if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
	{
		format |= _GX_TF_CTF;
	}

	LPDIRECT3DPIXELSHADER9 texconv_shader = GetOrCreateEncodingShader(format);
	if (!texconv_shader)
		return 0;

	// Memory_GetPtr is checked for NULL in DecodeToTexture as well; without
	// this check a NULL pointer would be written to and then hashed.
	u8 *dest_ptr = Memory_GetPtr(address);
	if (!dest_ptr)
	{
		WARN_LOG(VIDEO, "Tried to encode to invalid memory address");
		return 0;
	}

	int width = (source.right - source.left) >> bScaleByHalf;
	int height = (source.bottom - source.top) >> bScaleByHalf;

	int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);

	const int blockWidth = TexDecoder_GetBlockWidthInTexels(format);
	u16 blkW = blockWidth - 1;
	u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
	u16 samples = TextureConversionShader::GetEncodedSampleCount(format);

	// only copy on cache line boundaries
	// extra pixels are copied but not displayed in the resulting texture
	s32 expandedWidth = (width + blkW) & (~blkW);
	s32 expandedHeight = (height + blkH) & (~blkH);

	float sampleStride = bScaleByHalf?2.0f:1.0f;

	TextureConversionShader::SetShaderParameters(
		(float)expandedWidth,
		expandedHeight * MValueY,
		source.left * MValueX + Xstride ,
		source.top * MValueY + Ystride,
		sampleStride * MValueX,
		sampleStride * MValueY,
		(float)SourceW,
		(float)SourceH);

	TargetRectangle scaledSource;
	scaledSource.top = 0;
	scaledSource.bottom = expandedHeight;
	scaledSource.left = 0;
	scaledSource.right = expandedWidth / samples;

	// Formats whose low nibble is 6 use 64 bytes per block, all others 32.
	int cacheBytes = 32;
	if ((format & 0x0f) == 6)
		cacheBytes = 64;

	int readStride = (expandedWidth * cacheBytes) / blockWidth;

	EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, true, bScaleByHalf > 0);
	TextureCache::MakeRangeDynamic(address,size_in_bytes);
	return GetHash64(dest_ptr,size_in_bytes,g_ActiveConfig.iSafeTextureCache_ColorSamples);
}
|
|
|
|
|
2010-09-28 02:15:02 +00:00
|
|
|
// Converts srcTexture to YUYV with the RGB-to-YUYV shader and writes the
// result to destAddr. The render target is dstWidth / 2 texels wide because
// the shader packs two source pixels into each output texel. Afterwards the
// EFB colour and depth surfaces are bound again and API state is restored.
void EncodeToRamYUYV(LPDIRECT3DTEXTURE9 srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight)
{
	const float fullTargetW = (float)Renderer::GetFullTargetWidth();
	const float fullTargetH = (float)Renderer::GetFullTargetHeight();

	// No source offset and a sample stride of one in both directions.
	TextureConversionShader::SetShaderParameters(
		(float)dstWidth, (float)dstHeight,
		0.0f, 0.0f,
		1.0f, 1.0f,
		fullTargetW, fullTargetH);

	Renderer::ResetAPIState();

	// Plain linear write (toTexture = false), point filtering.
	const int packedWidth = dstWidth / 2;
	EncodeToRamUsingShader(s_rgbToYuyvProgram, srcTexture, sourceRc, destAddr, packedWidth, dstHeight, 0, false, false);

	D3D::dev->SetRenderTarget(0, g_framebufferManager.GetEFBColorRTSurface());
	D3D::dev->SetDepthStencilSurface(g_framebufferManager.GetEFBDepthRTSurface());

	Renderer::RestoreAPIState();
}
|
|
|
|
|
|
|
|
|
|
|
|
// Should be scale free.
|
|
|
|
// Decodes YUYV data stored in emulated memory at xfbAddr into destTexture
// using the YUYV-to-RGB shader.
//
// xfbAddr     - emulated address of the source data.
// srcWidth    - width of the decoded image in pixels; the source texture is
//               half as wide because each texel holds two pixels.
// srcHeight   - height of the image in pixels.
// destTexture - texture whose level-0 surface is rendered into.
//
// Logs and returns without drawing if the address does not map to memory or
// the temporary source texture / destination surface cannot be obtained.
// On return the EFB colour and depth surfaces are bound again.
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, LPDIRECT3DTEXTURE9 destTexture)
{
	u8* srcAddr = Memory_GetPtr(xfbAddr);
	if (!srcAddr)
	{
		WARN_LOG(VIDEO, "Tried to decode from invalid memory address");
		return;
	}

	int srcFmtWidth = srcWidth / 2;

	Renderer::ResetAPIState(); // reset any game specific settings

	LPDIRECT3DTEXTURE9 s_srcTexture = D3D::CreateTexture2D(srcAddr, srcFmtWidth, srcHeight, srcFmtWidth, D3DFMT_A8R8G8B8, false);
	LPDIRECT3DSURFACE9 Rendersurf = NULL;
	HRESULT hr = destTexture->GetSurfaceLevel(0,&Rendersurf);

	// Both objects are dereferenced and released below, so stop here if
	// either is missing. The render target has not been changed yet, so only
	// the API state needs to be restored.
	// NOTE(review): assumes D3D::CreateTexture2D reports failure by returning NULL - confirm.
	if (!s_srcTexture || FAILED(hr) || !Rendersurf)
	{
		ERROR_LOG(VIDEO, "Failed to set up surfaces for YUYV to RGB decode");
		if (Rendersurf)
			Rendersurf->Release();
		if (s_srcTexture)
			s_srcTexture->Release();
		Renderer::RestoreAPIState();
		return;
	}

	D3D::dev->SetDepthStencilSurface(NULL);
	D3D::dev->SetRenderTarget(0, Rendersurf);

	D3DVIEWPORT9 vp;

	// Stretch picture with increased internal resolution
	vp.X = 0;
	vp.Y = 0;
	vp.Width = srcWidth;
	vp.Height = srcHeight;
	vp.MinZ = 0.0f;
	vp.MaxZ = 1.0f;
	D3D::dev->SetViewport(&vp);

	RECT destrect;
	destrect.bottom = srcHeight;
	destrect.left = 0;
	destrect.right = srcWidth;
	destrect.top = 0;

	RECT sourcerect;
	sourcerect.bottom = srcHeight;
	sourcerect.left = 0;
	sourcerect.right = srcFmtWidth;
	sourcerect.top = 0;

	// Point sampling: every packed texel has to be read exactly, not blended.
	D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
	D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);

	TextureConversionShader::SetShaderParameters(
		(float)srcFmtWidth,
		(float)srcHeight,
		0.0f ,
		0.0f,
		1.0f,
		1.0f,
		(float)srcFmtWidth,
		(float)srcHeight);
	D3D::drawShadedTexQuad(
		s_srcTexture,
		&sourcerect,
		1 ,
		1,
		srcWidth,
		srcHeight,
		s_yuyvToRgbProgram,
		VertexShaderCache::GetSimpleVertexShader(0));

	D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
	D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
	D3D::SetTexture(0,NULL);
	D3D::dev->SetRenderTarget(0, g_framebufferManager.GetEFBColorRTSurface());
	D3D::dev->SetDepthStencilSurface(g_framebufferManager.GetEFBDepthRTSurface());
	Renderer::RestoreAPIState();
	Rendersurf->Release();
	s_srcTexture->Release();
}
|
|
|
|
|
|
|
|
} // namespace
|