ADDED asm-filters thread-safe patches by chrono.
ADDED maxCpuCores option to ini file and autodetect best value at first run.
This commit is contained in:
parent
911450eff1
commit
f4170784b3
|
@ -63,9 +63,6 @@ Important:
|
||||||
|
|
||||||
- Improve automatic 64k/128k flash save detection
|
- Improve automatic 64k/128k flash save detection
|
||||||
|
|
||||||
- HQ3x/4x ASM implementation produces wrong interpolation on the image's border
|
|
||||||
- This has already been fixed in the C version; look at hq_base.h / line 343 - 372. The ASM version most likely only has something like skipLine instead of skipLinePlus and skipLineMinus, which is however necessary in order to work correctly.
|
|
||||||
|
|
||||||
- Fix OpenGL issues
|
- Fix OpenGL issues
|
||||||
|
|
||||||
- Remove 16 bit hack for filters
|
- Remove 16 bit hack for filters
|
||||||
|
@ -107,7 +104,7 @@ Performance:
|
||||||
|
|
||||||
- Apply pixel filter only to changed parts of the image
|
- Apply pixel filter only to changed parts of the image
|
||||||
|
|
||||||
- Make use of multi-core CPUs
|
- Make even more use of multi-core CPUs
|
||||||
|
|
||||||
- Make use of 64 bit CPUs
|
- Make use of 64 bit CPUs
|
||||||
|
|
||||||
|
@ -150,4 +147,6 @@ Jonas Quinn
|
||||||
Spacy
|
Spacy
|
||||||
|
|
||||||
Special Thanks go to:
|
Special Thanks go to:
|
||||||
chrono - ASM genius, fixed a bug in the HQ3x/4x filters and made them thread-safe
|
chrono:
|
||||||
|
ASM genius, fixed a bug in the HQ3x/4x filters.
|
||||||
|
Made HQ3x/4x and Bilinear filters (ASM versions) thread-safe
|
||||||
|
|
|
@ -24,12 +24,6 @@
|
||||||
|
|
||||||
#include "System.h"
|
#include "System.h"
|
||||||
|
|
||||||
static u8 row_cur[3*322];
|
|
||||||
static u8 row_next[3*322];
|
|
||||||
|
|
||||||
static u8 *rgb_row_cur = row_cur;
|
|
||||||
static u8 *rgb_row_next = row_next;
|
|
||||||
|
|
||||||
#define RGB(r,g,b) ((r)>>3) << systemRedShift |\
|
#define RGB(r,g,b) ((r)>>3) << systemRedShift |\
|
||||||
((g) >> 3) << systemGreenShift |\
|
((g) >> 3) << systemGreenShift |\
|
||||||
((b) >> 3) << systemBlueShift\
|
((b) >> 3) << systemBlueShift\
|
||||||
|
@ -83,6 +77,11 @@ static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width)
|
||||||
void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||||
{
|
{
|
||||||
|
u8 row_cur[3*322];
|
||||||
|
u8 row_next[3*322];
|
||||||
|
u8 *rgb_row_cur = row_cur;
|
||||||
|
u8 *rgb_row_next = row_next;
|
||||||
|
|
||||||
u16 *to = (u16 *)dstPtr;
|
u16 *to = (u16 *)dstPtr;
|
||||||
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
||||||
|
|
||||||
|
@ -161,6 +160,11 @@ void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||||
{
|
{
|
||||||
|
u8 row_cur[3*322];
|
||||||
|
u8 row_next[3*322];
|
||||||
|
u8 *rgb_row_cur = row_cur;
|
||||||
|
u8 *rgb_row_next = row_next;
|
||||||
|
|
||||||
u16 *to = (u16 *)dstPtr;
|
u16 *to = (u16 *)dstPtr;
|
||||||
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
||||||
|
|
||||||
|
@ -250,6 +254,11 @@ void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||||
{
|
{
|
||||||
|
u8 row_cur[3*322];
|
||||||
|
u8 row_next[3*322];
|
||||||
|
u8 *rgb_row_cur = row_cur;
|
||||||
|
u8 *rgb_row_next = row_next;
|
||||||
|
|
||||||
u32 *to = (u32 *)dstPtr;
|
u32 *to = (u32 *)dstPtr;
|
||||||
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
||||||
|
|
||||||
|
@ -330,6 +339,11 @@ void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||||
{
|
{
|
||||||
|
u8 row_cur[3*322];
|
||||||
|
u8 row_next[3*322];
|
||||||
|
u8 *rgb_row_cur = row_cur;
|
||||||
|
u8 *rgb_row_next = row_next;
|
||||||
|
|
||||||
u32 *to = (u32 *)dstPtr;
|
u32 *to = (u32 *)dstPtr;
|
||||||
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -21,7 +21,7 @@
|
||||||
#ifndef NO_D3D
|
#ifndef NO_D3D
|
||||||
|
|
||||||
// The number of pixel-filter threads to be created
|
// The number of pixel-filter threads to be created
|
||||||
#define NTHREADS 4
|
#define NTHREADS ( theApp.maxCpuCores )
|
||||||
|
|
||||||
#pragma comment( lib, "d3d9" )
|
#pragma comment( lib, "d3d9" )
|
||||||
#pragma comment( lib, "d3dx9" )
|
#pragma comment( lib, "d3dx9" )
|
||||||
|
@ -454,7 +454,7 @@ void Direct3DDisplay::render()
|
||||||
u32 dst_bytes_per_thread = lr.Pitch * dst_height_per_thread;
|
u32 dst_bytes_per_thread = lr.Pitch * dst_height_per_thread;
|
||||||
|
|
||||||
// Use Multi Threading
|
// Use Multi Threading
|
||||||
assert( NTHREADS > 0 );
|
assert( ( NTHREADS > 0 ) && ( NTHREADS < MAXIMUM_PROCESSORS ) );
|
||||||
for( int i = ( NTHREADS - 1 ) ; i > -1 ; i-- ) {
|
for( int i = ( NTHREADS - 1 ) ; i > -1 ; i-- ) {
|
||||||
// create last thread first because it could have more work than the others (e.g. if NTHREADS = 3)
|
// create last thread first because it could have more work than the others (e.g. if NTHREADS = 3)
|
||||||
// (last thread has to process the remaining lines if (height / NTHREADS) is not an integer)
|
// (last thread has to process the remaining lines if (height / NTHREADS) is not an integer)
|
||||||
|
@ -819,7 +819,7 @@ void Direct3DDisplay::calculateDestRect()
|
||||||
rectangleFillsScreen = true; // no clear() necessary
|
rectangleFillsScreen = true; // no clear() necessary
|
||||||
destRect.left = 0;
|
destRect.left = 0;
|
||||||
destRect.top = 0;
|
destRect.top = 0;
|
||||||
destRect.right = dpp.BackBufferWidth; // for some reason there'l be a black
|
destRect.right = dpp.BackBufferWidth; // for some reason there will be a black
|
||||||
destRect.bottom = dpp.BackBufferHeight; // border line when using -1 at the end
|
destRect.bottom = dpp.BackBufferHeight; // border line when using -1 at the end
|
||||||
} else {
|
} else {
|
||||||
// use aspect ratio
|
// use aspect ratio
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
|
#include <intrin.h>
|
||||||
|
|
||||||
#include "AVIWrite.h"
|
#include "AVIWrite.h"
|
||||||
#include "LangSelect.h"
|
#include "LangSelect.h"
|
||||||
|
@ -207,16 +208,6 @@ void directXMessage(const char *msg)
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
// VBA
|
// VBA
|
||||||
|
|
||||||
BEGIN_MESSAGE_MAP(VBA, CWinApp)
|
|
||||||
//{{AFX_MSG_MAP(VBA)
|
|
||||||
// NOTE - the ClassWizard will add and remove mapping macros here.
|
|
||||||
// DO NOT EDIT what you see in these blocks of generated code!
|
|
||||||
//}}AFX_MSG_MAP
|
|
||||||
END_MESSAGE_MAP()
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
|
||||||
// VBA construction
|
|
||||||
|
|
||||||
VBA::VBA()
|
VBA::VBA()
|
||||||
{
|
{
|
||||||
// COINIT_MULTITHREADED is not supported by SHBrowseForFolder with BIF_USENEWUI
|
// COINIT_MULTITHREADED is not supported by SHBrowseForFolder with BIF_USENEWUI
|
||||||
|
@ -229,6 +220,7 @@ VBA::VBA()
|
||||||
mode800Available = false;
|
mode800Available = false;
|
||||||
mode1024Available = false;
|
mode1024Available = false;
|
||||||
mode1280Available = false;
|
mode1280Available = false;
|
||||||
|
maxCpuCores = 1;
|
||||||
windowPositionX = 0;
|
windowPositionX = 0;
|
||||||
windowPositionY = 0;
|
windowPositionY = 0;
|
||||||
filterFunction = NULL;
|
filterFunction = NULL;
|
||||||
|
@ -1486,6 +1478,14 @@ void VBA::loadSettings()
|
||||||
if(windowPositionY < 0)
|
if(windowPositionY < 0)
|
||||||
windowPositionY = 0;
|
windowPositionY = 0;
|
||||||
|
|
||||||
|
maxCpuCores = regQueryDwordValue("maxCpuCores", 0);
|
||||||
|
if(maxCpuCores < 0) {
|
||||||
|
maxCpuCores = 0;
|
||||||
|
}
|
||||||
|
if(maxCpuCores == 0) {
|
||||||
|
maxCpuCores = detectCpuCores();
|
||||||
|
}
|
||||||
|
|
||||||
useBiosFileGBA = ( regQueryDwordValue("useBiosGBA", 0) == 1 ) ? true : false;
|
useBiosFileGBA = ( regQueryDwordValue("useBiosGBA", 0) == 1 ) ? true : false;
|
||||||
|
|
||||||
useBiosFileGB = ( regQueryDwordValue("useBiosGB", 0) == 1 ) ? true : false;
|
useBiosFileGB = ( regQueryDwordValue("useBiosGB", 0) == 1 ) ? true : false;
|
||||||
|
@ -2474,6 +2474,8 @@ void VBA::saveSettings()
|
||||||
regSetDwordValue("windowX", windowPositionX);
|
regSetDwordValue("windowX", windowPositionX);
|
||||||
regSetDwordValue("windowY", windowPositionY);
|
regSetDwordValue("windowY", windowPositionY);
|
||||||
|
|
||||||
|
regSetDwordValue("maxCpuCores", maxCpuCores);
|
||||||
|
|
||||||
regSetDwordValue("useBiosGBA", useBiosFileGBA);
|
regSetDwordValue("useBiosGBA", useBiosFileGBA);
|
||||||
|
|
||||||
regSetDwordValue("useBiosGB", useBiosFileGB);
|
regSetDwordValue("useBiosGB", useBiosFileGB);
|
||||||
|
@ -2587,6 +2589,17 @@ void VBA::saveSettings()
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int VBA::detectCpuCores()
|
||||||
|
{
|
||||||
|
int CPUInfo[4];
|
||||||
|
|
||||||
|
__cpuid( CPUInfo, 1 );
|
||||||
|
|
||||||
|
int processor_count = ( CPUInfo[1] & 0x00FF0000 ) >> 16;
|
||||||
|
|
||||||
|
return processor_count;
|
||||||
|
}
|
||||||
|
|
||||||
void winSignal(int, int)
|
void winSignal(int, int)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,6 +79,7 @@ class VBA : public CWinApp
|
||||||
bool mode800Available;
|
bool mode800Available;
|
||||||
bool mode1024Available;
|
bool mode1024Available;
|
||||||
bool mode1280Available;
|
bool mode1280Available;
|
||||||
|
int maxCpuCores; // maximum number of CPU cores VBA should use, 0 means auto-detect
|
||||||
int windowPositionX;
|
int windowPositionX;
|
||||||
int windowPositionY;
|
int windowPositionY;
|
||||||
void (*filterFunction)(u8*,u32,u8*,u8*,u32,int,int);
|
void (*filterFunction)(u8*,u32,u8*,u8*,u32,int,int);
|
||||||
|
@ -262,13 +263,10 @@ class VBA : public CWinApp
|
||||||
void updateFrameSkip();
|
void updateFrameSkip();
|
||||||
void loadSettings();
|
void loadSettings();
|
||||||
void addRecentFile(CString file);
|
void addRecentFile(CString file);
|
||||||
//{{AFX_MSG(VBA)
|
|
||||||
afx_msg void OnAppAbout();
|
private:
|
||||||
// NOTE - the ClassWizard will add and remove member functions here.
|
int detectCpuCores();
|
||||||
// DO NOT EDIT what you see in these blocks of generated code !
|
};
|
||||||
//}}AFX_MSG
|
|
||||||
DECLARE_MESSAGE_MAP()
|
|
||||||
};
|
|
||||||
|
|
||||||
extern VBA theApp;
|
extern VBA theApp;
|
||||||
extern int emulating;
|
extern int emulating;
|
||||||
|
|
Loading…
Reference in New Issue