ADDED asm-filters thread-safe patches by chrono.

ADDED maxCpuCores option to ini file and autodetect best value at first run.
This commit is contained in:
spacy51 2008-03-26 16:21:30 +00:00
parent 911450eff1
commit f4170784b3
8 changed files with 1116 additions and 1086 deletions

View File

@ -63,9 +63,6 @@ Important:
- Improve automatic 64k/128k flash save detection - Improve automatic 64k/128k flash save detection
- HQ3x/4x ASM implementation produces wrong interpolation on the image's border
- This has already been fixed in the C version; look at hq_base.h / line 343 - 372. The ASM version most likely only has something like skipLine instead of skipLinePlus and skipLineMinus, which is however necessary in order to work correctly.
- Fix OpenGL issues - Fix OpenGL issues
- Remove 16 bit hack for filters - Remove 16 bit hack for filters
@ -107,7 +104,7 @@ Performance:
- Apply pixel filter only to changed parts of the image - Apply pixel filter only to changed parts of the image
- Make use of multi-core CPUs - Make even more use of multi-core CPUs
- Make use of 64 bit CPUs - Make use of 64 bit CPUs
@ -150,4 +147,6 @@ Jonas Quinn
Spacy Spacy
Special Thanks go to: Special Thanks go to:
chrono - ASM genius, fixed a bug in the HQ3x/4x filters and made them thread-safe chrono:
ASM genius, fixed a bug in the HQ3x/4x filters.
Made HQ3x/4x and Bilinear filters (ASM versions) thread-safe

View File

@ -24,12 +24,6 @@
#include "System.h" #include "System.h"
static u8 row_cur[3*322];
static u8 row_next[3*322];
static u8 *rgb_row_cur = row_cur;
static u8 *rgb_row_next = row_next;
#define RGB(r,g,b) ((r)>>3) << systemRedShift |\ #define RGB(r,g,b) ((r)>>3) << systemRedShift |\
((g) >> 3) << systemGreenShift |\ ((g) >> 3) << systemGreenShift |\
((b) >> 3) << systemBlueShift\ ((b) >> 3) << systemBlueShift\
@ -83,6 +77,11 @@ static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width)
void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
u8 *dstPtr, u32 dstPitch, int width, int height) u8 *dstPtr, u32 dstPitch, int width, int height)
{ {
u8 row_cur[3*322];
u8 row_next[3*322];
u8 *rgb_row_cur = row_cur;
u8 *rgb_row_next = row_next;
u16 *to = (u16 *)dstPtr; u16 *to = (u16 *)dstPtr;
u16 *to_odd = (u16 *)(dstPtr + dstPitch); u16 *to_odd = (u16 *)(dstPtr + dstPitch);
@ -161,6 +160,11 @@ void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
u8 *dstPtr, u32 dstPitch, int width, int height) u8 *dstPtr, u32 dstPitch, int width, int height)
{ {
u8 row_cur[3*322];
u8 row_next[3*322];
u8 *rgb_row_cur = row_cur;
u8 *rgb_row_next = row_next;
u16 *to = (u16 *)dstPtr; u16 *to = (u16 *)dstPtr;
u16 *to_odd = (u16 *)(dstPtr + dstPitch); u16 *to_odd = (u16 *)(dstPtr + dstPitch);
@ -250,6 +254,11 @@ void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
u8 *dstPtr, u32 dstPitch, int width, int height) u8 *dstPtr, u32 dstPitch, int width, int height)
{ {
u8 row_cur[3*322];
u8 row_next[3*322];
u8 *rgb_row_cur = row_cur;
u8 *rgb_row_next = row_next;
u32 *to = (u32 *)dstPtr; u32 *to = (u32 *)dstPtr;
u32 *to_odd = (u32 *)(dstPtr + dstPitch); u32 *to_odd = (u32 *)(dstPtr + dstPitch);
@ -330,6 +339,11 @@ void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
u8 *dstPtr, u32 dstPitch, int width, int height) u8 *dstPtr, u32 dstPitch, int width, int height)
{ {
u8 row_cur[3*322];
u8 row_next[3*322];
u8 *rgb_row_cur = row_cur;
u8 *rgb_row_next = row_next;
u32 *to = (u32 *)dstPtr; u32 *to = (u32 *)dstPtr;
u32 *to_odd = (u32 *)(dstPtr + dstPitch); u32 *to_odd = (u32 *)(dstPtr + dstPitch);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@
#ifndef NO_D3D #ifndef NO_D3D
// The number of pixel-filter threads to be created // The number of pixel-filter threads to be created
#define NTHREADS 4 #define NTHREADS ( theApp.maxCpuCores )
#pragma comment( lib, "d3d9" ) #pragma comment( lib, "d3d9" )
#pragma comment( lib, "d3dx9" ) #pragma comment( lib, "d3dx9" )
@ -454,7 +454,7 @@ void Direct3DDisplay::render()
u32 dst_bytes_per_thread = lr.Pitch * dst_height_per_thread; u32 dst_bytes_per_thread = lr.Pitch * dst_height_per_thread;
// Use Multi Threading // Use Multi Threading
assert( NTHREADS > 0 ); assert( ( NTHREADS > 0 ) && ( NTHREADS < MAXIMUM_PROCESSORS ) );
for( int i = ( NTHREADS - 1 ) ; i > -1 ; i-- ) { for( int i = ( NTHREADS - 1 ) ; i > -1 ; i-- ) {
// create last thread first because it could have more work than the others (for eg. if NTHREADS = 3) // create last thread first because it could have more work than the others (for eg. if NTHREADS = 3)
// (last thread has to process the remaining lines if (height / NTHREADS) is not an integer) // (last thread has to process the remaining lines if (height / NTHREADS) is not an integer)
@ -819,7 +819,7 @@ void Direct3DDisplay::calculateDestRect()
rectangleFillsScreen = true; // no clear() necessary rectangleFillsScreen = true; // no clear() necessary
destRect.left = 0; destRect.left = 0;
destRect.top = 0; destRect.top = 0;
destRect.right = dpp.BackBufferWidth; // for some reason there'l be a black destRect.right = dpp.BackBufferWidth; // for some reason there will be a black
destRect.bottom = dpp.BackBufferHeight; // border line when using -1 at the end destRect.bottom = dpp.BackBufferHeight; // border line when using -1 at the end
} else { } else {
// use aspect ratio // use aspect ratio

View File

@ -24,6 +24,7 @@
#endif #endif
#include "stdafx.h" #include "stdafx.h"
#include <intrin.h>
#include "AVIWrite.h" #include "AVIWrite.h"
#include "LangSelect.h" #include "LangSelect.h"
@ -207,16 +208,6 @@ void directXMessage(const char *msg)
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// VBA // VBA
BEGIN_MESSAGE_MAP(VBA, CWinApp)
//{{AFX_MSG_MAP(VBA)
// NOTE - the ClassWizard will add and remove mapping macros here.
// DO NOT EDIT what you see in these blocks of generated code!
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// VBA construction
VBA::VBA() VBA::VBA()
{ {
// COINIT_MULTITHREADED is not supported by SHBrowseForFolder with BIF_USENEWUI // COINIT_MULTITHREADED is not supported by SHBrowseForFolder with BIF_USENEWUI
@ -229,6 +220,7 @@ VBA::VBA()
mode800Available = false; mode800Available = false;
mode1024Available = false; mode1024Available = false;
mode1280Available = false; mode1280Available = false;
maxCpuCores = 1;
windowPositionX = 0; windowPositionX = 0;
windowPositionY = 0; windowPositionY = 0;
filterFunction = NULL; filterFunction = NULL;
@ -1486,6 +1478,14 @@ void VBA::loadSettings()
if(windowPositionY < 0) if(windowPositionY < 0)
windowPositionY = 0; windowPositionY = 0;
maxCpuCores = regQueryDwordValue("maxCpuCores", 0);
if(maxCpuCores < 0) {
maxCpuCores = 0;
}
if(maxCpuCores == 0) {
maxCpuCores = detectCpuCores();
}
useBiosFileGBA = ( regQueryDwordValue("useBiosGBA", 0) == 1 ) ? true : false; useBiosFileGBA = ( regQueryDwordValue("useBiosGBA", 0) == 1 ) ? true : false;
useBiosFileGB = ( regQueryDwordValue("useBiosGB", 0) == 1 ) ? true : false; useBiosFileGB = ( regQueryDwordValue("useBiosGB", 0) == 1 ) ? true : false;
@ -2474,6 +2474,8 @@ void VBA::saveSettings()
regSetDwordValue("windowX", windowPositionX); regSetDwordValue("windowX", windowPositionX);
regSetDwordValue("windowY", windowPositionY); regSetDwordValue("windowY", windowPositionY);
regSetDwordValue("maxCpuCores", maxCpuCores);
regSetDwordValue("useBiosGBA", useBiosFileGBA); regSetDwordValue("useBiosGBA", useBiosFileGBA);
regSetDwordValue("useBiosGB", useBiosFileGB); regSetDwordValue("useBiosGB", useBiosFileGB);
@ -2587,6 +2589,17 @@ void VBA::saveSettings()
#endif #endif
} }
// Estimates how many pixel-filter threads are worth using on this machine.
//
// Queries CPUID leaf 1 and returns the "maximum number of addressable
// logical processor IDs" field (EBX bits 23..16). That field is only
// defined when the HTT feature flag (EDX bit 28) is set; on CPUs without
// it the bits are reserved and may read as 0.
//
// Returns: a value >= 1. The result is stored in maxCpuCores, where 0
// means "auto-detect", so this function must never return 0 itself.
//
// NOTE(review): the field describes logical processors per physical
// package, not the number of cores the OS actually exposes - confirm
// whether an OS query would be a better source.
int VBA::detectCpuCores()
{
    int cpuInfo[4] = { 0, 0, 0, 0 }; // EAX, EBX, ECX, EDX
    __cpuid( cpuInfo, 1 );

    // EDX bit 28 (HTT): EBX[23:16] is valid only when this flag is set.
    const bool countFieldValid = ( cpuInfo[3] & ( 1 << 28 ) ) != 0;
    if( !countFieldValid ) {
        return 1;
    }

    const int logicalProcessors = ( cpuInfo[1] >> 16 ) & 0xFF;

    // Guard against a zero field so callers never get an unusable count.
    return ( logicalProcessors > 0 ) ? logicalProcessors : 1;
}
void winSignal(int, int) void winSignal(int, int)
{ {
} }

View File

@ -79,6 +79,7 @@ class VBA : public CWinApp
bool mode800Available; bool mode800Available;
bool mode1024Available; bool mode1024Available;
bool mode1280Available; bool mode1280Available;
int maxCpuCores; // maximum number of CPU cores VBA should use, 0 means auto-detect
int windowPositionX; int windowPositionX;
int windowPositionY; int windowPositionY;
void (*filterFunction)(u8*,u32,u8*,u8*,u32,int,int); void (*filterFunction)(u8*,u32,u8*,u8*,u32,int,int);
@ -262,13 +263,10 @@ class VBA : public CWinApp
void updateFrameSkip(); void updateFrameSkip();
void loadSettings(); void loadSettings();
void addRecentFile(CString file); void addRecentFile(CString file);
//{{AFX_MSG(VBA)
afx_msg void OnAppAbout(); private:
// NOTE - the ClassWizard will add and remove member functions here. int detectCpuCores();
// DO NOT EDIT what you see in these blocks of generated code ! };
//}}AFX_MSG
DECLARE_MESSAGE_MAP()
};
extern VBA theApp; extern VBA theApp;
extern int emulating; extern int emulating;