ADDED asm-filters thread-safe patches by chrono.
ADDED maxCpuCores option to ini file and autodetect best value at first run.
This commit is contained in:
parent
911450eff1
commit
f4170784b3
|
@ -63,9 +63,6 @@ Important:
|
|||
|
||||
- Improve automatic 64k/128k flash save detection
|
||||
|
||||
- HQ3x/4x ASM implementation produces wrong interpolation on the image's border
|
||||
- This has already been fixed in the C version; see hq_base.h, lines 343 - 372. The ASM version most likely only has something like skipLine instead of separate skipLinePlus and skipLineMinus values, which are, however, necessary for it to work correctly.
|
||||
|
||||
- Fix OpenGL issues
|
||||
|
||||
- Remove 16 bit hack for filters
|
||||
|
@ -107,7 +104,7 @@ Performance:
|
|||
|
||||
- Apply pixel filter only to changed parts of the image
|
||||
|
||||
- Make use of multi-core CPUs
|
||||
- Make even more use of multi-core CPUs
|
||||
|
||||
- Make use of 64 bit CPUs
|
||||
|
||||
|
@ -150,4 +147,6 @@ Jonas Quinn
|
|||
Spacy
|
||||
|
||||
Special Thanks go to:
|
||||
chrono - ASM genius, fixed a bug in the HQ3x/4x filters and made them thread-safe
|
||||
chrono:
|
||||
ASM genius, fixed a bug in the HQ3x/4x filters.
|
||||
Made HQ3x/4x and Bilinear filters (ASM versions) thread-safe
|
||||
|
|
|
@ -24,12 +24,6 @@
|
|||
|
||||
#include "System.h"
|
||||
|
||||
static u8 row_cur[3*322];
|
||||
static u8 row_next[3*322];
|
||||
|
||||
static u8 *rgb_row_cur = row_cur;
|
||||
static u8 *rgb_row_next = row_next;
|
||||
|
||||
#define RGB(r,g,b) ((r)>>3) << systemRedShift |\
|
||||
((g) >> 3) << systemGreenShift |\
|
||||
((b) >> 3) << systemBlueShift\
|
||||
|
@ -83,6 +77,11 @@ static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width)
|
|||
void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||
{
|
||||
u8 row_cur[3*322];
|
||||
u8 row_next[3*322];
|
||||
u8 *rgb_row_cur = row_cur;
|
||||
u8 *rgb_row_next = row_next;
|
||||
|
||||
u16 *to = (u16 *)dstPtr;
|
||||
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
||||
|
||||
|
@ -161,6 +160,11 @@ void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
|||
void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||
{
|
||||
u8 row_cur[3*322];
|
||||
u8 row_next[3*322];
|
||||
u8 *rgb_row_cur = row_cur;
|
||||
u8 *rgb_row_next = row_next;
|
||||
|
||||
u16 *to = (u16 *)dstPtr;
|
||||
u16 *to_odd = (u16 *)(dstPtr + dstPitch);
|
||||
|
||||
|
@ -250,6 +254,11 @@ void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
|||
void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||
{
|
||||
u8 row_cur[3*322];
|
||||
u8 row_next[3*322];
|
||||
u8 *rgb_row_cur = row_cur;
|
||||
u8 *rgb_row_next = row_next;
|
||||
|
||||
u32 *to = (u32 *)dstPtr;
|
||||
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
||||
|
||||
|
@ -330,6 +339,11 @@ void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
|||
void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
|
||||
u8 *dstPtr, u32 dstPitch, int width, int height)
|
||||
{
|
||||
u8 row_cur[3*322];
|
||||
u8 row_next[3*322];
|
||||
u8 *rgb_row_cur = row_cur;
|
||||
u8 *rgb_row_next = row_next;
|
||||
|
||||
u32 *to = (u32 *)dstPtr;
|
||||
u32 *to_odd = (u32 *)(dstPtr + dstPitch);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -21,7 +21,7 @@
|
|||
#ifndef NO_D3D
|
||||
|
||||
// The number of pixel-filter threads to be created
|
||||
#define NTHREADS 4
|
||||
#define NTHREADS ( theApp.maxCpuCores )
|
||||
|
||||
#pragma comment( lib, "d3d9" )
|
||||
#pragma comment( lib, "d3dx9" )
|
||||
|
@ -454,7 +454,7 @@ void Direct3DDisplay::render()
|
|||
u32 dst_bytes_per_thread = lr.Pitch * dst_height_per_thread;
|
||||
|
||||
// Use Multi Threading
|
||||
assert( NTHREADS > 0 );
|
||||
assert( ( NTHREADS > 0 ) && ( NTHREADS < MAXIMUM_PROCESSORS ) );
|
||||
for( int i = ( NTHREADS - 1 ) ; i > -1 ; i-- ) {
|
||||
// create last thread first because it could have more work than the others (e.g. if NTHREADS = 3)
|
||||
// (last thread has to process the remaining lines if (height / NTHREADS) is not an integer)
|
||||
|
@ -819,7 +819,7 @@ void Direct3DDisplay::calculateDestRect()
|
|||
rectangleFillsScreen = true; // no clear() necessary
|
||||
destRect.left = 0;
|
||||
destRect.top = 0;
|
||||
destRect.right = dpp.BackBufferWidth; // for some reason there'l be a black
|
||||
destRect.right = dpp.BackBufferWidth; // for some reason there will be a black
|
||||
destRect.bottom = dpp.BackBufferHeight; // border line when using -1 at the end
|
||||
} else {
|
||||
// use aspect ratio
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#endif
|
||||
|
||||
#include "stdafx.h"
|
||||
#include <intrin.h>
|
||||
|
||||
#include "AVIWrite.h"
|
||||
#include "LangSelect.h"
|
||||
|
@ -207,16 +208,6 @@ void directXMessage(const char *msg)
|
|||
/////////////////////////////////////////////////////////////////////////////
|
||||
// VBA
|
||||
|
||||
BEGIN_MESSAGE_MAP(VBA, CWinApp)
|
||||
//{{AFX_MSG_MAP(VBA)
|
||||
// NOTE - the ClassWizard will add and remove mapping macros here.
|
||||
// DO NOT EDIT what you see in these blocks of generated code!
|
||||
//}}AFX_MSG_MAP
|
||||
END_MESSAGE_MAP()
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// VBA construction
|
||||
|
||||
VBA::VBA()
|
||||
{
|
||||
// COINIT_MULTITHREADED is not supported by SHBrowseForFolder with BIF_USENEWUI
|
||||
|
@ -229,6 +220,7 @@ VBA::VBA()
|
|||
mode800Available = false;
|
||||
mode1024Available = false;
|
||||
mode1280Available = false;
|
||||
maxCpuCores = 1;
|
||||
windowPositionX = 0;
|
||||
windowPositionY = 0;
|
||||
filterFunction = NULL;
|
||||
|
@ -1486,6 +1478,14 @@ void VBA::loadSettings()
|
|||
if(windowPositionY < 0)
|
||||
windowPositionY = 0;
|
||||
|
||||
maxCpuCores = regQueryDwordValue("maxCpuCores", 0);
|
||||
if(maxCpuCores < 0) {
|
||||
maxCpuCores = 0;
|
||||
}
|
||||
if(maxCpuCores == 0) {
|
||||
maxCpuCores = detectCpuCores();
|
||||
}
|
||||
|
||||
useBiosFileGBA = ( regQueryDwordValue("useBiosGBA", 0) == 1 ) ? true : false;
|
||||
|
||||
useBiosFileGB = ( regQueryDwordValue("useBiosGB", 0) == 1 ) ? true : false;
|
||||
|
@ -2474,6 +2474,8 @@ void VBA::saveSettings()
|
|||
regSetDwordValue("windowX", windowPositionX);
|
||||
regSetDwordValue("windowY", windowPositionY);
|
||||
|
||||
regSetDwordValue("maxCpuCores", maxCpuCores);
|
||||
|
||||
regSetDwordValue("useBiosGBA", useBiosFileGBA);
|
||||
|
||||
regSetDwordValue("useBiosGB", useBiosFileGB);
|
||||
|
@ -2587,6 +2589,17 @@ void VBA::saveSettings()
|
|||
#endif
|
||||
}
|
||||
|
||||
int VBA::detectCpuCores()
|
||||
{
|
||||
int CPUInfo[4];
|
||||
|
||||
__cpuid( CPUInfo, 1 );
|
||||
|
||||
int processor_count = ( CPUInfo[1] & 0x00FF0000 ) >> 16;
|
||||
|
||||
return processor_count;
|
||||
}
|
||||
|
||||
// Signal callback stub: both int arguments are unnamed and ignored, and the
// body is empty, so calling it has no effect.
void winSignal(int, int)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -79,6 +79,7 @@ class VBA : public CWinApp
|
|||
bool mode800Available;
|
||||
bool mode1024Available;
|
||||
bool mode1280Available;
|
||||
int maxCpuCores; // maximum number of CPU cores VBA should use, 0 means auto-detect
|
||||
int windowPositionX;
|
||||
int windowPositionY;
|
||||
void (*filterFunction)(u8*,u32,u8*,u8*,u32,int,int);
|
||||
|
@ -262,13 +263,10 @@ class VBA : public CWinApp
|
|||
void updateFrameSkip();
|
||||
void loadSettings();
|
||||
void addRecentFile(CString file);
|
||||
//{{AFX_MSG(VBA)
|
||||
afx_msg void OnAppAbout();
|
||||
// NOTE - the ClassWizard will add and remove member functions here.
|
||||
// DO NOT EDIT what you see in these blocks of generated code !
|
||||
//}}AFX_MSG
|
||||
DECLARE_MESSAGE_MAP()
|
||||
};
|
||||
|
||||
private:
|
||||
int detectCpuCores();
|
||||
};
|
||||
|
||||
extern VBA theApp;
|
||||
extern int emulating;
|
||||
|
|
Loading…
Reference in New Issue