gsdx-ogl: merge from trunk (4990:5021)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5022 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2011-12-27 17:04:28 +00:00
commit 720a841cb2
96 changed files with 2388 additions and 4764 deletions

View File

@ -1,35 +0,0 @@
#!/bin/sh
# Usage: sh build.sh [option]
# option can be all (rebuilds everything), clean, or nothing (incremental build)
# Modify the individual build.sh for specific program options like debug symbols

# This is just for building the plugins; pcsx2 is built using codeblocks.

#ZeroGS Normal mode
export ZEROGSOPTIONS="--enable-sse2"

#ZeroGS Debug mode
#export ZEROGSOPTIONS="--enable-debug --enable-devbuild --enable-sse2"

#ZeroSPU2 Debug mode (Don't enable right now)
#export ZEROSPU2OPTIONS="--enable-debug --enable-devbuild"

#GSnull debug options.
#export GSnullOPTIONS="--enable-debug"

# Remember where we were started from; the plugin build scripts install into
# ${PCSX2PLUGINS}.
curdir=$(pwd)
export PCSX2PLUGINS="${curdir}/bin/plugins"

echo
echo "Building the Pcsx2 Plugins."
echo "Note: binaries generated are 32 bit, and require 32 bit versions of all dependencies."

# Stop if the plugins directory is missing: otherwise "sh build.sh" below would
# run this very script again from the current directory, recursing endlessly.
cd "${curdir}/plugins" || exit 1

# Forward the original arguments untouched to the plugins' own build script.
sh build.sh "$@"

if [ $? -ne 0 ]
then
    echo Error with building plugins
    exit 1
fi

174
build.rb
View File

@ -1,174 +0,0 @@
#!/usr/bin/env ruby
# Usage: ruby build.rb [option] [pcsx2, plugins, <plugin name>] [dev,debug,release] [all, install, clean]
# If you don't specify pcsx2, plugins, or a plugin name, it will assume you want to rebuild everything.
# If you don't specify dev or debug, it assumes a release build.
# If it isn't all, install, or clean, it assumes install.
# If you want other options, add them to $pcsx2_build_types. This is still a work in progress...
# --arcum42
require "fileutils.rb"
include FileUtils
# Directory layout; every path is derived from the directory the script is started in.
$main_dir = Dir.pwd
$pcsx2_install_dir = "#{$main_dir}/bin"
$plugin_install_dir = "#{$pcsx2_install_dir}/plugins"
$pcsx2_dir = "#{$main_dir}/pcsx2"
$plugins_dir = "#{$main_dir}/plugins"

# Arguments handed to ./configure for pcsx2 and for the plugins.
$pcsx2_prefix = " --prefix #{$main_dir}"
$plugins_prefix = " --prefix #{$plugin_install_dir}"

# Plugins built by default, and the larger set that may be requested by name.
$plugin_list = %w[
  CDVDnull dev9null FWnull USBnull SPU2null zerogs zzogl zeropad zerospu2
  PeopsSPU2 CDVDiso CDVDisoEFP CDVDlinuz
]
$full_plugin_list = $plugin_list + %w[GSnull PadNull onepad]

# Extra ./configure switches for each build flavour.
$pcsx2_build_types = {
  "dev"     => " --enable-devbuild ",
  "debug"   => " --enable-debug ",
  "release" => " "
}

# Command line words recognised as build flavours / make targets.
$pcsx2_release_params = %w[dev debug release]
$make_params = %w[all clean install]

# Accumulated summary text and number of successful builds.
$build_report = ""
$build_counter = 0
# Returns the directory in which the given plugin has to be built.
#
# Most plugins build straight from "<plugins dir>/<plugin name>/"; a few keep
# their build files in a sub-directory, listed in the table below.
def plugin_src_dir(plugin_name)
  build_subdirs = {
    "CDVDiso"    => "src",
    "CDVDisoEFP" => "src/Linux",
    "CDVDlinuz"  => "Src/Linux",
    "zerogs"     => "opengl",
    "zzogl"      => "opengl"
  }
  "#{$plugins_dir}/#{plugin_name}/#{build_subdirs.fetch(plugin_name, "")}"
end
# Prints a three-line banner naming the program that is about to be built.
def announce(my_program)
  rule = "---------------"
  print "#{rule}\nBuilding #{my_program}\n#{rule}\n"
end
# Runs "make" with the given arguments in the current directory.
# Returns true when the command's exit status is zero, false otherwise.
def make(options)
  command = "make #{options}"
  system(command)
  $?.to_i.zero?
end
# Regenerates the autotools build system in the current directory, runs
# ./configure with the given options and finishes with "make clean".
# Failures of the individual preparation steps are not checked; the return
# value is the result of the final "make clean".
def rebuild(options)
  preparation_steps = [
    "aclocal",
    "automake",
    "autoconf",
    "chmod +x configure",
    "./configure #{options}"
  ]
  preparation_steps.each { |step| system(step) }
  make "clean"
end
# Runs "make install" in the current directory and then performs the extra
# copy steps some packages need. Returns the result of "make install"; the
# results of the additional copies are ignored.
def install(build_name)
  ret = make "install"

  # Copies a file (or shell glob) from the plugin's build dir to the plugin install dir.
  copy_to_plugins = lambda do |pattern|
    system("cp #{plugin_src_dir(build_name)}/#{pattern} #{$plugin_install_dir}")
  end

  case build_name
  # If the package isn't inclined to obey simple instructions...
  when "CDVDisoEFP", "CDVDlinuz"
    copy_to_plugins.call("cfg#{build_name}")
    copy_to_plugins.call("lib#{build_name}.so")
  when "PeopsSPU2"
    copy_to_plugins.call("libspu2Peops*.so*")
  # Copy the shaders over. Shouldn't the makefile do this?
  when "zzogl", "zerogs"
    copy_to_plugins.call("Win32/ps2hw.dat")
  # And while we have the opportunity...
  when "pcsx2"
    # Keep a copy of the binary tagged with the svn revision number.
    svn_revision = `svn info | grep Revision:`[/[0-9]+/]
    system("cp #{$pcsx2_install_dir}/pcsx2 #{$pcsx2_install_dir}/pcsx2-#{svn_revision}")
  end

  ret
end
# Builds one item and records the outcome in $build_report / $build_counter.
#
# build_name     - "pcsx2" or the name of a plugin.
# make_parameter - "all" (reconfigure, clean, then install), "clean"
#                  (only "make clean"); anything else means "install".
#
# The work happens inside the item's build directory; the process always
# returns to $main_dir afterwards, even if a step raises.
def build(build_name, make_parameter)
  building_pcsx2 = (build_name == "pcsx2")
  announce "#{build_name.capitalize}"

  build_dir = building_pcsx2 ? "#{$pcsx2_dir}" : plugin_src_dir(build_name)
  Dir.chdir build_dir

  begin
    completed =
      case make_parameter
      when "all"
        rebuild(building_pcsx2 ? $pcsx2_prefix : $plugins_prefix)
        install(build_name)
      when "clean"
        # Keep the result: a failed clean must not be reported as a success.
        make "clean"
      else
        install(build_name)
      end
  ensure
    Dir.chdir $main_dir
  end

  if completed
    $build_report += "#{build_name} was built successfully.\n"
    $build_counter += 1
  else
    $build_report += "#{build_name} was not built successfully.\n"
  end
end
# Command line handling: each argument may select a make target, name something
# to build, choose a build flavour, or ask for the default plugin set.
make_parameter = ""
build_items = []

ARGV.each do |arg|
  make_parameter = arg if $make_params.include?(arg)
  build_items << arg if $full_plugin_list.include?(arg) || arg == "pcsx2"
  if $pcsx2_release_params.include?(arg)
    # Put the flavour's configure switches in front of the pcsx2 prefix.
    $pcsx2_prefix = $pcsx2_build_types[arg] + $pcsx2_prefix
  end
  build_items << $plugin_list if arg == "plugins"
end

# Nothing named explicitly: build the default plugins and then pcsx2.
build_items.push($plugin_list, "pcsx2") if build_items.empty?
build_items.flatten!

build_items.each { |item| build(item, make_parameter) }

print "\n--\n"
print "Build Summary:\n"
print $build_report, "\n"
print "#{$build_counter}/#{build_items.count} Successful.\n"

45
build.sh Executable file
View File

@ -0,0 +1,45 @@
#!/bin/bash
# Configures, builds and installs pcsx2 with CMake in the "build" sub-directory.
#
# Usage: ./build.sh [gsdx] [dev|debug|release] [clean]
#   gsdx               adds -DFORCE_INTERNAL_SDL=TRUE
#   dev/debug/release  adds -DCMAKE_BUILD_TYPE=Devel/Debug/Release
#   clean              runs "make clean" before building
#
# The output of cmake and make is also appended to install_log.txt in the
# directory the script is started from.

flags=""
clean_build=false

for f in "$@"; do
    case "$f" in
        gsdx)    flags="$flags -DFORCE_INTERNAL_SDL=TRUE" ;;
        dev)     flags="$flags -DCMAKE_BUILD_TYPE=Devel" ;;
        debug)   flags="$flags -DCMAKE_BUILD_TYPE=Debug" ;;
        release) flags="$flags -DCMAKE_BUILD_TYPE=Release" ;;
        clean)   clean_build=true ;;
    esac
done

# -f: a missing log from a previous run is not an error.
rm -f install_log.txt

# Quote the test: unquoted, two or more flags expand to several words and
# "[" fails with "binary operator expected" instead of reporting the flags.
if [ -n "$flags" ]; then
    echo "Building pcsx2 with $flags"
    echo "Building pcsx2 with $flags" > install_log.txt
fi

mkdir -p build
# Do not run cmake/make in the wrong directory if "build" is unusable.
cd build || exit 1

# $flags is deliberately unquoted so that each option becomes its own argument.
cmake $flags .. 2>&1 | tee -a ../install_log.txt
if [ "$clean_build" = true ]; then
    echo "Doing a clean build."
    make clean 2>&1 | tee -a ../install_log.txt
fi
make 2>&1 | tee -a ../install_log.txt
make install 2>&1 | tee -a ../install_log.txt

cd ..

View File

@ -18,6 +18,7 @@
# Installation path : -DPACKAGE_MODE=TRUE(follow FHS)|FALSE(local bin/)
# Plugin installation path : -DPLUGIN_DIR="/usr/lib/pcsx2"
# Game DB installation path : -DGAMEINDEX_DIR="/var/games/pcsx2"
# Follow XDG standard : -DXDG_STD=TRUE|FALSE
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
@ -60,11 +61,7 @@ if(FORCE_INTERNAL_ALL)
endif(FORCE_INTERNAL_ALL)
if(NOT DEFINED FORCE_INTERNAL_SOUNDTOUCH)
set(FORCE_INTERNAL_SOUNDTOUCH TRUE)
message(STATUS "Use internal version of Soundtouch by default.
Note: There have been issues in the past with sound quality depending on the version of Soundtouch
Use -DFORCE_INTERNAL_SOUNDTOUCH=FALSE at your own risk")
# set(FORCE_INTERNAL_SOUNDTOUCH FALSE)
set(FORCE_INTERNAL_SOUNDTOUCH FALSE)
endif(NOT DEFINED FORCE_INTERNAL_SOUNDTOUCH)
if(NOT DEFINED FORCE_INTERNAL_ZLIB)
@ -79,6 +76,10 @@ if (FORCE_INTERNAL_SDL)
Crashes can be expected and no support will be provided")
endif (FORCE_INTERNAL_SDL)
if (NOT DEFINED XDG_STD)
set(XDG_STD FALSE)
endif (NOT DEFINED XDG_STD)
#-------------------------------------------------------------------------------
# Control GCC flags
#-------------------------------------------------------------------------------

View File

@ -6,20 +6,4 @@
* need to add copyright info in the CMake file
## 64 bit
nvidia-cg-toolkit: replace it with GLSL
ia32-libs: add libportaudio, libglew
ia32-libs: add .so symlink for compilation
ia32-libs-gtk: add libwxbase2.8 and libwxgtk2.8 (warning a architecture dependant include file (in -dev package) is also needed)
ia32-libs-gtk: add .so symlink for compilation
== Multi-Arch support in APT ==
by David Kalnischkies, mentored by Michael Vogt
Hardware like 64bit processors are perfectly able to execute 32bit
opcode but until now this potentiality is disregard as the
infrastructure tools like dpkg and APT are not able to install and/or
solve dependencies across multiple architectures. The project
therefore focuses on enabling APT to work out good solutions in a
MultiArch aware environments without the need of hacky and partly
working biarch packages currently in use.
=> see multiarch

View File

@ -1,21 +1,22 @@
Source: pcsx2-unstable
Source: pcsx2.snapshot
Section: contrib/games
Priority: optional
Maintainer: Gregory Hainaut <gregory.hainaut@gmail.com>
# WARNING we need dpkg-dev 1.15.7 to support dpkg-buildflags but Ubuntu 10.04 has only 1.15.5.6...
# WARNING Natty needs at least cmake 2.8.5 (multiarch issue)
Build-Depends: cmake (>= 2.8),
debhelper (>= 7.0.50),
dpkg-dev (>= 1.15.5.6),
gcc-multilib [amd64],
g++-multilib [amd64],
ia32-libs-dev [amd64],
lib32asound2-dev [amd64],
lib32bz2-dev [amd64],
lib32z1-dev (>= 1:1.2.3.3) [amd64],
debhelper (>= 7.0.50),
dpkg-dev (>= 1.15.7),
gcc-multilib [amd64],
g++-multilib [amd64],
ia32-libs-dev [amd64],
lib32asound2-dev [amd64],
lib32bz2-dev [amd64],
lib32z1-dev (>= 1:1.2.3.3) [amd64],
libasound2-dev,
libbz2-dev,
libgl1-mesa-dev,
# Future GSdx versions will need glew1.6. Only Oneiric has it...
# libglew1.6-dev,
libglew1.5-dev,
libglu1-mesa-dev,
libgtk2.0-dev (>= 2.16),
@ -27,8 +28,7 @@ Build-Depends: cmake (>= 2.8),
libwxgtk2.8-dev,
libx11-dev,
locales | locales-all,
# Note lucid need nvidia-cg-toolkit-pcsx2 to be compatible with ppa
nvidia-cg-toolkit-pcsx2 | nvidia-cg-toolkit (>= 3),
nvidia-cg-toolkit (>= 3),
portaudio19-dev,
zlib1g-dev (>= 1:1.2.3.3)
Standards-Version: 3.9.2

View File

@ -1,18 +0,0 @@
By default, move all user data into XDG_CONFIG_DIR
Index: pcsx2.snapshot-3369/pcsx2/gui/AppConfig.cpp
===================================================================
--- pcsx2.snapshot-3369.orig/pcsx2/gui/AppConfig.cpp
+++ pcsx2.snapshot-3369/pcsx2/gui/AppConfig.cpp
@@ -125,7 +125,12 @@
{
switch( mode )
{
+#ifdef __LINUX__
+ // By default on linux move all user data file into central configuration directory
+ case DocsFolder_User: return GetUserLocalDataDir();
+#else
case DocsFolder_User: return (wxDirName)Path::Combine( wxStandardPaths::Get().GetDocumentsDir(), pxGetAppName() );
+#endif
//case DocsFolder_CWD: return (wxDirName)wxGetCwd();
case DocsFolder_Custom: return CustomDocumentsFolder;

View File

@ -1 +0,0 @@
05_move_data_to_config.patch

View File

@ -14,17 +14,14 @@ override_dh_auto_configure:
dh_auto_configure -- \
-DCMAKE_BUILD_TYPE=$(CMAKE_BUILD_TYPE) \
-DCMAKE_BUILD_STRIP=FALSE \
-DFORCE_INTERNAL_SOUNDTOUCH=FALSE \
-DXDG_STD=TRUE \
-DFORCE_INTERNAL_SDL=TRUE \
-DPACKAGE_MODE=TRUE
clean:
dh_testdir
dh_testroot
dh_auto_clean
rm -fr obj-* # cmake stuff is not always removed (fixed in debhelper 8.9.4)
dh_clean
override_dh_strip:

View File

@ -1,19 +0,0 @@
pcsx2 for debian
=========================
* This version has some major modifications against the default upstream version.
-> documents are stored in $XDG_CONFIG_HOME instead of $HOME/pcsx2
-> some features were removed so it could compile against libsound 1.3.
Pcsx2 needs at least soundtouch 1.5.
* This package is highly experimental.
* Documentation needs some love. Feel free to help.
* -fPIC option was removed for multiple reasons.
- Code only support x86 architecture.
- Upstream code uses the ebx register so it's not compliant with PIC.
- Impacts the performance too much.
- Only plugins. No package will link to them.
-- Gregory Hainaut <gregory.hainaut@gmail.com> Sat, 24 Apr 2010 23:11:10 +0200

View File

@ -1,40 +0,0 @@
need libsoundtouch v1.5
* policy bin2cpp tool ??
May be we could pregenerate the file in the upstream tar ball!!
* copyright and doc stuff
Lots of work to do here....
# only a stub
plugins/zzogl-pg/opengl/memcpy_amd.cpp: UNKNOWN
# need zerofrog confirmation of gpl v2
plugins/zzogl-pg/opengl/zpipe.h: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/shaders.sh: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/common.h: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/ZeroGSShaders/zpipe.h: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.cpp: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.h: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/ZeroGSShaders/zpipe.cpp: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/zerogsmath.h: *No copyright* UNKNOWN
plugins/zzogl-pg/opengl/zpipe.cpp: *No copyright* UNKNOWN
## 64 bit
nvidia-cg-toolkit: probably needs a special package for the 32-bit version
ia32-libs: add libportaudio, libsoundtouch, libglew
ia32-libs: add .so symlink for compilation
ia32-libs-gtk: add libwxbase2.8 and libwxgtk2.8 (warning a architecture dependant include file (in -dev package) is also needed)
ia32-libs-gtk: add .so symlink for compilation
== Multi-Arch support in APT ==
by David Kalnischkies, mentored by Michael Vogt
Hardware like 64bit processors are perfectly able to execute 32bit
opcode but until now this potentiality is disregard as the
infrastructure tools like dpkg and APT are not able to install and/or
solve dependencies across multiple architectures. The project
therefore focuses on enabling APT to work out good solutions in a
MultiArch aware environments without the need of hacky and partly
working biarch packages currently in use.

View File

@ -1,6 +0,0 @@
pcsx2 (3351-1) unstable; urgency=low
* Initial release
-- Gregory Hainaut <gregory.hainaut@gmail.com> Thu, 13 May 2010 14:27:56 +0200

View File

@ -1,6 +0,0 @@
pcsx2 (3351-0ubuntu1) lucid; urgency=low
* Initial release
-- Gregory Hainaut <gregory.hainaut@gmail.com> Thu, 13 May 2010 14:27:56 +0200

View File

@ -1 +0,0 @@
7

View File

@ -1,117 +0,0 @@
Source: pcsx2
Section: games
Priority: optional
Maintainer: Gregory Hainaut <gregory.hainaut@gmail.com>
# WARNING we need dpkg-dev 1.15.7 to support dpkg-buildflags but Ubuntu 10.04 has only 1.15.5.6...
Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8),
gcc-multilib [amd64], g++-multilib [amd64],
zlib1g-dev (>= 1:1.2.3.3) | lib32z1-dev (>= 1.2.3.3) [amd64],
libbz2-dev (>= 1.0.4),
libsdl1.2-dev,
libjpeg-dev,
libwxbase2.8-dev (>= 2.8.10), libwxbase2.8-dev (<< 2.8.11),
libwxgtk2.8-dev (>= 2.8.10), libwxgtk2.8-dev (<< 2.8.11),
libgtk2.0-dev (>= 2.16),
libasound2-dev | lib32asound2-dev [amd64],
portaudio19-dev,
# version not yet in debian
# libsoundtouch1-dev (>= 1.5),
# I patch the source (remove feature) to compile with version 1.3
libsoundtouch1-dev (>= 1.3),
libsparsehash-dev (>= 1.6),
libx11-dev,
libglew1.5-dev (>= 1.5.1),
libgl1-mesa-dev,
libglu1-mesa-dev,
# my nmu: add 32bits packages
# nvidia-cg-toolkit (>= 2.1.0017.deb1) | nvidia-cg-toolkit (>= 2.1.0017.deb1+nmu2) [amd64],
nvidia-cg-toolkit-pcsx2 | nvidia-cg-toolkit (>= 2.1), ia32-nvidia-cg-toolkit-pcsx2 [amd64],
ia32-libs (>= 20090808+nmu7) [amd64], ia32-libs-gtk (= 20100503+local1) [amd64]
Standards-Version: 3.9.1
Homepage: http://pcsx2.net/
Package: pcsx2
# Warning amd64 need additional ia32libs
Architecture: i386 amd64
Depends: ${shlibs:Depends}, ${misc:Depends},
pcsx2-data (>= ${binary:Version}),
pcsx2-plugins (>= ${binary:Version})
Conflicts: pcsx2-unstable
Description: Playstation 2 emulator
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes the main binary file.
Package: pcsx2-data
Architecture: all
Depends: ${misc:Depends}
Recommends: pcsx2 (>= ${binary:Version}), pcsx2-plugins (>= ${binary:Version})
Conflicts: pcsx2-data-unstable
Description: data for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes data files.
Package: pcsx2-plugins
# Warning amd64 need additional ia32libs
Architecture: i386 amd64
# manually add nvidia-cg-toolkit for zzogl. Do not know why it is not found by shlibs !!!
Depends: ${shlibs:Depends}, ${misc:Depends},
nvidia-cg-toolkit-pcsx2 | nvidia-cg-toolkit (>= 2.1), ia32-nvidia-cg-toolkit-pcsx2 [amd64]
Recommends: pcsx2 (>= ${binary:Version}),
pcsx2-data (>= ${binary:Version})
Conflicts: pcsx2-plugins-unstable
Description: Various plugins for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes the plugins for PCSX2.
Package: pcsx2-dbg
Section: debug
Priority: extra
# Warning amd64 need additional ia32libs
Architecture: i386 amd64
Depends: ${misc:Depends}, pcsx2 (= ${binary:Version})
Conflicts: pcsx2-unstable-dbg
Description: Debug symbols for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package contains the debug symbol of pcsx2.
Package: pcsx2-plugins-dbg
Section: debug
Priority: extra
# Warning amd64 need additional ia32libs
Architecture: i386 amd64
Depends: ${misc:Depends}, pcsx2-plugins (= ${binary:Version})
Conflicts: pcsx2-plugins-unstable-dbg
Description: Debug symbols of the pcsx2-plugins
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package contains the debug symbols of the pcsx2 plugins.

View File

@ -1,108 +0,0 @@
Source: pcsx2
Section: games
Priority: optional
Maintainer: Gregory Hainaut <gregory.hainaut@gmail.com>
Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8),
zlib1g-dev (>= 1:1.2.3.3),
libbz2-dev (>= 1.0.4),
libsdl1.2-dev,
libjpeg-dev,
libwxbase2.8-dev (>= 2.8.10), libwxbase2.8-dev (<< 2.8.11),
libwxgtk2.8-dev (>= 2.8.10), libwxgtk2.8-dev (<< 2.8.11),
libgtk2.0-dev (>= 2.16),
libasound2-dev,
portaudio19-dev,
# version not yet in debian
# libsoundtouch1-dev (>= 1.5),
# I patch the source (remove feature) to compile with version 1.3
libsoundtouch1-dev (>= 1.3),
libsparsehash-dev (>= 1.6),
libx11-dev,
libglew1.5-dev (>= 1.5.1),
libgl1-mesa-dev,
libglu1-mesa-dev,
nvidia-cg-toolkit-pcsx2
Standards-Version: 3.9.1
Homepage: http://pcsx2.net/
Package: pcsx2
Architecture: i386
Depends: ${shlibs:Depends}, ${misc:Depends},
pcsx2-data (>= ${binary:Version}),
pcsx2-plugins (>= ${binary:Version})
Conflicts: pcsx2-unstable
Description: Playstation 2 emulator
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes the main binary file.
Package: pcsx2-data
Architecture: all
Depends: ${misc:Depends}
Recommends: pcsx2 (>= ${binary:Version}), pcsx2-plugins (>= ${binary:Version})
Conflicts: pcsx2-data-unstable
Description: data for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes data files.
Package: pcsx2-plugins
Architecture: i386
# manually add nvidia-cg-toolkit for zzogl. Do not know why it is not found by shlibs !!!
Depends: ${shlibs:Depends}, ${misc:Depends},
nvidia-cg-toolkit-pcsx2 | nvidia-cg-toolkit (>= 2.1)
Recommends: pcsx2 (>= ${binary:Version}),
pcsx2-data (>= ${binary:Version})
Conflicts: pcsx2-plugins-unstable
Description: Various plugins for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package includes the plugins for PCSX2.
Package: pcsx2-dbg
Section: debug
Priority: extra
Architecture: i386
Depends: ${misc:Depends}, pcsx2 (= ${binary:Version})
Conflicts: pcsx2-unstable-dbg
Description: Debug symbols for pcsx2
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package contains the debug symbol of pcsx2.
Package: pcsx2-plugins-dbg
Section: debug
Priority: extra
Architecture: i386
Depends: ${misc:Depends}, pcsx2-plugins (= ${binary:Version})
Conflicts: pcsx2-plugins-unstable-dbg
Description: Debug symbols of the pcsx2-plugins
PCSX2 is a PlayStation 2 emulator for Windows and Linux, started by the same
team that brought you PCSX (a Sony PlayStation 1 emulator).
.
WARNING: It requires a CPU with SSE2 instructions. If your CPU does not support
this instruction set, it does not have enough horse power to run this emulator
anyway.
.
This package contains the debug symbols of the pcsx2 plugins.

View File

@ -1,168 +0,0 @@
This work was packaged for Debian by:
Gregory Hainaut <gregory.hainaut@gmail.com> on Sat, 24 Apr 2010 23:11:10 +0200
It was downloaded from:
http://pcsx2.googlecode.com/svn/
Upstream Author(s):
PCSX2 Dev Team
Copyright:
Copyright (C) 2002-2010 PCSX2 Dev Team
Files: pcsx2/*, common/*, plugins/spu2-x/*, plugins/PadNull/Pad*, plugins/USBnull/*, plugins/FWnull/*, plugins/CDVDnull/CDVD*, plugins/GSnull/*, plugins/dev9null/DEV9.cpp
License: LGPL-3+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
On Debian systems, the complete text of the GNU Lesser General
Public License version 3 can be found in "/usr/share/common-licenses/LGPL-3".
Files: pcsx2/Mdec.cpp, pcsx2/Mdec.h, pcsx2/RDebug/deci2_drfp.cpp, pcsx2/IPU/mpeg2lib/*, pcsx2/cheatscpp.h, common/include/api/*, plugins/onepad/*, plugins/PadNull/Linux/*, plugins/SPU2null/*, plugins/FWnull/FW.cpp, plugins/zerospu2/*, plugins/zzogl-pg/*, plugins/GSnull/Registers.h, plugins/GSnull/Linux/Linux*, plugins/GSnull/Linux/Config*, plugins/dev9null/DEV9.h, plugins/dev9null/Config.*
License: GPL-2+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
On Debian systems, the complete text of the GNU General
Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
Files: plugins/spu2-x/src/Spu2replay.*, plugins/spu2-x/src/Decode*, plugins/spu2-x/src/Linux/ConfigSoundTouch.cpp, plugins/spu2-x/src/spdif.h, plugins/spu2-x/src/Debug.h
License: LGPL-2.1+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
On Debian systems, the complete text of the GNU Lesser General
Public License version 2.1 can be found in "/usr/share/common-licenses/LGPL-2.1".
Files: plugins/zzogl-pg/opengl/glprocs.*
Copyright: 1991-2000, Silicon Graphics, Inc
License: MIT/X11 (BSD like)
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice including the dates of first publication and
either this permission notice or a reference to
http://oss.sgi.com/projects/FreeB/
shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
SILICON GRAPHICS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Except as contained in this notice, the name of Silicon Graphics, Inc.
shall not be used in advertising or otherwise to promote the sale, use or
other dealings in this Software without prior written authorization from
Silicon Graphics, Inc.
Files: common/include/intrin_x86.h
Copyright: 2006, KJK::Hyperion <hackbunny@reactos.com>
License: MIT/X11 (BSD like)
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
Files: common/src/Utilities/vssprintf.cpp
Copyright: 2002, Michael Ringgaard
License: BSD (3 clause)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Files: plugins/zzogl-pg/opengl/zpipe.cpp
Copyright: public domain
Files: plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders*
Copyright: Unknown
The Debian packaging is:
Copyright (C) 2010 Gregory Hainaut <gregory.hainaut@gmail.com>
and is licensed under the Lesser GPL version 3, see above.

View File

@ -1,145 +0,0 @@
#!/bin/sh
# copyright (c) 2010 Gregory Hainaut
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This package is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
######################################################################
# Global Parameters
######################################################################
# Svn parameter
if [ -n "$1" ] ; then
SVN_CO_VERSION=$1;
else
echo "Please provide the subversion revision number as the first parameter"
exit 1;
fi
if [ -n "$2" ] ; then
# Use branch argument
SVN_TRUNK="http://pcsx2.googlecode.com/svn/branches/$2"
else
# by default take the trunk
SVN_TRUNK="http://pcsx2.googlecode.com/svn/trunk"
fi
# Debian name of package and tarball
PKG_NAME="pcsx2-${SVN_CO_VERSION}"
TAR_NAME="pcsx2_${SVN_CO_VERSION}.orig.tar"
# Directory
TMP_DIR=/tmp
ROOT_DIR=${TMP_DIR}/subversion_pcsx2_${SVN_CO_VERSION}
NEW_DIR=${TMP_DIR}/$PKG_NAME
######################################################################
# Basic functions
######################################################################
# get_svn_dir DIR...
#
# Fetch each repository directory DIR (a path relative to $SVN_TRUNK) at
# revision $SVN_CO_VERSION into the current working directory. The local
# copy is named after the last path component of DIR. An existing local copy
# is updated in place; otherwise a fresh checkout is made.
get_svn_dir()
{
    # "$@" keeps every argument as a single word
    for directory in "$@" ; do
        # Print the directory without /
        echo " $directory" | sed -e 's/\//\ /g'
        svn_local_dir=$(basename "${directory}")
        if [ -e "${svn_local_dir}" ] ; then
            # Directory already exists, so only update it. "svn up" works on a
            # working-copy path, not on a repository URL, hence the local name.
            svn up --quiet -r "$SVN_CO_VERSION" "${svn_local_dir}";
        else
            svn co --quiet -r "$SVN_CO_VERSION" "${SVN_TRUNK}/${directory}";
        fi
    done
}
# get_svn_file FILE...
#
# Export each repository file FILE (a path relative to $SVN_TRUNK) at revision
# $SVN_CO_VERSION into the current working directory. A file whose base name
# already exists locally is left untouched.
get_svn_file()
{
    for file in $* ; do
        # Nothing to fetch when a local copy is already present
        [ -e $(basename ${file}) ] && continue
        # Versioning information is not supported for a single file,
        # therefore "svn co" cannot be used; export the file instead
        svn export --quiet ${SVN_TRUNK}/${file} -r $SVN_CO_VERSION;
    done
}
######################################################################
# Main script
######################################################################
## Download the svn repository (only the useful things)
echo "Downloading pcsx2 source revision ${SVN_CO_VERSION}"
mkdir -p $ROOT_DIR;
(cd $ROOT_DIR;
get_svn_file CMakeLists.txt;
get_svn_dir bin common cmake pcsx2 tools;
get_svn_dir debian-upstream;
echo "Done")
echo "Downloading Linux compatible plugins for revision ${SVN_CO_VERSION}"
# Note: Other plugins exist but they are not 100% copyright free.
mkdir -p $ROOT_DIR/plugins
(cd $ROOT_DIR/plugins;
get_svn_file plugins/CMakeLists.txt;
# DVD
get_svn_dir plugins/CDVDnull;
# Copyright issue
# get_svn_dir plugins/CDVDnull plugins/CDVDiso;
# PAD
get_svn_dir plugins/PadNull plugins/onepad;
# AUDIO
get_svn_dir plugins/SPU2null plugins/spu2-x plugins/zerospu2;
# Graphics
get_svn_dir plugins/GSnull plugins/zzogl-pg;
# Misc
get_svn_dir plugins/dev9null plugins/FWnull plugins/USBnull;
echo "Note: some plugins are more or less deprecated CDVDisoEFP, CDVDlinuz, Zerogs, Zeropad ...";
echo "Done")
## Installation
echo "Copy the subversion repository to a temporary directory"
# Copy the dir
rm -fr $NEW_DIR
cp -r $ROOT_DIR $NEW_DIR
echo "Remove .svn directories"
find $NEW_DIR -name ".svn" -type d -exec rm -fr {} \; 2> /dev/null
echo "Remove old build system (scripts and autotools)"
find $NEW_DIR -name "build.sh" -exec rm -f {} \;
find $NEW_DIR -name "install-sh" -exec rm -f {} \;
find $NEW_DIR -name "depcomp" -exec rm -f {} \;
find $NEW_DIR -name "missing" -exec rm -f {} \;
find $NEW_DIR -name "aclocal.m4" -exec rm -f {} \;
find $NEW_DIR -name "configure.ac" -exec rm -f {} \;
find $NEW_DIR -name "Makefile.am" -exec rm -f {} \;
echo "Remove 3rd party directories"
find $NEW_DIR -name "3rdparty" -exec rm -fr {} \; 2> /dev/null
# I really need to clean this mess one day
# echo "Remove plugins/zzogl-pg/opengl/ZeroGSShaders (some zlib source in the middle)"
# rm -fr $NEW_DIR/plugins/zzogl-pg/opengl/ZeroGSShaders
echo "Remove windows file (useless & copyright issue)"
find $NEW_DIR -iname "windows" -type d -exec rm -fr {} \; 2> /dev/null
find $NEW_DIR -name "Win32" -type d -exec rm -fr {} \; 2> /dev/null
rm -fr "${NEW_DIR}/plugins/zzogl-pg/opengl/Win32"
rm -fr "${NEW_DIR}/tools/GSDumpGUI"
rm -fr "${NEW_DIR}/common/vsprops"
echo "Remove useless files (copyright issues)"
rm -fr "${NEW_DIR}/plugins/zzogl-pg/opengl/ZeroGSShaders"
rm -fr "${NEW_DIR}/common/src/Utilities/x86/MemcpyFast.cpp"
rm -fr "${NEW_DIR}/plugins/zzogl-pg/opengl/memcpy_amd.cpp"
## BUILD
echo "Build the tar.gz file"
tar -C $TMP_DIR -czf ${TAR_NAME}.gz $PKG_NAME
## Clean
rm -fr $NEW_DIR
rm -fr $ROOT_DIR

View File

View File

@ -1,24 +0,0 @@
Debian specific
Always uses the same executable name for the build. Easier for the install script
Index: pcsx2-3567/pcsx2/CMakeLists.txt
===================================================================
--- pcsx2-3567.orig/pcsx2/CMakeLists.txt
+++ pcsx2-3567/pcsx2/CMakeLists.txt
@@ -72,7 +72,7 @@
if(CMAKE_BUILD_TYPE STREQUAL Debug)
# executable name
- set(Output pcsx2-dbg)
+ set(Output pcsx2)
# add defines
add_definitions(${CommonFlags} ${DebugFlags} -DPCSX2_DEVBUILD -DPCSX2_DEBUG -DWX_PRECOMP)
@@ -82,7 +82,7 @@
if(CMAKE_BUILD_TYPE STREQUAL Devel)
# executable name
- set(Output pcsx2-dev)
+ set(Output pcsx2)
# add defines
add_definitions(${CommonFlags} ${OptimizationFlags} -DPCSX2_DEVBUILD -DWX_PRECOMP -DNDEBUG)

View File

@ -1,58 +0,0 @@
Debian policy.
This patch updates default plugin path and config path.
It also updates the pcsx2 game db path and shaders data path.
AppInit.cpp:93 could probably be updated as well
Index: pcsx2-3369/pcsx2/gui/AppConfig.cpp
===================================================================
--- pcsx2-3369.orig/pcsx2/gui/AppConfig.cpp
+++ pcsx2-3369/pcsx2/gui/AppConfig.cpp
@@ -162,7 +162,8 @@
wxDirName GetPlugins()
{
- return AppRoot() + Base::Plugins();
+ // return AppRoot() + Base::Plugins();
+ return wxDirName( L"/usr/lib/games/pcsx2" ) + Base::Plugins();
}
wxDirName GetSettings()
Index: pcsx2-3369/plugins/zzogl-pg/opengl/ZZoglCreate.cpp
===================================================================
--- pcsx2-3369.orig/plugins/zzogl-pg/opengl/ZZoglCreate.cpp
+++ pcsx2-3369/plugins/zzogl-pg/opengl/ZZoglCreate.cpp
@@ -347,17 +347,12 @@
assert(hShaderGlob != NULL);
s_lpShaderResources = (u8*)LockResource(hShaderGlob);
# else // not _WIN32
- FILE* fres = fopen("ps2hw.dat", "rb");
+ FILE* fres = fopen("/usr/share/games/pcsx2/shaders/ps2hw.dat", "rb");
if (fres == NULL)
{
- fres = fopen("plugins/ps2hw.dat", "rb");
-
- if (fres == NULL)
- {
- ZZLog::Error_Log("Cannot find ps2hw.dat in working directory. Exiting.");
- return false;
- }
+ ZZLog::Error_Log("Cannot find ps2hw.dat in working directory. Exiting.");
+ return false;
}
fseek(fres, 0, SEEK_END);
Index: pcsx2-3369/pcsx2/gui/AppGameDatabase.h
===================================================================
--- pcsx2-3369.orig/pcsx2/gui/AppGameDatabase.h
+++ pcsx2-3369/pcsx2/gui/AppGameDatabase.h
@@ -51,8 +51,8 @@
Console.WriteLn( "(GameDB) Unloading..." );
}
- AppGameDatabase& LoadFromFile(const wxString& file = L"GameIndex.dbf", const wxString& key = L"Serial" );
- void SaveToFile(const wxString& file = L"GameIndex.dbf");
+ AppGameDatabase& LoadFromFile(const wxString& file = L"/var/games/pcsx2/GameIndex.dbf", const wxString& key = L"Serial" );
+ void SaveToFile(const wxString& file = L"/var/games/pcsx2/GameIndex.dbf");
};
static wxString compatToStringWX(int compat) {

View File

@ -1,17 +0,0 @@
Index: pcsx2-3369/pcsx2/gui/AppConfig.cpp
===================================================================
--- pcsx2-3369.orig/pcsx2/gui/AppConfig.cpp
+++ pcsx2-3369/pcsx2/gui/AppConfig.cpp
@@ -125,7 +125,12 @@
{
switch( mode )
{
+#ifdef __LINUX__
+ // By default on linux move all user data file into central configuration directory
+ case DocsFolder_User: return GetUserLocalDataDir();
+#else
case DocsFolder_User: return (wxDirName)Path::Combine( wxStandardPaths::Get().GetDocumentsDir(), pxGetAppName() );
+#endif
//case DocsFolder_CWD: return (wxDirName)wxGetCwd();
case DocsFolder_Custom: return CustomDocumentsFolder;

View File

@ -1,176 +0,0 @@
This patch removes the recording feature because it needs libsoundtouch >= 1.4.
However, only version 1.3 is available in Debian. Unfortunately the package
does not seem to be actively maintained.
Note: it also corrects the include path.
Index: pcsx2-3369/plugins/spu2-x/src/Wavedump_wav.cpp
===================================================================
--- pcsx2-3369.orig/plugins/spu2-x/src/Wavedump_wav.cpp
+++ pcsx2-3369/plugins/spu2-x/src/Wavedump_wav.cpp
@@ -16,16 +16,22 @@
*/
#include "Global.h"
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
#include "soundtouch/WavFile.h"
+#endif
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
static WavOutFile* _new_WavOutFile( const char* destfile )
{
return new WavOutFile( destfile, 48000, 16, 2 );
}
+#endif
namespace WaveDump
{
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
static WavOutFile* m_CoreWav[2][CoreSrc_Count] = { NULL };
+#endif
static const char* m_tbl_CoreOutputTypeNames[CoreSrc_Count] =
{
@@ -42,6 +48,7 @@
if( !IsDevBuild ) return;
if( !WaveLog() ) return;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
char wavfilename[256];
for( uint cidx=0; cidx<2; cidx++ )
@@ -68,11 +75,13 @@
}
}
}
+#endif
}
void Close()
{
if( !IsDevBuild ) return;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
for( uint cidx=0; cidx<2; cidx++ )
{
for( int srcidx=0; srcidx<CoreSrc_Count; srcidx++ )
@@ -80,13 +89,16 @@
safe_delete( m_CoreWav[cidx][srcidx] );
}
}
+#endif
}
void WriteCore( uint coreidx, CoreSourceType src, const StereoOut16& sample )
{
if( !IsDevBuild ) return;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
if( m_CoreWav[coreidx][src] != NULL )
m_CoreWav[coreidx][src]->write( (s16*)&sample, 2 );
+#endif
}
void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right )
@@ -101,11 +113,14 @@
bool WavRecordEnabled = false;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
static WavOutFile* m_wavrecord = NULL;
+#endif
static Mutex WavRecordMutex;
void RecordStart()
{
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
WavRecordEnabled = false;
try
@@ -120,18 +135,23 @@
m_wavrecord = NULL; // not needed, but what the heck. :)
SysMessage("SPU2-X couldn't open file for recording: %s.\nRecording to wavfile disabled.", "recording.wav");
}
+#endif
}
void RecordStop()
{
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
WavRecordEnabled = false;
ScopedLock lock( WavRecordMutex );
safe_delete( m_wavrecord );
+#endif
}
void RecordWrite( const StereoOut16& sample )
{
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
ScopedLock lock( WavRecordMutex );
if( m_wavrecord == NULL ) return;
m_wavrecord->write( (s16*)&sample, 2 );
+#endif
}
Index: pcsx2-3369/plugins/zerospu2/zerospu2.cpp
===================================================================
--- pcsx2-3369.orig/plugins/zerospu2/zerospu2.cpp
+++ pcsx2-3369/plugins/zerospu2/zerospu2.cpp
@@ -28,7 +28,9 @@
#include <stdlib.h>
#include "soundtouch/SoundTouch.h"
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
#include "soundtouch/WavFile.h"
+#endif
char libraryName[256];
@@ -74,7 +76,9 @@
// time stretch variables
soundtouch::SoundTouch* pSoundTouch=NULL;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
extern WavOutFile* g_pWavRecord; // used for recording
+#endif
u64 s_GlobalTimeStamp = 0;
s32 s_nDurations[64]={0};
@@ -361,7 +365,9 @@
RemoveSound();
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
delete g_pWavRecord; g_pWavRecord = NULL;
+#endif
delete pSoundTouch; pSoundTouch = NULL;
for (u32 i = 0; i < ArraySize(s_pAudioBuffers); ++i)
Index: pcsx2-3369/plugins/zerospu2/zeroworker.cpp
===================================================================
--- pcsx2-3369.orig/plugins/zerospu2/zeroworker.cpp
+++ pcsx2-3369/plugins/zerospu2/zeroworker.cpp
@@ -19,10 +19,14 @@
#include "zerospu2.h"
#include "zeroworker.h"
#include "soundtouch/SoundTouch.h"
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
#include "soundtouch/WavFile.h"
+#endif
s32 g_logsound = 0;
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
WavOutFile* g_pWavRecord=NULL; // used for recording
+#endif
const s32 f[5][2] = {
{ 0, 0 },
@@ -302,6 +306,7 @@
void LogRawSound(void* pleft, s32 leftstride, void* pright, s32 rightstride, s32 numsamples)
{
+#if defined(SOUNDTOUCH_VERSION_ID) & (SOUNDTOUCH_VERSION_ID >= 10400)
if (g_pWavRecord == NULL )
g_pWavRecord = new WavOutFile(RECORD_FILENAME, SAMPLE_RATE, 16, 2);
@@ -320,4 +325,5 @@
}
g_pWavRecord->write(&tempbuf[0], numsamples*2);
+#endif
}

View File

@ -1,4 +0,0 @@
01_rename_binary_generated.patch
02_update_default_path.patch
05_move_data_to_config.patch
21_use_legacy_soundtouch_13.patch

View File

@ -1,2 +0,0 @@
usr/share/games/pcsx2/shaders
var/games/pcsx2/

View File

@ -1,2 +0,0 @@
bin/plugins/ps2hw.dat usr/share/games/pcsx2/shaders/
bin/GameIndex.dbf var/games/pcsx2/

View File

@ -1 +0,0 @@
usr/lib/games/pcsx2/plugins

View File

@ -1 +0,0 @@
bin/plugins/lib* usr/lib/games/pcsx2/plugins

View File

@ -1,6 +0,0 @@
#* -fPIC option was removed for multiple reasons.
# - Code only support x86 architecture.
# - Upstream code uses the ebx register so it's not compliant with PIC.
# - Impacts the performance too much.
# - Only plugins. No package will link to them.
pcsx2-plugins-unstable: shlib-with-non-pic-code

View File

@ -1,9 +0,0 @@
[Desktop Entry]
Version=1.0
Type=Application
Name=PCSX2
GenericName=Playstation 2 Emulator
Comment=Sony Playstation 2 emulator
Exec=pcsx2
Icon=pcsx2
Categories=Game;Emulator;GTK;

View File

@ -1,3 +0,0 @@
usr/games
usr/share/pixmaps
usr/share/applications

View File

@ -1,3 +0,0 @@
bin/pcsx2 usr/games
debian/pcsx2.desktop usr/share/applications
debian/pcsx2.xpm usr/share/pixmaps

View File

@ -1 +0,0 @@
bin/docs/pcsx2.man

View File

@ -1,6 +0,0 @@
?package(pcsx2): \
needs="X11" \
section="Applications/Emulators" \
title="pcsx2" \
longtitle="A playstation 2 emulators" \
command="/usr/games/pcsx2"

File diff suppressed because it is too large Load Diff

View File

@ -1,87 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
# export DH_VERBOSE=1
ifneq (,$(filter noopt,$(DEB_BUILD_OPTIONS)))
CMAKE_BUILD_TYPE=Debug
else
CMAKE_BUILD_TYPE=Release
endif
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
MAKEFLAGS += -j$(NUMJOBS)
endif
USER_CFLAGS=$(shell dpkg-buildflags --get CFLAGS)
USER_CXXFLAGS=$(shell dpkg-buildflags --get CXXFLAGS)
build: build-stamp
build-stamp:
dh_testdir
# backup orig makefile
for makefile in `find ./ -name "Makefile"` ; do [ -f $${makefile}.orig ] || mv $${makefile} $${makefile}.orig ; done
# Cmake based makefile
cmake CMakeLists.txt \
-DCMAKE_BUILD_TYPE=$(CMAKE_BUILD_TYPE) \
-DCMAKE_BUILD_STRIP=FALSE \
-DFORCE_INTERNAL_SOUNDTOUCH=FALSE \
-DUSER_CMAKE_C_FLAGS:STRING="$(USER_CFLAGS)" \
-DUSER_CMAKE_CXX_FLAGS:STRING="$(USER_CXXFLAGS)"
$(MAKE) $(MAKEFLAGS)
touch build-stamp
clean:
dh_testdir
dh_testroot
rm -f build-stamp
# Backup some orig makefile if it's not already done.
# I hope that some will be deleted by upstream when the cmake port is over.
# Note: In case that we do not dl all the plugins, the test [ -f $${makefile} ] ensures it works
for makefile in plugins/CDVDlinuz/Src/Linux/Makefile \
plugins/CDVDiso/src/Linux/Makefile \
plugins/CDVDiso/src/Windows/Makefile \
plugins/USBnull/Windows/Makefile \
plugins/FWnull/Windows/Makefile \
plugins/PeopsSPU2/Makefile \
plugins/CDVDisoEFP/src/Linux/Makefile ; do \
[ -f $${makefile}.orig ] || ( [ -f $${makefile} ] && mv $${makefile} $${makefile}.orig ) || true ; done
# Add here the commands to clean up after the build process.
[ -f Makefile ] && $(MAKE) clean || true
# Remove cmake stuff
rm -fr $$(find . -type d -name CMakeFiles)
rm -f $$(find . -type f -name CMakeCache.txt) $$(find . -type f -name cmake_install.cmake)
rm -f $$(find . -type f -name Makefile)
# Files generated by bin2cpp
cd pcsx2/gui/Resources/ && rm -f App*.h Config*.h BackgroundLogo.h ButtonIcon_Camera.h Dualshock.h
# leftover of cmake
rm -f bin/plugins/ps2hw.dat
rm -f pcsx2/svnrev.h
# Restore orig makefile
for makefile_orig in `find ./ -name "Makefile.orig"` ; do [ -f $${makefile_orig} ] && mv $${makefile_orig} `echo $${makefile_orig} | sed -e 's/.orig//'` ; done
dh_clean
## Uncomment this, if fglrx driver is installed
#override_dh_shlibdeps:
# dh_shlibdeps -- --ignore-missing-info
override_dh_strip:
dh_strip --package=pcsx2 --dbg-package=pcsx2-dbg
dh_strip --package=pcsx2-plugins --dbg-package=pcsx2-plugins-dbg
# Avoid to relaunch the compilation twice. (build and dh_auto_build target)
override_dh_auto_build:
# Do nothing
%:
dh $@ --parallel
.PHONY: build clean install

View File

@ -1,87 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
# export DH_VERBOSE=1
ifneq (,$(filter noopt,$(DEB_BUILD_OPTIONS)))
CMAKE_BUILD_TYPE=Debug
else
CMAKE_BUILD_TYPE=Release
endif
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
MAKEFLAGS += -j$(NUMJOBS)
endif
USER_CFLAGS=$(shell dpkg-buildflags --get CFLAGS)
USER_CXXFLAGS=$(shell dpkg-buildflags --get CXXFLAGS)
build: build-stamp
build-stamp:
dh_testdir
# backup orig makefile
for makefile in `find ./ -name "Makefile"` ; do [ -f $${makefile}.orig ] || mv $${makefile} $${makefile}.orig ; done
# Cmake based makefile
cmake CMakeLists.txt \
-DCMAKE_BUILD_TYPE=$(CMAKE_BUILD_TYPE) \
-DCMAKE_BUILD_STRIP=FALSE \
-DFORCE_INTERNAL_SOUNDTOUCH=FALSE \
-DUSER_CMAKE_C_FLAGS:STRING="$(USER_CFLAGS)" \
-DUSER_CMAKE_CXX_FLAGS:STRING="$(USER_CXXFLAGS)"
$(MAKE) $(MAKEFLAGS)
touch build-stamp
clean:
dh_testdir
dh_testroot
rm -f build-stamp
# Backup some orig makefile if it's not already done.
# I hope that some will be deleted by upstream when the cmake port is over.
# Note: In case that we do not dl all the plugins, the test [ -f $${makefile} ] ensures it works
for makefile in plugins/CDVDlinuz/Src/Linux/Makefile \
plugins/CDVDiso/src/Linux/Makefile \
plugins/CDVDiso/src/Windows/Makefile \
plugins/USBnull/Windows/Makefile \
plugins/FWnull/Windows/Makefile \
plugins/PeopsSPU2/Makefile \
plugins/CDVDisoEFP/src/Linux/Makefile ; do \
[ -f $${makefile}.orig ] || ( [ -f $${makefile} ] && mv $${makefile} $${makefile}.orig ) || true ; done
# Add here the commands to clean up after the build process.
[ -f Makefile ] && $(MAKE) clean || true
# Remove cmake stuff
rm -fr $$(find . -type d -name CMakeFiles)
rm -f $$(find . -type f -name CMakeCache.txt) $$(find . -type f -name cmake_install.cmake)
rm -f $$(find . -type f -name Makefile)
# Files generated by bin2cpp
cd pcsx2/gui/Resources/ && rm -f App*.h Config*.h BackgroundLogo.h ButtonIcon_Camera.h Dualshock.h
# leftover of cmake
rm -f bin/plugins/ps2hw.dat
rm -f pcsx2/svnrev.h
# Restore orig makefile
for makefile_orig in `find ./ -name "Makefile.orig"` ; do [ -f $${makefile_orig} ] && mv $${makefile_orig} `echo $${makefile_orig} | sed -e 's/.orig//'` ; done
dh_clean
# Allow compilation when fglrx is installed
override_dh_shlibdeps:
dh_shlibdeps -- --ignore-missing-info
override_dh_strip:
dh_strip --package=pcsx2 --dbg-package=pcsx2-dbg
dh_strip --package=pcsx2-plugins --dbg-package=pcsx2-plugins-dbg
# Avoid to relaunch the compilation twice. (build and dh_auto_build target)
override_dh_auto_build:
# Do nothing
%:
dh $@ --parallel
.PHONY: build clean install

View File

@ -1 +0,0 @@
3.0 (quilt)

View File

@ -1,11 +0,0 @@
# Example watch control file for uscan
# Rename this file to "watch" and then you can run the "uscan" command
# to check for upstream updates and more.
# See uscan(1) for format
# Compulsory line, this is a version 3 file
version=3
# Note: Upstream does not release prepackaged source files.
# It is advised to get them from their subversion repository.
# http://pcsx2.googlecode.com/files/Pcsx2-linux-beta-(.*).tar.gz

View File

@ -89,6 +89,10 @@ if(CMAKE_BUILD_TYPE STREQUAL Release)
add_definitions(${CommonFlags} ${OptimizationFlags} -DWX_PRECOMP -DNDEBUG)
endif(CMAKE_BUILD_TYPE STREQUAL Release)
if(XDG_STD)
add_definitions(-DXDG_STD)
endif(XDG_STD)
# In package mode always use pcsx2
if(PACKAGE_MODE)
set(Output pcsx2)

View File

@ -133,7 +133,12 @@ namespace PathDefs
{
switch( mode )
{
#ifdef XDG_STD
// Move all user data file into central configuration directory (XDG_CONFIG_DIR)
case DocsFolder_User: return GetUserLocalDataDir();
#else
case DocsFolder_User: return (wxDirName)Path::Combine( wxStandardPaths::Get().GetDocumentsDir(), pxGetAppName() );
#endif
case DocsFolder_Custom: return CustomDocumentsFolder;
jNO_DEFAULT

View File

@ -163,20 +163,26 @@ static wxLanguage i18n_FallbackToAnotherLang( wxLanguage wxLangId )
switch(wxLangId)
{
case wxLANGUAGE_CHINESE_HONGKONG :
case wxLANGUAGE_CHINESE_MACAU : return wxLANGUAGE_CHINESE_TRADITIONAL;
case wxLANGUAGE_CHINESE_SINGAPORE : return wxLANGUAGE_CHINESE_SIMPLIFIED;
case wxLANGUAGE_SWEDISH_FINLAND : return wxLANGUAGE_SWEDISH;
case wxLANGUAGE_PORTUGUESE : return wxLANGUAGE_PORTUGUESE_BRAZILIAN;
case wxLANGUAGE_CHINESE_HONGKONG :
case wxLANGUAGE_CHINESE_MACAU : return wxLANGUAGE_CHINESE_TRADITIONAL;
case wxLANGUAGE_CHINESE_SINGAPORE : return wxLANGUAGE_CHINESE_SIMPLIFIED;
case wxLANGUAGE_SAMI :
case wxLANGUAGE_SWEDISH_FINLAND : return wxLANGUAGE_SWEDISH;
case wxLANGUAGE_PORTUGUESE : return wxLANGUAGE_PORTUGUESE_BRAZILIAN;
// Overkill 9000?
case wxLANGUAGE_GERMAN_AUSTRIAN :
case wxLANGUAGE_GERMAN_BELGIUM :
case wxLANGUAGE_GERMAN_LIECHTENSTEIN :
case wxLANGUAGE_GERMAN_LUXEMBOURG :
case wxLANGUAGE_GERMAN_SWISS :
return wxLANGUAGE_GERMAN;
case wxLANGUAGE_GERMAN_AUSTRIAN :
case wxLANGUAGE_GERMAN_BELGIUM :
case wxLANGUAGE_GERMAN_LIECHTENSTEIN :
case wxLANGUAGE_GERMAN_LUXEMBOURG :
case wxLANGUAGE_GERMAN_SWISS : return wxLANGUAGE_GERMAN;
case wxLANGUAGE_ITALIAN_SWISS : return wxLANGUAGE_ITALIAN;
default : break;
default : break;
}
return wxLangId;
}

View File

@ -17,6 +17,7 @@ set(CommonFlags
#-Wstrict-aliasing # Allow to track strict aliasing issue.
-Wunused-variable
-std=c++0x
-fno-strict-aliasing
)
set(OptimizationFlags

View File

@ -109,7 +109,7 @@ EXPORT_C_(int32) GPUopen(void* hWnd)
#endif
int renderer = theApp.GetConfig("Renderer", 1);
int threads = theApp.GetConfig("swthreads", 1);
int threads = theApp.GetConfig("extrathreads", 0);
switch(renderer)
{

View File

@ -76,17 +76,12 @@ void GPUDrawScanline::BeginDraw(const void* param)
m_sp = m_sp_map[sel];
}
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
{
m_ds_map.UpdateStats(stats, frame);
m_ds_map.UpdateStats(frame, ticks, pixels);
}
void GPUDrawScanline::PrintStats()
{
m_ds_map.PrintStats();
}
#ifndef JIT_DRAW
#ifndef ENABLE_JIT_RASTERIZER
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{

View File

@ -42,10 +42,9 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats();
void EndDraw(uint64 frame, uint64 ticks, int pixels);
#ifndef JIT_DRAW
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -301,6 +301,11 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
return;
}
if(m_sel.tlu)
{
mov(edx, ptr[&m_local.gd->clut]);
}
// xmm2 = s
// xmm3 = t
// xmm7 = test
@ -953,7 +958,7 @@ void GPUDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr)
if(m_sel.tlu) movzx(eax, byte[esi + eax]);
const Address& src = m_sel.tlu ? ptr[eax * 2 + (size_t)m_local.gd->clut] : ptr[esi + eax * 2];
const Address& src = m_sel.tlu ? ptr[edx + eax * 2] : ptr[esi + eax * 2];
if(i == 0) movd(dst, src);
else pinsrw(dst, src, (uint8)i);

View File

@ -127,7 +127,7 @@ bool GPURenderer::Merge()
void GPURenderer::VSync()
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
m_perfmon.Put(GSPerfMon::Frame);

View File

@ -29,13 +29,15 @@ GPURendererSW::GPURendererSW(GSDevice* dev, int threads)
{
m_output = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);
m_rl.Create<GPUDrawScanline>(threads);
m_rl = GSRasterizerList::Create<GPUDrawScanline>(threads, &m_perfmon);
}
GPURendererSW::~GPURendererSW()
{
delete m_texture;
delete m_rl;
_aligned_free(m_output);
}
@ -67,12 +69,12 @@ GSTexture* GPURendererSW::GetOutput()
void GPURendererSW::Draw()
{
shared_ptr<GSRasterizerData> data(new GPURasterizerData());
GPUScanlineGlobalData& gd = *(GPUScanlineGlobalData*)data->param;
const GPUDrawingEnvironment& env = m_env;
//
GPUScanlineGlobalData gd;
gd.sel.key = 0;
gd.sel.iip = env.PRIM.IIP;
gd.sel.me = env.STATUS.ME;
@ -97,7 +99,11 @@ void GPURendererSW::Draw()
if(!t) {ASSERT(0); return;}
gd.tex = t;
gd.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
gd.clut = (uint16*)_aligned_malloc(sizeof(uint16) * 256, 32);
memcpy(gd.clut, m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y), sizeof(uint16) * (env.STATUS.TP == 0 ? 16 : 256));
gd.twin = GSVector4i(env.TWIN.TWW, env.TWIN.TWH, env.TWIN.TWX, env.TWIN.TWY);
}
@ -108,25 +114,22 @@ void GPURendererSW::Draw()
gd.vm = m_mem.GetPixelAddress(0, 0);
//
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count);
data->count = m_count;
GSRasterizerData data;
data->frame = m_perfmon.GetFrame();
data.vertices = m_vertices;
data.count = m_count;
data.frame = m_perfmon.GetFrame();
data.param = &gd;
data.scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data.scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data.scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data.scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
switch(env.PRIM.TYPE)
{
case GPU_POLYGON: data.primclass = GS_TRIANGLE_CLASS; break;
case GPU_LINE: data.primclass = GS_LINE_CLASS; break;
case GPU_SPRITE: data.primclass = GS_SPRITE_CLASS; break;
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break;
default: __assume(0);
}
@ -135,34 +138,34 @@ void GPURendererSW::Draw()
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
GSVertexSW* v = data->vertices;
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 p = m_vertices[i].p;
GSVector4 p = v[i].p;
tl = tl.min(p);
br = br.max(p);
}
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
data->bbox = GSVector4i(tl.xyxy(br));
GSVector4i r = data->bbox.rintersect(data->scissor);
r.left >>= m_scale.x;
r.top >>= m_scale.y;
r.right >>= m_scale.x;
r.bottom >>= m_scale.y;
m_rl.Draw(&data, r.width(), r.height());
Invalidate(r);
m_rl.Sync();
m_rl->Queue(data);
GSRasterizerStats stats;
m_rl->Sync();
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
// TODO: m_perfmon.Put(GSPerfMon::Draw, 1);
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
// TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
}
void GPURendererSW::VertexKick()

View File

@ -26,8 +26,30 @@
class GPURendererSW : public GPURendererT<GSVertexSW>
{
class GPURasterizerData : public GSRasterizerData
{
public:
GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)_aligned_malloc(sizeof(GPUScanlineGlobalData), 32);
gd->clut = NULL;
param = gd;
}
virtual ~GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
_aligned_free(gd);
}
};
protected:
GSRasterizerList m_rl;
IRasterizer* m_rl;
GSTexture* m_texture;
uint32* m_output;

View File

@ -62,7 +62,7 @@ __aligned(struct, 32) GPUScanlineGlobalData
void* vm;
const void* tex;
const uint16* clut;
uint16* clut;
GSVector4i twin; // TWW, TWH, TWX, TWY
};

View File

@ -73,8 +73,8 @@ void GPUSettingsDlg::OnInit()
CheckDlgButton(m_hWnd, IDC_WINDOWED, theApp.GetConfig("windowed", 1));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 1));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("swthreads", 1), 0));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("extrathreads", 0), 0));
UpdateControls();
}
@ -124,7 +124,7 @@ bool GPUSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
theApp.SetConfig("scale_y", (data >> 2) & 3);
}
theApp.SetConfig("swthreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0));
theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0));
theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED));
}

View File

@ -141,7 +141,7 @@ void GPUState::Invalidate(const GSVector4i& r)
void GPUState::WriteData(const uint8* mem, uint32 size)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
size <<= 2;
@ -165,7 +165,7 @@ void GPUState::WriteData(const uint8* mem, uint32 size)
void GPUState::ReadData(uint8* mem, uint32 size)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
int remaining = m_read.bytes - m_read.cur;
@ -194,7 +194,7 @@ void GPUState::ReadData(uint8* mem, uint32 size)
void GPUState::WriteStatus(uint32 status)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
uint32 b = status >> 24;
@ -205,7 +205,7 @@ void GPUState::WriteStatus(uint32 status)
uint32 GPUState::ReadStatus()
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ?

View File

@ -193,7 +193,7 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
if(threads == -1)
{
threads = theApp.GetConfig("swthreads", 1);
threads = theApp.GetConfig("extrathreads", 0);
}
try
@ -776,6 +776,8 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1);
Sleep(100);
bool exit = false;
while(!exit)

View File

@ -194,6 +194,7 @@ CRC::Game CRC::m_games[] =
{0xD71B57F4, Genji, US, 0},
{0xFADEBC45, Genji, EU, 0},
{0xB4776FC1, Genji, JP, 0},
{0x7D4EA48F, Genji, NoRegion, 0},
{0xE04EA200, StarOcean3, EU, 0},
{0x23A97857, StarOcean3, US, 0},
{0xBEC32D49, StarOcean3, JP, 0},
@ -366,7 +367,7 @@ CRC::Game CRC::Lookup(uint32 crc)
// printf( "GSdx: excluding CRC hack for 0x%08x\n", m_games[i].crc );
}
}
#ifndef NO_CRC_HACKS
#ifndef DISABLE_CRC_HACKS
hash_map<uint32, Game*>::iterator i = m_map.find(crc);
if(i != m_map.end())

View File

@ -117,7 +117,7 @@ void GSDeviceSW::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
class ShaderBase
{
protected:
GSVector4i Sample(const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf)
GSVector4i Sample(const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const
{
GSVector4i c0 = c.upl8();
GSVector4i c1 = c.uph8();
@ -128,17 +128,17 @@ protected:
return c0;
}
GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1)
GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1) const
{
return c0.lerp16<0>(c1, c1.wwwwl().sll16(7));
}
GSVector4i Blend2x(const GSVector4i& c0, const GSVector4i& c1)
GSVector4i Blend2x(const GSVector4i& c0, const GSVector4i& c1) const
{
return c0.lerp16<0>(c1, c1.wwwwl().sll16(1).pu16().uph8().sll16(7)); // .sll16(1).pu16() => 2x, then clamp (...)
}
GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1, const GSVector4i& f)
GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1, const GSVector4i& f) const
{
return c0.lerp16<0>(c1, f);
}
@ -147,12 +147,12 @@ protected:
class ShaderCopy : public ShaderBase
{
public:
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf)
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const
{
*dst = Sample(c, uf, vf).pu16().extract32<0>();
}
void operator() (uint32* RESTRICT dst, uint32 c)
void operator() (uint32* RESTRICT dst, uint32 c) const
{
*dst = c;
}
@ -161,12 +161,12 @@ public:
class ShaderAlphaBlend : public ShaderBase
{
public:
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf)
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const
{
*dst = Blend(Sample(c, uf, vf), GSVector4i(*dst).uph8()).pu16().extract32<0>();
}
void operator() (uint32* RESTRICT dst, uint32 c)
void operator() (uint32* RESTRICT dst, uint32 c) const
{
*dst = Blend(GSVector4i(c), GSVector4i(*dst).uph8()).pu16().extract32<0>();
}
@ -175,12 +175,12 @@ public:
class ShaderAlpha2xBlend : public ShaderBase
{
public:
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf)
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const
{
*dst = Blend2x(Sample(c, uf, vf), GSVector4i(*dst).uph8()).pu16().extract32<0>();
}
void operator() (uint32* RESTRICT dst, uint32 c)
void operator() (uint32* RESTRICT dst, uint32 c) const
{
*dst = Blend2x(GSVector4i(c), GSVector4i(*dst).uph8()).pu16().extract32<0>();
}
@ -196,18 +196,18 @@ public:
m_f = GSVector4i((f << 16) | f).xxxx().srl16(1);
}
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf)
void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const
{
*dst = Blend(Sample(c, uf, vf), GSVector4i(*dst).uph8(), m_f).pu16().extract32<0>();
}
void operator() (uint32* RESTRICT dst, uint32 c)
void operator() (uint32* RESTRICT dst, uint32 c) const
{
*dst = Blend(GSVector4i(c), GSVector4i(*dst).uph8(), m_f).pu16().extract32<0>();
}
};
template<class SHADER> static void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, SHADER shader, bool linear)
template<class SHADER> static void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, const SHADER& shader, bool linear)
{
GSVector4i r(dr.ceil());

View File

@ -40,6 +40,8 @@ public:
GSDialog(UINT id);
virtual ~GSDialog() {}
int GetId() const {return m_id;}
INT_PTR DoModal();
string GetText(UINT id);

View File

@ -95,17 +95,12 @@ void GSDrawScanline::BeginDraw(const void* param)
m_sp = m_sp_map[sel];
}
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
{
m_ds_map.UpdateStats(stats, frame);
m_ds_map.UpdateStats(frame, ticks, pixels);
}
// Prints the accumulated draw statistics by forwarding to m_ds_map.PrintStats().
void GSDrawScanline::PrintStats()
{
m_ds_map.PrintStats();
}
#ifndef JIT_DRAW
#ifndef ENABLE_JIT_RASTERIZER
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{
@ -1416,7 +1411,7 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
}
else
{
if(m == 0)
if((m & 0xffff) == 0)
{
DrawRectT<uint16, false>(zbr, zbc, r, z, m);
}
@ -1456,7 +1451,7 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
c = ((c & 0xf8) >> 3) | ((c & 0xf800) >> 6) | ((c & 0xf80000) >> 9) | ((c & 0x80000000) >> 16);
if(m == 0)
if((m & 0xffff) == 0)
{
DrawRectT<uint16, false>(fbr, fbc, r, c, m);
}
@ -1482,6 +1477,8 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
mask = mask.xxzzlh();
}
if(masked) ASSERT(mask.u32[0] != 0);
color = color.andnot(mask);
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));

View File

@ -51,12 +51,11 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats();
void EndDraw(uint64 frame, uint64 ticks, int pixels);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#ifndef JIT_DRAW
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -63,6 +63,7 @@ L("loop");
// ecx = steps
// esi = fzbr
// edi = fzbc
// ebp = za
// - xmm0
// xmm2 = s/u (tme)
// xmm3 = t/v (tme)
@ -688,7 +689,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
mov(ebx, ptr[&m_local.gd->tex[0]]);
if(m_sel.tlu)
{
mov(edx, ptr[&m_local.gd->clut]);
}
// ebx = tex
// edx = clut
if(!m_sel.fst)
{
@ -1095,7 +1102,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
return;
}
mov(edx, (size_t)m_local.gd->tex);
push(ebp);
mov(ebp, (size_t)m_local.gd->tex);
if(m_sel.tlu)
{
mov(edx, ptr[&m_local.gd->clut]);
}
if(!m_sel.fst)
{
@ -1477,255 +1491,258 @@ return;
vpsrlw(xmm6, 8);
}
if(m_sel.mmin == 1) return; // round-off mode
vmovdqa(ptr[&m_local.temp.trb], xmm5);
vmovdqa(ptr[&m_local.temp.tga], xmm6);
vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]);
vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]);
vpsrad(xmm2, 1);
vpsrad(xmm3, 1);
vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
vpsrlw(xmm5, 1);
vpsrlw(xmm6, 1);
if(m_sel.ltf)
if(m_sel.mmin != 1) // !round-off mode
{
// u -= 0x8000;
// v -= 0x8000;
vmovdqa(ptr[&m_local.temp.trb], xmm5);
vmovdqa(ptr[&m_local.temp.tga], xmm6);
mov(eax, 0x8000);
vmovd(xmm4, eax);
vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]);
vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]);
vpsubd(xmm2, xmm4);
vpsubd(xmm3, xmm4);
vpsrad(xmm2, 1);
vpsrad(xmm3, 1);
// GSVector4i uf = u.xxzzlh().srl16(1);
vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
vpsrlw(xmm5, 1);
vpsrlw(xmm6, 1);
if(m_sel.ltf)
{
// u -= 0x8000;
// v -= 0x8000;
mov(eax, 0x8000);
vmovd(xmm4, eax);
vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
vpsubd(xmm2, xmm4);
vpsubd(xmm3, xmm4);
// GSVector4i uf = u.xxzzlh().srl16(1);
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(1);
// GSVector4i vf = v.xxzzlh().srl16(1);
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
// GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
// GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
vpsrad(xmm2, 16);
vpsrad(xmm3, 16);
vpackssdw(xmm2, xmm3);
vpsrad(xmm2, 16);
vpsrad(xmm3, 16);
vpackssdw(xmm2, xmm3);
if(m_sel.ltf)
{
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
if(m_sel.ltf)
{
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
vpcmpeqd(xmm1, xmm1);
vpsrlw(xmm1, 15);
vpaddw(xmm3, xmm2, xmm1);
vpcmpeqd(xmm1, xmm1);
vpsrlw(xmm1, 15);
vpaddw(xmm3, xmm2, xmm1);
// uv0 = Wrap(uv0);
// uv1 = Wrap(uv1);
// uv0 = Wrap(uv0);
// uv1 = Wrap(uv1);
WrapLOD(xmm2, xmm3);
}
else
{
// uv0 = Wrap(uv0);
WrapLOD(xmm2, xmm3);
}
else
{
// uv0 = Wrap(uv0);
WrapLOD(xmm2);
}
WrapLOD(xmm2);
}
// xmm2 = uv0
// xmm3 = uv1 (ltf)
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
// xmm7 = used
// xmm2 = uv0
// xmm3 = uv1 (ltf)
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
// xmm7 = used
// GSVector4i x0 = uv0.upl16();
// GSVector4i y0 = uv0.uph16() << tw;
// GSVector4i x0 = uv0.upl16();
// GSVector4i y0 = uv0.uph16() << tw;
vpxor(xmm0, xmm0);
vpxor(xmm0, xmm0);
vpunpcklwd(xmm4, xmm2, xmm0);
vpunpckhwd(xmm2, xmm2, xmm0);
vpslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm1, xmm5, xmm6 = free
// xmm7 = used
if(m_sel.ltf)
{
// GSVector4i x1 = uv1.upl16();
// GSVector4i y1 = uv1.uph16() << tw;
vpunpcklwd(xmm6, xmm3, xmm0);
vpunpckhwd(xmm3, xmm3, xmm0);
vpslld(xmm3, m_sel.tw + 3);
vpunpcklwd(xmm4, xmm2, xmm0);
vpunpckhwd(xmm2, xmm2, xmm0);
vpslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm2 = y0
// xmm3 = y1
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm6 = x1
// xmm0, xmm1, xmm5 = free
// xmm1, xmm5, xmm6 = free
// xmm7 = used
// GSVector4i addr00 = y0 + x0;
// GSVector4i addr01 = y0 + x1;
// GSVector4i addr10 = y1 + x0;
// GSVector4i addr11 = y1 + x1;
if(m_sel.ltf)
{
// GSVector4i x1 = uv1.upl16();
// GSVector4i y1 = uv1.uph16() << tw;
vpaddd(xmm5, xmm2, xmm4);
vpaddd(xmm2, xmm2, xmm6);
vpaddd(xmm0, xmm3, xmm4);
vpaddd(xmm3, xmm3, xmm6);
vpunpcklwd(xmm6, xmm3, xmm0);
vpunpckhwd(xmm3, xmm3, xmm0);
vpslld(xmm3, m_sel.tw + 3);
// xmm5 = addr00
// xmm2 = addr01
// xmm0 = addr10
// xmm3 = addr11
// xmm1, xmm4, xmm6 = free
// xmm7 = used
// xmm2 = y0
// xmm3 = y1
// xmm4 = x0
// xmm6 = x1
// xmm0, xmm1, xmm5 = free
// xmm7 = used
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
// c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]);
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
// GSVector4i addr00 = y0 + x0;
// GSVector4i addr01 = y0 + x1;
// GSVector4i addr10 = y1 + x0;
// GSVector4i addr11 = y1 + x1;
ReadTexel(4, 1);
vpaddd(xmm5, xmm2, xmm4);
vpaddd(xmm2, xmm2, xmm6);
vpaddd(xmm0, xmm3, xmm4);
vpaddd(xmm3, xmm3, xmm6);
// xmm6 = c00
// xmm4 = c01
// xmm1 = c10
// xmm5 = c11
// xmm0, xmm2, xmm3 = free
// xmm7 = used
// xmm5 = addr00
// xmm2 = addr01
// xmm0 = addr10
// xmm3 = addr11
// xmm1, xmm4, xmm6 = free
// xmm7 = used
vmovdqa(xmm0, ptr[&m_local.temp.uf]);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
// c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]);
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
ReadTexel(4, 1);
vpsllw(xmm2, xmm6, 8);
vpsrlw(xmm2, 8);
vpsrlw(xmm6, 8);
// xmm6 = c00
// xmm4 = c01
// xmm1 = c10
// xmm5 = c11
// xmm0, xmm2, xmm3 = free
// xmm7 = used
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
vmovdqa(xmm0, ptr[&m_local.temp.uf]);
vpsllw(xmm3, xmm4, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm4, 8);
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
// xmm0 = uf
// xmm2 = rb00
// xmm3 = rb01
// xmm6 = ga00
// xmm4 = ga01
// xmm1 = c10
// xmm5 = c11
// xmm7 = used
vpsllw(xmm2, xmm6, 8);
vpsrlw(xmm2, 8);
vpsrlw(xmm6, 8);
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
vpsllw(xmm3, xmm4, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm4, 8);
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = c10
// xmm5 = c11
// xmm2, xmm6 = free
// xmm7 = used
// xmm0 = uf
// xmm2 = rb00
// xmm3 = rb01
// xmm6 = ga00
// xmm4 = ga01
// xmm1 = c10
// xmm5 = c11
// xmm7 = used
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
vpsrlw(xmm2, xmm1, 8);
vpsllw(xmm1, 8);
vpsrlw(xmm1, 8);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = c10
// xmm5 = c11
// xmm2, xmm6 = free
// xmm7 = used
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsrlw(xmm5, 8);
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = rb10
// xmm5 = rb11
// xmm2 = ga10
// xmm6 = ga11
// xmm7 = used
vpsrlw(xmm2, xmm1, 8);
vpsllw(xmm1, 8);
vpsrlw(xmm1, 8);
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsrlw(xmm5, 8);
// xmm3 = rb00
// xmm4 = ga00
// xmm5 = rb10
// xmm6 = ga10
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = rb10
// xmm5 = rb11
// xmm2 = ga10
// xmm6 = ga11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
}
else
{
// GSVector4i addr00 = y0 + x0;
// xmm3 = rb00
// xmm4 = ga00
// xmm5 = rb10
// xmm6 = ga10
// xmm0, xmm1, xmm2 = free
// xmm7 = used
vpaddd(xmm5, xmm2, xmm4);
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
ReadTexel(1, 1);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
}
else
{
// GSVector4i addr00 = y0 + x0;
// GSVector4i mask = GSVector4i::x00ff();
vpaddd(xmm5, xmm2, xmm4);
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
ReadTexel(1, 1);
// GSVector4i mask = GSVector4i::x00ff();
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
}
vmovdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
vpsrlw(xmm0, xmm0, 1);
vmovdqa(xmm2, ptr[&m_local.temp.trb]);
vmovdqa(xmm3, ptr[&m_local.temp.tga]);
lerp16(xmm5, xmm2, xmm0, 0);
lerp16(xmm6, xmm3, xmm0, 0);
}
vmovdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
vpsrlw(xmm0, xmm0, 1);
vmovdqa(xmm2, ptr[&m_local.temp.trb]);
vmovdqa(xmm3, ptr[&m_local.temp.tga]);
lerp16(xmm5, xmm2, xmm0, 0);
lerp16(xmm6, xmm3, xmm0, 0);
pop(ebp);
}
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
@ -2592,8 +2609,9 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
vpaddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
vpaddw(xmm6, ptr[eax + (size_t)&m_local.gd->dimx[1]]);
mov(ebp, ptr[&m_local.gd->dimx]);
vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
// GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1]));
@ -2739,7 +2757,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
// xmm0 = addr10
// xmm3 = addr11
// ebx = m_local.tex[0] (!m_sel.mmin)
// edx = m_local.tex (m_sel.mmin)
// ebp = m_local.tex (m_sel.mmin)
// edx = m_local.clut (m_sel.tlu)
// out
// xmm6 = c00
@ -2765,7 +2784,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < pixels; i++)
{
@ -2784,7 +2803,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(m_sel.mmin && m_sel.lcm)
{
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
}
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
@ -2801,7 +2820,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{
const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_local.gd->clut] : ptr[ebx + eax * 4];
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
if(i == 0) vmovd(eax, addr);
else vpextrd(eax, addr, i);

View File

@ -63,6 +63,7 @@ L("loop");
// ecx = steps
// esi = fzbr
// edi = fzbc
// ebp = za
// - xmm0
// xmm2 = s/u (tme)
// xmm3 = t/v (tme)
@ -693,7 +694,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
mov(ebx, ptr[&m_local.gd->tex[0]]);
if(m_sel.tlu)
{
mov(edx, ptr[&m_local.gd->clut]);
}
// ebx = tex
// edx = clut
if(!m_sel.fst)
{
@ -1144,7 +1151,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
return;
}
mov(edx, (size_t)m_local.gd->tex);
push(ebp);
mov(ebp, (size_t)m_local.gd->tex);
if(m_sel.tlu)
{
mov(edx, ptr[&m_local.gd->clut]);
}
if(!m_sel.fst)
{
@ -1544,267 +1558,270 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
psrlw(xmm6, 8);
}
if(m_sel.mmin == 1) return; // round-off mode
movdqa(ptr[&m_local.temp.trb], xmm5);
movdqa(ptr[&m_local.temp.tga], xmm6);
movdqa(xmm2, ptr[&m_local.temp.uv[0]]);
movdqa(xmm3, ptr[&m_local.temp.uv[1]]);
psrad(xmm2, 1);
psrad(xmm3, 1);
movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
psrlw(xmm5, 1);
psrlw(xmm6, 1);
if(m_sel.ltf)
if(m_sel.mmin != 1) // !round-off mode
{
// u -= 0x8000;
// v -= 0x8000;
movdqa(ptr[&m_local.temp.trb], xmm5);
movdqa(ptr[&m_local.temp.tga], xmm6);
mov(eax, 0x8000);
movd(xmm4, eax);
pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
movdqa(xmm2, ptr[&m_local.temp.uv[0]]);
movdqa(xmm3, ptr[&m_local.temp.uv[1]]);
psubd(xmm2, xmm4);
psubd(xmm3, xmm4);
psrad(xmm2, 1);
psrad(xmm3, 1);
// GSVector4i uf = u.xxzzlh().srl16(1);
movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
psrlw(xmm5, 1);
psrlw(xmm6, 1);
if(m_sel.ltf)
{
// u -= 0x8000;
// v -= 0x8000;
mov(eax, 0x8000);
movd(xmm4, eax);
pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
psubd(xmm2, xmm4);
psubd(xmm3, xmm4);
// GSVector4i uf = u.xxzzlh().srl16(1);
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.uf], xmm0);
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(1);
// GSVector4i vf = v.xxzzlh().srl16(1);
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
// GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
// GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
psrad(xmm2, 16);
psrad(xmm3, 16);
packssdw(xmm2, xmm3);
psrad(xmm2, 16);
psrad(xmm3, 16);
packssdw(xmm2, xmm3);
if(m_sel.ltf)
{
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
if(m_sel.ltf)
{
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
movdqa(xmm3, xmm2);
pcmpeqd(xmm1, xmm1);
psrlw(xmm1, 15);
paddw(xmm3, xmm1);
movdqa(xmm3, xmm2);
pcmpeqd(xmm1, xmm1);
psrlw(xmm1, 15);
paddw(xmm3, xmm1);
// uv0 = Wrap(uv0);
// uv1 = Wrap(uv1);
// uv0 = Wrap(uv0);
// uv1 = Wrap(uv1);
WrapLOD(xmm2, xmm3);
}
else
{
// uv0 = Wrap(uv0);
WrapLOD(xmm2, xmm3);
}
else
{
// uv0 = Wrap(uv0);
WrapLOD(xmm2);
}
WrapLOD(xmm2);
}
// xmm2 = uv0
// xmm3 = uv1 (ltf)
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
// xmm7 = used
// xmm2 = uv0
// xmm3 = uv1 (ltf)
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
// xmm7 = used
// GSVector4i x0 = uv0.upl16();
// GSVector4i y0 = uv0.uph16() << tw;
// GSVector4i x0 = uv0.upl16();
// GSVector4i y0 = uv0.uph16() << tw;
pxor(xmm0, xmm0);
pxor(xmm0, xmm0);
movdqa(xmm4, xmm2);
punpckhwd(xmm2, xmm0);
punpcklwd(xmm4, xmm0);
pslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm1, xmm5, xmm6 = free
// xmm7 = used
if(m_sel.ltf)
{
// GSVector4i x1 = uv1.upl16();
// GSVector4i y1 = uv1.uph16() << tw;
movdqa(xmm6, xmm3);
punpckhwd(xmm3, xmm0);
punpcklwd(xmm6, xmm0);
pslld(xmm3, m_sel.tw + 3);
movdqa(xmm4, xmm2);
punpckhwd(xmm2, xmm0);
punpcklwd(xmm4, xmm0);
pslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm2 = y0
// xmm3 = y1
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm6 = x1
// xmm0, xmm1, xmm5 = free
// xmm1, xmm5, xmm6 = free
// xmm7 = used
// GSVector4i addr00 = y0 + x0;
// GSVector4i addr01 = y0 + x1;
// GSVector4i addr10 = y1 + x0;
// GSVector4i addr11 = y1 + x1;
if(m_sel.ltf)
{
// GSVector4i x1 = uv1.upl16();
// GSVector4i y1 = uv1.uph16() << tw;
movdqa(xmm5, xmm2);
paddd(xmm5, xmm4);
paddd(xmm2, xmm6);
movdqa(xmm6, xmm3);
punpckhwd(xmm3, xmm0);
punpcklwd(xmm6, xmm0);
pslld(xmm3, m_sel.tw + 3);
movdqa(xmm0, xmm3);
paddd(xmm0, xmm4);
paddd(xmm3, xmm6);
// xmm2 = y0
// xmm3 = y1
// xmm4 = x0
// xmm6 = x1
// xmm0, xmm1, xmm5 = free
// xmm7 = used
// xmm5 = addr00
// xmm2 = addr01
// xmm0 = addr10
// xmm3 = addr11
// xmm1, xmm4, xmm6 = free
// xmm7 = used
// GSVector4i addr00 = y0 + x0;
// GSVector4i addr01 = y0 + x1;
// GSVector4i addr10 = y1 + x0;
// GSVector4i addr11 = y1 + x1;
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
// c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]);
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
movdqa(xmm5, xmm2);
paddd(xmm5, xmm4);
paddd(xmm2, xmm6);
ReadTexel(4, 1);
movdqa(xmm0, xmm3);
paddd(xmm0, xmm4);
paddd(xmm3, xmm6);
// xmm6 = c00
// xmm4 = c01
// xmm1 = c10
// xmm5 = c11
// xmm0, xmm2, xmm3 = free
// xmm7 = used
// xmm5 = addr00
// xmm2 = addr01
// xmm0 = addr10
// xmm3 = addr11
// xmm1, xmm4, xmm6 = free
// xmm7 = used
movdqa(xmm0, ptr[&m_local.temp.uf]);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
// c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]);
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
ReadTexel(4, 1);
movdqa(xmm2, xmm6);
psllw(xmm2, 8);
psrlw(xmm2, 8);
psrlw(xmm6, 8);
// xmm6 = c00
// xmm4 = c01
// xmm1 = c10
// xmm5 = c11
// xmm0, xmm2, xmm3 = free
// xmm7 = used
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
movdqa(xmm0, ptr[&m_local.temp.uf]);
movdqa(xmm3, xmm4);
psllw(xmm3, 8);
psrlw(xmm3, 8);
psrlw(xmm4, 8);
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
// xmm0 = uf
// xmm2 = rb00
// xmm3 = rb01
// xmm6 = ga00
// xmm4 = ga01
// xmm1 = c10
// xmm5 = c11
// xmm7 = used
movdqa(xmm2, xmm6);
psllw(xmm2, 8);
psrlw(xmm2, 8);
psrlw(xmm6, 8);
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
movdqa(xmm3, xmm4);
psllw(xmm3, 8);
psrlw(xmm3, 8);
psrlw(xmm4, 8);
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = c10
// xmm5 = c11
// xmm2, xmm6 = free
// xmm7 = used
// xmm0 = uf
// xmm2 = rb00
// xmm3 = rb01
// xmm6 = ga00
// xmm4 = ga01
// xmm1 = c10
// xmm5 = c11
// xmm7 = used
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
movdqa(xmm2, xmm1);
psllw(xmm1, 8);
psrlw(xmm1, 8);
psrlw(xmm2, 8);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = c10
// xmm5 = c11
// xmm2, xmm6 = free
// xmm7 = used
movdqa(xmm6, xmm5);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = rb10
// xmm5 = rb11
// xmm2 = ga10
// xmm6 = ga11
// xmm7 = used
movdqa(xmm2, xmm1);
psllw(xmm1, 8);
psrlw(xmm1, 8);
psrlw(xmm2, 8);
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
movdqa(xmm6, xmm5);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
// xmm3 = rb00
// xmm4 = ga00
// xmm5 = rb10
// xmm6 = ga10
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// xmm0 = uf
// xmm3 = rb00
// xmm4 = ga00
// xmm1 = rb10
// xmm5 = rb11
// xmm2 = ga10
// xmm6 = ga11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
movdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
}
else
{
// GSVector4i addr00 = y0 + x0;
// xmm3 = rb00
// xmm4 = ga00
// xmm5 = rb10
// xmm6 = ga10
// xmm0, xmm1, xmm2 = free
// xmm7 = used
paddd(xmm2, xmm4);
movdqa(xmm5, xmm2);
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
movdqa(xmm0, ptr[&m_local.temp.vf]);
ReadTexel(1, 1);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
}
else
{
// GSVector4i addr00 = y0 + x0;
// GSVector4i mask = GSVector4i::x00ff();
paddd(xmm2, xmm4);
movdqa(xmm5, xmm2);
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
movdqa(xmm5, xmm6);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
ReadTexel(1, 1);
// GSVector4i mask = GSVector4i::x00ff();
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
movdqa(xmm5, xmm6);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
}
movdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
psrlw(xmm0, 1);
movdqa(xmm2, ptr[&m_local.temp.trb]);
movdqa(xmm3, ptr[&m_local.temp.tga]);
lerp16(xmm5, xmm2, xmm0, 0);
lerp16(xmm6, xmm3, xmm0, 0);
}
movdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
psrlw(xmm0, 1);
movdqa(xmm2, ptr[&m_local.temp.trb]);
movdqa(xmm3, ptr[&m_local.temp.tga]);
lerp16(xmm5, xmm2, xmm0, 0);
lerp16(xmm6, xmm3, xmm0, 0);
pop(ebp);
}
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
@ -2727,8 +2744,9 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
paddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
paddw(xmm6, ptr[eax + (size_t)&m_local.gd->dimx[1]]);
mov(ebp, ptr[&m_local.gd->dimx]);
paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
// GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1]));
@ -2902,7 +2920,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
// xmm0 = addr10
// xmm3 = addr11
// ebx = m_local.tex[0] (!m_sel.mmin)
// edx = m_local.tex (m_sel.mmin)
// ebp = m_local.tex (m_sel.mmin)
// edx = m_local.clut (m_sel.tlu)
// out
// xmm6 = c00
@ -2930,7 +2949,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < pixels; i++)
{
@ -2951,7 +2970,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
movdqa(ptr[&m_local.temp.test], xmm7);
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, 0);
psrldq(xmm5, 4);
@ -2959,7 +2978,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
psrldq(xmm2, 4);
mov(ebx, ptr[&lod_i->u32[1]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm1, xmm5, 0);
psrldq(xmm5, 4);
@ -2970,7 +2989,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
punpckldq(xmm4, xmm7);
mov(ebx, ptr[&lod_i->u32[2]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm1, xmm5, 0);
psrldq(xmm5, 4);
@ -2978,7 +2997,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
psrldq(xmm2, 4);
mov(ebx, ptr[&lod_i->u32[3]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm5, xmm5, 0);
ReadTexel(xmm2, xmm2, 0);
@ -2990,7 +3009,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
punpcklqdq(xmm4, xmm7);
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm1, xmm0, 0);
psrldq(xmm0, 4);
@ -2998,7 +3017,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
psrldq(xmm3, 4);
mov(ebx, ptr[&lod_i->u32[1]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm2, xmm0, 0);
psrldq(xmm0, 4);
@ -3009,7 +3028,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
punpckldq(xmm5, xmm7);
mov(ebx, ptr[&lod_i->u32[2]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm2, xmm0, 0);
psrldq(xmm0, 4);
@ -3017,7 +3036,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
psrldq(xmm3, 4);
mov(ebx, ptr[&lod_i->u32[3]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm0, xmm0, 0);
ReadTexel(xmm3, xmm3, 0);
@ -3033,13 +3052,13 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
else
{
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, 0);
psrldq(xmm5, 4); // shuffle instead? (1 2 3 0 ~ rotation)
mov(ebx, ptr[&lod_i->u32[1]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm1, xmm5, 0);
psrldq(xmm5, 4);
@ -3047,13 +3066,13 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
punpckldq(xmm6, xmm1);
mov(ebx, ptr[&lod_i->u32[2]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm1, xmm5, 0);
psrldq(xmm5, 4);
mov(ebx, ptr[&lod_i->u32[3]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm4, xmm5, 0);
// psrldq(xmm5, 4);
@ -3070,7 +3089,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(m_sel.mmin && m_sel.lcm)
{
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
}
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
@ -3117,7 +3136,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{
const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_local.gd->clut] : ptr[ebx + eax * 4];
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
#if _M_SSE < 0x401

View File

@ -26,30 +26,13 @@
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
// Plain set of rasterizer counters; every field is zeroed on construction.
struct GSRasterizerStats
{
// Accumulated tick count (presumably timer ticks — the unit is not visible here; confirm at the producer).
int64 ticks;
// Primitive and pixel counters.
int prims, pixels;
// Starts with all counters cleared by delegating to Reset().
GSRasterizerStats()
{
Reset();
}
// Sets ticks, pixels and prims back to zero.
void Reset()
{
ticks = 0;
pixels = prims = 0;
}
};
template<class KEY, class VALUE> class GSFunctionMap
{
protected:
struct ActivePtr
{
uint64 frame, frames;
int64 ticks, pixels;
uint64 ticks, pixels;
VALUE f;
};
@ -101,7 +84,7 @@ public:
return m_active->f;
}
void UpdateStats(const GSRasterizerStats& stats, uint64 frame)
void UpdateStats(uint64 frame, uint64 ticks, int pixels)
{
if(m_active)
{
@ -111,14 +94,14 @@ public:
m_active->frames++;
}
m_active->pixels += stats.pixels;
m_active->ticks += stats.ticks;
m_active->ticks += ticks;
m_active->pixels += pixels;
}
}
virtual void PrintStats()
{
int64 ttpf = 0;
uint64 ttpf = 0;
typename hash_map<KEY, ActivePtr*>::iterator i;
@ -141,9 +124,9 @@ public:
if(p->frames > 0)
{
int64 tpp = p->pixels > 0 ? p->ticks / p->pixels : 0;
int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
uint64 tpp = p->pixels > 0 ? p->ticks / p->pixels : 0;
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
uint64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
printf("[%014llx]%c %6.2f%% | %5.2f%% | f %4lld | p %10lld | tpp %4lld | tpf %9lld | ppf %7lld\n",
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
@ -168,14 +151,6 @@ public:
}
};
#if 0 // we can't legally distribute vtune libraries or headers
#ifdef _WINDOWS
#include "vtune/JITProfiling.h"
#endif
#endif
template<class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
@ -215,8 +190,7 @@ public:
m_cgmap[key] = ret;
#if 0 // we can't legally distribute vtune libraries or headers
#ifdef _WINDOWS
#ifdef ENABLE_VTUNE
// vtune method registration
@ -254,7 +228,6 @@ public:
*/
}
#endif
#endif
delete cg;

View File

@ -117,10 +117,10 @@ bool RunLinuxDialog()
gtk_container_add(GTK_CONTAINER(main_box), interlace_label);
gtk_container_add(GTK_CONTAINER(main_box), interlace_combo_box);
swthreads_label = gtk_label_new("Software renderer threads:");
swthreads_label = gtk_label_new("Extra sw renderer threads:");
swthreads_text = gtk_entry_new();
char buf[5];
sprintf(buf, "%d", theApp.GetConfig("swthreads", 1));
sprintf(buf, "%d", theApp.GetConfig("extrathreads", 0));
gtk_entry_set_text(GTK_ENTRY(swthreads_text), buf);
gtk_container_add(GTK_CONTAINER(main_box), swthreads_label);
@ -178,7 +178,7 @@ bool RunLinuxDialog()
theApp.SetConfig( "interlace", (int)gtk_combo_box_get_active(GTK_COMBO_BOX(interlace_combo_box)) );
theApp.SetConfig("swthreads", atoi((char*)gtk_entry_get_text(GTK_ENTRY(swthreads_text))) );
theApp.SetConfig("extrathreads", atoi((char*)gtk_entry_get_text(GTK_ENTRY(swthreads_text))) );
theApp.SetConfig("filter", (int)gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(filter_check)));
theApp.SetConfig("logz", (int)gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(logz_check)));

View File

@ -466,30 +466,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
return i->second;
}
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 32);
o->hash = hash;
pixelAddress bn = m_psm[psm].bn;
for(int i = 0; i < 256; i++)
{
o->block.row[i] = (short)bn(0, i << 3, bp, bw);
}
o->block.col = m_psm[psm].blockOffset;
pixelAddress pa = m_psm[psm].pa;
for(int i = 0; i < 4096; i++)
{
o->pixel.row[i] = (int)pa(0, i & 0x7ff, bp, bw);
}
for(int i = 0; i < 8; i++)
{
o->pixel.col[i] = m_psm[psm].rowOffset[i];
}
GSOffset* o = new GSOffset(bp, bw, psm);
m_omap[hash] = o;
@ -547,6 +524,8 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
return o;
}
// Ordering predicate for GSVector2i: compares the x components only (y is ignored).
static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;}
list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
{
uint32 hash = TEX0.TBP0 | (TEX0.TBW << 14) | (TEX0.PSM << 20) | (TEX0.TW << 26);
@ -613,10 +592,18 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
}
}
// sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y)
vector<GSVector2i> tmp;
for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++)
{
p2t[page].push_back(GSVector2i(j->first, j->second));
tmp.push_back(GSVector2i(j->first, ~j->second));
}
std::sort(tmp.begin(), tmp.end(), cmp_vec2x);
p2t[page].insert(p2t[page].end(), tmp.begin(), tmp.end());
}
m_p2tmap[hash] = p2t;
@ -1976,3 +1963,100 @@ void GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm,
_aligned_free(bits);
}
// GSOffset
// Fills the block and pixel address lookup tables for the buffer identified by
// (_bp, _bw, _psm), using the per-format helpers stored in GSLocalMemory::m_psm.
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
{
	// pack the three parameters into a single key

	hash = _bp | (_bw << 14) | (_psm << 20);

	// block table: one entry per 8 pixel rows (y = 0, 8, 16, ...), 256 entries in total

	GSLocalMemory::pixelAddress blockAddr = GSLocalMemory::m_psm[_psm].bn;

	for(int y = 0; y < (256 << 3); y += 8)
	{
		block.row[y >> 3] = (short)blockAddr(0, y, _bp, _bw);
	}

	block.col = GSLocalMemory::m_psm[_psm].blockOffset;

	// pixel table: 4096 entries, the y coordinate wraps around every 2048 rows

	GSLocalMemory::pixelAddress pixelAddr = GSLocalMemory::m_psm[_psm].pa;

	for(int row = 0; row != 4096; row++)
	{
		pixel.row[row] = (int)pixelAddr(0, row % 2048, _bp, _bw);
	}

	// the column tables are shared with the format description, only the pointers are copied

	for(int k = 0; k != 8; k++)
	{
		pixel.col[k] = GSLocalMemory::m_psm[_psm].rowOffset[k];
	}
}
// Releases every page list that GetPages() allocated and stored in m_cache.
GSOffset::~GSOffset()
{
	hash_map<uint64, list<uint32>*>::iterator it = m_cache.begin();

	while(it != m_cache.end())
	{
		delete it->second;

		++it;
	}
}
// Returns the list of page numbers touched by rect, in ascending order.
//
// rect is first aligned outwards to the step size; if bbox is not NULL it receives the aligned rectangle.
// Results are cached in m_cache keyed by the aligned rectangle, the returned list stays owned by this
// object (it is deleted in the destructor), callers must not free it.
list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
{
	// step by pgs when the low 5 bits of bp are clear, otherwise by bs
	// NOTE(review): presumably "bp is page aligned -> walk whole pages, else walk blocks" - confirm

	GSVector2i step = (bp & 31) != 0 ? GSLocalMemory::m_psm[psm].bs : GSLocalMemory::m_psm[psm].pgs;

	GSVector4i area = rect.ralign<Align_Outside>(step);

	if(bbox) *bbox = area;

	uint64 key;

	GSVector4i::storel(&key, area.sra32(3).ps32()); // max 19-bit coordinates, should not be a problem (can shift right by 3 because it is mod8, smallest block size)

	hash_map<uint64, list<uint32>*>::iterator hit = m_cache.find(key);

	if(hit != m_cache.end())
	{
		return hit->second;
	}

	// one bit per page, 16 * 32 = 512 bits
	// NOTE(review): the n < MAX_PAGES test only protects this array if MAX_PAGES <= 512 - confirm

	uint32 mask[16] = {0};

	for(int y = area.top; y < area.bottom; y += step.y)
	{
		uint32 rowBase = block.row[y >> 3];

		for(int x = area.left; x < area.right; x += step.x)
		{
			uint32 page = (rowBase + block.col[x >> 3]) >> 5;

			if(page >= MAX_PAGES) continue;

			mask[page >> 5] |= 1 << (page & 31);
		}
	}

	// turn the bitmap into a list, lowest page number first

	list<uint32>* pages = new list<uint32>();

	for(int word = 0; word < countof(mask); word++)
	{
		uint32 bits = mask[word];

		if(bits != 0)
		{
			unsigned long bit;

			while(_BitScanForward(&bit, bits))
			{
				bits ^= 1 << bit; // clear the bit just found

				pages->push_back((word << 5) + bit);
			}
		}
	}

	m_cache[key] = pages;

	return pages;
}

View File

@ -26,22 +26,34 @@
#include "GSVector.h"
#include "GSBlock.h"
#include "GSClut.h"
#include "GSThread.h"
struct GSOffset
class GSOffset : public GSAlignedClass<32>
{
struct
hash_map<uint64, list<uint32>*> m_cache;
public:
__aligned(struct, 32) Block
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
} block;
struct
};
__aligned(struct, 32) Pixel
{
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
int* col[8]; // rowOffset*
} pixel;
};
union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};};
Block block;
Pixel pixel;
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
list<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
};
struct GSPixelOffset4

View File

@ -23,14 +23,14 @@
#include "GSPerfMon.h"
GSPerfMon::GSPerfMon()
: m_total(0)
, m_begin(0)
, m_frame(0)
: m_frame(0)
, m_lastframe(0)
, m_count(0)
{
memset(m_counters, 0, sizeof(m_counters));
memset(m_stats, 0, sizeof(m_stats));
memset(m_total, 0, sizeof(m_total));
memset(m_begin, 0, sizeof(m_begin));
}
void GSPerfMon::Put(counter_t c, double val)
@ -69,32 +69,35 @@ void GSPerfMon::Update()
memset(m_counters, 0, sizeof(m_counters));
}
void GSPerfMon::Start()
void GSPerfMon::Start(int timer)
{
m_start = __rdtsc();
m_start[timer] = __rdtsc();
if(m_begin == 0)
if(m_begin[timer] == 0)
{
m_begin = m_start;
m_begin[timer] = m_start[timer];
}
}
void GSPerfMon::Stop()
void GSPerfMon::Stop(int timer)
{
if(m_start > 0)
if(m_start[timer] > 0)
{
m_total += __rdtsc() - m_start;
m_start = 0;
m_total[timer] += __rdtsc() - m_start[timer];
m_start[timer] = 0;
}
}
int GSPerfMon::CPU()
int GSPerfMon::CPU(int timer, bool reset)
{
int percent = (int)(100 * m_total / (__rdtsc() - m_begin));
int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
m_begin = 0;
m_start = 0;
m_total = 0;
if(reset)
{
m_begin[timer] = 0;
m_start[timer] = 0;
m_total[timer] = 0;
}
return percent;
}

View File

@ -24,18 +24,30 @@
class GSPerfMon
{
public:
enum counter_t {Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, CounterLast};
enum timer_t
{
Main,
Sync,
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
WorkerSync0, WorkerSync1, WorkerSync2, WorkerSync3, WorkerSync4, WorkerSync5, WorkerSync6, WorkerSync7, WorkerSync8, WorkerSync9, WorkerSync10, WorkerSync11, WorkerSync12, WorkerSync13, WorkerSync14, WorkerSync15,
WorkerSleep0, WorkerSleep1, WorkerSleep2, WorkerSleep3, WorkerSleep4, WorkerSleep5, WorkerSleep6, WorkerSleep7, WorkerSleep8, WorkerSleep9, WorkerSleep10, WorkerSleep11, WorkerSleep12, WorkerSleep13, WorkerSleep14, WorkerSleep15,
TimerLast,
};
enum counter_t
{
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
CounterLast,
};
protected:
double m_counters[CounterLast];
double m_stats[CounterLast];
uint64 m_begin, m_total, m_start, m_frame;
uint64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
uint64 m_frame;
clock_t m_lastframe;
int m_count;
void Start();
void Stop();
friend class GSPerfMonAutoTimer;
public:
@ -43,17 +55,22 @@ public:
void SetFrame(uint64 frame) {m_frame = frame;}
uint64 GetFrame() {return m_frame;}
void Put(counter_t c, double val = 0);
double Get(counter_t c) {return m_stats[c];}
void Update();
int CPU();
void Start(int timer = Main);
void Stop(int timer = Main);
int CPU(int timer = Main, bool reset = true);
};
class GSPerfMonAutoTimer
{
GSPerfMon* m_pm;
int m_timer;
public:
GSPerfMonAutoTimer(GSPerfMon& pm) {(m_pm = &pm)->Start();}
~GSPerfMonAutoTimer() {m_pm->Stop();}
GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main) {m_timer = timer; (m_pm = pm)->Start(m_timer);}
~GSPerfMonAutoTimer() {m_pm->Stop(m_timer);}
};

View File

@ -24,19 +24,32 @@
#include "stdafx.h"
#include "GSRasterizer.h"
#define THREAD_HEIGHT 5
// - for more threads screen segments should be smaller to better distribute the pixels
// - but not too small to keep the threading overhead low
// - ideal value between 3 and 5, or log2(64 / number of threads)
GSRasterizer::GSRasterizer(IDrawScanline* ds)
#define THREAD_HEIGHT 4
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: m_ds(ds)
, m_id(-1)
, m_threads(-1)
, m_id(id)
, m_threads(threads)
, m_perfmon(perfmon)
{
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0;
m_myscanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64);
SetThreadId(0, 1);
int row = 0;
while(row < (2048 >> THREAD_HEIGHT))
{
for(int i = 0; i < threads; i++, row++)
{
m_myscanline[row] = i == id ? 1 : 0;
}
}
}
GSRasterizer::~GSRasterizer()
@ -48,90 +61,115 @@ GSRasterizer::~GSRasterizer()
delete m_ds;
}
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
bool GSRasterizer::IsOneOfMyScanlines(int top) const
{
return m_myscanline[scanline >> THREAD_HEIGHT] != 0;
return m_myscanline[top >> THREAD_HEIGHT] != 0;
}
void GSRasterizer::Draw(const GSRasterizerData* data)
// Returns true if any entry of m_myscanline covering the rows [top, bottom) is non-zero.
// Each entry stands for (1 << THREAD_HEIGHT) consecutive rows; bottom is rounded up to the next entry.
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
{
	const int first = top >> THREAD_HEIGHT;
	const int last = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT;

	for(int band = first; band < last; band++)
	{
		if(m_myscanline[band] != 0) return true;
	}

	return false;
}
// Returns top unchanged if its m_myscanline entry is non-zero, otherwise the first row of the
// next entry that is non-zero.
// There is no upper bound on the search, it relies on a non-zero entry existing after top.
int GSRasterizer::FindMyNextScanline(int top) const
{
	int band = top >> THREAD_HEIGHT;

	if(m_myscanline[band] != 0)
	{
		return top;
	}

	do {band++;}
	while(m_myscanline[band] == 0);

	return band << THREAD_HEIGHT;
}
// Forwards straight to Draw(), so the drawing is done synchronously inside this call.
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
{
Draw(data);
}
void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->count == 0) return;
m_ds->BeginDraw(data->param);
const GSVertexSW* vertices = data->vertices;
const int count = data->count;
const GSVertexSW* vertices_end = data->vertices + data->count;
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
m_scissor = data->scissor;
m_fscissor = GSVector4(data->scissor);
m_stats.Reset();
m_pixels = 0;
uint64 start = __rdtsc();
// NOTE: data->scissor_test with templated Draw* speeds up large point lists (ffxii videos), but do not seem to make any difference for others
switch(data->primclass)
{
case GS_POINT_CLASS:
m_stats.prims = count;
if(data->scissor_test) DrawPoint<true>(vertices, count);
else DrawPoint<false>(vertices, count);
if(scissor_test)
{
DrawPoint<true>(vertices, data->count);
}
else
{
DrawPoint<false>(vertices, data->count);
}
break;
case GS_LINE_CLASS:
ASSERT(!(count & 1));
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i]);
do {DrawLine(vertices); vertices += 2;}
while(vertices < vertices_end);
break;
case GS_TRIANGLE_CLASS:
ASSERT(!(count % 3));
m_stats.prims = count / 3;
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i]);
do {DrawTriangle(vertices); vertices += 3;}
while(vertices < vertices_end);
break;
case GS_SPRITE_CLASS:
ASSERT(!(count & 1));
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i]);
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
while(vertices < vertices_end);
break;
default:
__assume(0);
}
m_stats.ticks = __rdtsc() - start;
uint64 ticks = __rdtsc() - start;
m_ds->EndDraw(m_stats, data->frame);
_InterlockedExchangeAdd(&data->ticks, ticks);
_InterlockedExchangeAdd(&data->pixels, m_pixels);
m_ds->EndDraw(data->frame, ticks, m_pixels);
}
void GSRasterizer::SetThreadId(int id, int threads)
{
if(m_id != id || m_threads != threads)
{
m_id = id;
m_threads = threads;
if(threads > 1)
{
int row = 0;
while(row < (2048 >> THREAD_HEIGHT))
{
for(int i = 0; i < threads; i++, row++)
{
m_myscanline[row] = i == id ? 1 : 0;
}
}
}
else
{
memset(m_myscanline, 1, 2048 >> THREAD_HEIGHT);
}
}
}
void GSRasterizer::GetStats(GSRasterizerStats& stats)
{
stats = m_stats;
}
template<bool scissor_test>
template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{
for(; count > 0; count--, v++)
@ -142,7 +180,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{
if(IsOneOfMyScanlines(p.y))
{
m_stats.pixels++;
m_pixels++;
m_ds->SetupPrim(v, *v);
@ -160,7 +198,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy|
if(m_ds->IsEdge())
if(m_ds->HasEdge())
{
DrawEdge(v[0], v[1], dv, i, 0);
DrawEdge(v[0], v[1], dv, i, 1);
@ -191,7 +229,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
{
GSVector4 scissor = m_fscissor.xzxz();
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
@ -204,7 +242,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(pixels > 0)
{
m_stats.pixels += pixels;
m_pixels += pixels;
GSVertexSW dscan = dv / dv.p.xxxx();
@ -250,8 +288,6 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
m_edge.count = e - m_edge.buff;
m_stats.pixels += m_edge.count;
Flush(v, GSVertexSW::zero());
}
}
@ -308,7 +344,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
cross = (cross - cross.yxwz()).yyyy(); // select the second component, the negated cross product
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
int j = cross.upl(cross == GSVector4::zero()).mask();
@ -324,8 +360,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dx = dxy01.xzxy(dv[2].p);
GSVector4 dy = dxy01.ywyx(dv[2].p);
GSVector4 ddx[3];
GSVector4 ddx[3];
ddx[0] = dx / dy;
ddx[1] = ddx[0].yxzw();
ddx[2] = ddx[0].xzyw();
@ -343,7 +379,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0
GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0
GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0
dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0
dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1
@ -364,13 +400,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
edge.p = edge.p.insert<0, 1>(v[j].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p.xxxx(), v[1 - j].p.yyyy());
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p);
}
}
else
{
GSVector4 x0 = v[0].p.xxxx();
if(tb.x < tb.z)
{
edge = v[0];
@ -378,23 +412,23 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
edge.p = edge.p.xxzw();
dedge.p = ddx[j].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, x0, v[0].p.yyyy());
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p);
}
if(tb.y < tb.w)
{
edge = v[1];
edge.p = (x0 + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p.xxxx(), v[1].p.yyyy());
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p);
}
}
Flush(v, dscan);
if(m_ds->IsEdge())
if(m_ds->HasEdge())
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
@ -411,7 +445,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
}
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0, const GSVector4& y0)
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0)
{
ASSERT(top < bottom);
ASSERT(edge.p.x <= edge.p.y);
@ -420,39 +454,46 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
GSVector4 scissor = m_fscissor.xzxz();
while(1)
top = FindMyNextScanline(top);
while(top < bottom)
{
if(IsOneOfMyScanlines(top))
GSVector4 dy = GSVector4(top) - p0.yyyy();
GSVertexSW scan;
scan.p = edge.p + dedge.p * dy;
GSVector4 lrf = scan.p.ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4i lr = GSVector4i(l.xxyy(r));
int left = lr.extract32<0>();
int right = lr.extract32<2>();
int pixels = right - left;
if(pixels > 0)
{
GSVertexSW scan = edge + dedge * (GSVector4(top) - y0);
GSVector4 lrf = scan.p.ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4i lr = GSVector4i(l.xxyy(r));
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
int left = lr.extract32<0>();
int right = lr.extract32<2>();
int pixels = right - left;
if(pixels > 0)
{
m_stats.pixels += pixels;
GSVector4 prestep = l.xxxx() - x0;
AddScanline(e++, pixels, left, top, scan + dscan * prestep);
}
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
}
if(++top >= bottom) break;
top++;
if(!IsOneOfMyScanlines(top))
{
top += (m_threads - 1) << THREAD_HEIGHT;
}
}
m_edge.count += e - &m_edge.buff[m_edge.count];
}
void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
{
GSVertexSW v[2];
@ -473,13 +514,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
GSVertexSW scan = v[0];
if(m_ds->IsRect())
if(solidrect)
{
if(m_id == 0)
{
m_ds->DrawRect(r, scan);
m_stats.pixels += r.width() * r.height();
m_pixels += r.width() * r.height();
}
return;
@ -508,7 +549,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
{
if(IsOneOfMyScanlines(r.top))
{
m_stats.pixels += r.width();
m_pixels += r.width();
m_ds->DrawScanline(r.width(), r.left, r.top, scan);
}
@ -533,7 +574,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
// TODO: it does not always line up with the edge of the surrounded triangle
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
if(orientation)
@ -707,11 +748,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
}
int count = e - &m_edge.buff[m_edge.count];
m_stats.pixels += count;
m_edge.count += count;
m_edge.count += e - &m_edge.buff[m_edge.count];
}
void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan)
@ -738,24 +775,28 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
if(!edge)
{
do
do
{
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_pixels += pixels;
m_ds->DrawScanline(pixels, left, top, *e++);
}
while(e < ee);
}
else
{
do
do
{
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_pixels += pixels;
m_ds->DrawEdge(pixels, left, top, *e++);
}
while(e < ee);
@ -767,101 +808,99 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
//
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, volatile long& sync)
: GSRasterizer(ds)
, m_sync(sync)
, m_data(NULL)
{
CreateThread();
}
GSRasterizerMT::~GSRasterizerMT()
{
Draw(NULL);
CloseThread();
}
void GSRasterizerMT::Draw(const GSRasterizerData* data)
{
m_data = data;
m_draw.Set();
}
void GSRasterizerMT::ThreadProc()
{
while(m_draw.Wait() && m_data != NULL)
{
GSRasterizer::Draw(m_data);
_interlockedbittestandreset(&m_sync, m_id);
}
}
//
GSRasterizerList::GSRasterizerList()
: m_sync(0)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_sync_count(0)
{
}
GSRasterizerList::~GSRasterizerList()
{
for(size_t i = 0; i < size(); i++) delete (*this)[i];
for(vector<GSWorker*>::iterator i = m_workers.begin(); i != m_workers.end(); i++)
{
delete *i;
}
}
// Hands one draw job to the list. The job is currently passed to Process() directly
// on the calling thread, the Push() path is commented out.
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
{
// disable dispatcher thread for now and pass-through directly,
// would only be relevant if data->syncpoint was utilized more,
// it would hide the syncing latency from the main gs thread
// Push(data);
// NOTE(review): m_count is bumped by hand here, presumably to stand in for the bookkeeping Push() would do - confirm against GSJobQueue
Process(data); m_count++;
}
void GSRasterizerList::Sync()
{
while(m_sync) _mm_pause();
if(GetCount() == 0) return;
m_stats.ticks = __rdtsc() - m_start;
Wait(); // first dispatch all items to workers
for(int i = 0; i < m_threads; i++)
for(size_t i = 0; i < m_workers.size(); i++)
{
GSRasterizerStats s;
(*this)[i]->GetStats(s);
m_stats.pixels += s.pixels;
m_stats.prims = std::max<int>(m_stats.prims, s.prims);
m_workers[i]->Wait(); // then wait all workers to finish their jobs
}
m_sync_count++;
}
void GSRasterizerList::Draw(const GSRasterizerData* data, int width, int height)
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
m_stats.Reset();
m_start = __rdtsc();
m_threads = std::min<int>(1 + (height >> THREAD_HEIGHT), size());
m_sync = 0;
for(int i = 1; i < m_threads; i++)
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
{
m_sync |= 1 << i;
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
m_workers.front()->Process(item);
return;
}
for(int i = 1; i < m_threads; i++)
if(item->syncpoint)
{
(*this)[i]->SetThreadId(i, m_threads);
(*this)[i]->Draw(data);
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
}
(*this)[0]->SetThreadId(0, m_threads);
(*this)[0]->Draw(data);
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Push(item);
}
}
void GSRasterizerList::GetStats(GSRasterizerStats& stats)
// GSRasterizerList::GSWorker
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_r(r)
{
stats = m_stats;
}
void GSRasterizerList::PrintStats()
GSRasterizerList::GSWorker::~GSWorker()
{
if(!empty())
Wait();
delete m_r;
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{
GSVector4i r = item->bbox.rintersect(item->scissor);
if(m_r->IsOneOfMyScanlines(r.top, r.bottom))
{
front()->PrintStats();
GSJobQueue<shared_ptr<GSRasterizerData> >::Push(item);
}
}
// Draws one item with the rasterizer owned by this worker (m_r).
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item);
}

View File

@ -26,22 +26,47 @@
#include "GSFunctionMap.h"
#include "GSThread.h"
#include "GSAlignedClass.h"
#include "GSPerfMon.h"
//
#define JIT_DRAW
__aligned(class, 32) GSRasterizerData
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
{
public:
GSVector4i scissor;
bool scissor_test;
GSVector4i bbox;
GS_PRIM_CLASS primclass;
const GSVertexSW* vertices;
GSVertexSW* vertices;
int count;
bool solidrect;
bool syncpoint;
uint64 frame;
const void* param;
void* param;
GSRasterizerData() : scissor_test(true) {}
// drawing stats
volatile long ticks;
volatile long pixels;
GSRasterizerData()
: scissor(GSVector4i::zero())
, bbox(GSVector4i::zero())
, primclass(GS_INVALID_CLASS)
, vertices(NULL)
, count(0)
, solidrect(false)
, syncpoint(false)
, frame(0)
, param(NULL)
, ticks(0)
, pixels(0)
{
}
virtual ~GSRasterizerData()
{
if(vertices != NULL) _aligned_free(vertices);
// derived class should free param and its members
}
};
class IDrawScanline : public GSAlignedClass<32>
@ -62,10 +87,9 @@ public:
virtual ~IDrawScanline() {}
virtual void BeginDraw(const void* param) = 0;
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
virtual void PrintStats() = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
#ifdef JIT_DRAW
#ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
@ -81,32 +105,30 @@ public:
#endif
__forceinline bool IsEdge() const {return m_de != NULL;}
__forceinline bool IsRect() const {return m_dr != NULL;}
__forceinline bool HasEdge() const {return m_de != NULL;}
};
class IRasterizer
class IRasterizer : public GSAlignedClass<32>
{
public:
virtual ~IRasterizer() {}
virtual void Draw(const GSRasterizerData* data) = 0;
virtual void SetThreadId(int id, int threads) = 0;
virtual void GetStats(GSRasterizerStats& stats) = 0;
virtual void PrintStats() = 0;
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
virtual void Sync() = 0;
};
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
__aligned(class, 32) GSRasterizer : public IRasterizer
{
protected:
GSPerfMon* m_perfmon;
IDrawScanline* m_ds;
int m_id;
int m_threads;
uint8* m_myscanline;
GSRasterizerStats m_stats;
GSVector4i m_scissor;
GSVector4 m_fscissor;
struct {GSVertexSW* buff; int count;} m_edge;
int m_pixels;
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
@ -114,73 +136,86 @@ protected:
void DrawPoint(const GSVertexSW* v, int count);
void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v, bool solidrect);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0, const GSVector4& y0);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline bool IsOneOfMyScanlines(int scanline) const;
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
public:
GSRasterizer(IDrawScanline* ds);
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizer();
// IRasterizer
__forceinline bool IsOneOfMyScanlines(int top) const;
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const;
void Draw(const GSRasterizerData* data);
void SetThreadId(int id, int threads);
void GetStats(GSRasterizerStats& stats);
void PrintStats() {m_ds->PrintStats();}
};
class GSRasterizerMT : public GSRasterizer, private GSThread
{
protected:
volatile long& m_sync;
GSAutoResetEvent m_draw;
const GSRasterizerData* m_data;
void ThreadProc();
public:
GSRasterizerMT(IDrawScanline* ds, volatile long& sync);
virtual ~GSRasterizerMT();
void Draw(shared_ptr<GSRasterizerData> data);
// IRasterizer
void Draw(const GSRasterizerData* data);
void Queue(shared_ptr<GSRasterizerData> data);
void Sync() {}
};
class GSRasterizerList : protected vector<IRasterizer*>
class GSRasterizerList
: public IRasterizer
, private GSJobQueue<shared_ptr<GSRasterizerData> >
{
protected:
volatile long m_sync;
GSRasterizerStats m_stats;
int64 m_start;
int m_threads;
class GSWorker : public GSJobQueue<shared_ptr<GSRasterizerData> >
{
GSRasterizer* m_r;
public:
GSWorker(GSRasterizer* r);
virtual ~GSWorker();
// GSJobQueue
void Push(const shared_ptr<GSRasterizerData>& item);
void Process(shared_ptr<GSRasterizerData>& item);
};
vector<GSWorker*> m_workers;
public:
GSRasterizerList();
// GSJobQueue
void Process(shared_ptr<GSRasterizerData>& item);
public:
virtual ~GSRasterizerList();
template<class DS> void Create(int threads)
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
{
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
threads = std::max<int>(threads, 0);
push_back(new GSRasterizer(new DS()));
for(int i = 1; i < threads; i++)
if(threads == 0)
{
push_back(new GSRasterizerMT(new DS(), m_sync));
return new GSRasterizer(new DS(), 0, 1, perfmon);
}
else
{
GSRasterizerList* rl = new GSRasterizerList();
for(int i = 0; i < threads; i++)
{
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
}
return rl;
}
}
void Sync();
int m_sync_count;
void Draw(const GSRasterizerData* data, int width, int height);
void GetStats(GSRasterizerStats& stats);
void PrintStats();
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
};

View File

@ -289,7 +289,7 @@ void GSRenderer::SetVSync(bool enabled)
void GSRenderer::VSync(int field)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
m_perfmon.Put(GSPerfMon::Frame);

View File

@ -67,6 +67,7 @@ public:
bool s_save;
bool s_savez;
int s_saven;
GSCritSec s_lock;
public:
GSRenderer();

View File

@ -45,7 +45,7 @@ void GSRendererDX11::VertexKick(bool skip)
dst = *(GSVertexHW11*)&m_v;
#ifdef USE_UPSCALE_HACKS
#ifdef ENABLE_UPSCALE_HACKS
if(tme && fst)
{

View File

@ -84,7 +84,7 @@ void GSRendererDX9::VertexKick(bool skip)
{
dst.t = m_v.GetUV();
#ifdef USE_UPSCALE_HACKS
#ifdef ENABLE_UPSCALE_HACKS
int Udiff = 0;
int Vdiff = 0;

View File

@ -747,16 +747,18 @@ protected:
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false)
{
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
if(clut) return; // FIXME
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
}
void Draw()
{
#ifndef NO_CRC_HACKS
#ifndef DISABLE_CRC_HACKS
if(GSRendererT<Vertex>::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
@ -922,7 +924,7 @@ protected:
s_n++;
}
#ifdef HW_NO_TEXTURE_CACHE
#ifdef DISABLE_HW_TEXTURE_CACHE
m_tc->Read(rt, r);
#endif
}

View File

@ -54,7 +54,7 @@ void GSRendererOGL::VertexKick(bool skip)
dst = *(GSVertexHW11*)&m_v;
#ifdef USE_UPSCALE_HACKS
#ifdef ENABLE_UPSCALE_HACKS
if(tme && fst)
{

View File

@ -25,6 +25,7 @@
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL)
{
InitVertexKick(GSRendererSW);
@ -32,9 +33,12 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_texture, 0, sizeof(m_texture));
m_rl.Create<GSDrawScanline>(threads);
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
}
GSRendererSW::~GSRendererSW()
@ -46,6 +50,8 @@ GSRendererSW::~GSRendererSW()
delete m_texture[i];
}
delete m_rl;
_aligned_free(m_output);
}
@ -61,6 +67,40 @@ void GSRendererSW::Reset()
void GSRendererSW::VSync(int field)
{
Sync(0); // IncAge might delete a cached texture in use
/*
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n",
m_perfmon.CPU(GSPerfMon::Main),
m_perfmon.CPU(GSPerfMon::Sync),
m_perfmon.CPU(GSPerfMon::WorkerSync0),
m_perfmon.CPU(GSPerfMon::WorkerSleep0),
m_perfmon.CPU(GSPerfMon::WorkerDraw0),
m_perfmon.CPU(GSPerfMon::WorkerSync1),
m_perfmon.CPU(GSPerfMon::WorkerSleep1),
m_perfmon.CPU(GSPerfMon::WorkerDraw1),
m_perfmon.CPU(GSPerfMon::WorkerSync2),
m_perfmon.CPU(GSPerfMon::WorkerSleep2),
m_perfmon.CPU(GSPerfMon::WorkerDraw2),
m_perfmon.CPU(GSPerfMon::WorkerSync3),
m_perfmon.CPU(GSPerfMon::WorkerSleep3),
m_perfmon.CPU(GSPerfMon::WorkerDraw3),
m_perfmon.CPU(GSPerfMon::WorkerSync4),
m_perfmon.CPU(GSPerfMon::WorkerSleep4),
m_perfmon.CPU(GSPerfMon::WorkerDraw4),
m_perfmon.CPU(GSPerfMon::WorkerSync5),
m_perfmon.CPU(GSPerfMon::WorkerSleep5),
m_perfmon.CPU(GSPerfMon::WorkerDraw5),
m_perfmon.CPU(GSPerfMon::WorkerSync6),
m_perfmon.CPU(GSPerfMon::WorkerSleep6),
m_perfmon.CPU(GSPerfMon::WorkerDraw6),
m_perfmon.CPU(GSPerfMon::WorkerSync7),
m_perfmon.CPU(GSPerfMon::WorkerSleep7),
m_perfmon.CPU(GSPerfMon::WorkerDraw7));
//
printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0;
*/
GSRendererT<GSVertexSW>::VSync(field);
m_tc->IncAge();
@ -87,6 +127,8 @@ void GSRendererSW::ResetDevice()
GSTexture* GSRendererSW::GetOutput(int i)
{
Sync(1);
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
int w = DISPFB.FBW * 64;
@ -122,25 +164,104 @@ GSTexture* GSRendererSW::GetOutput(int i)
void GSRendererSW::Draw()
{
if(m_dump)
{
m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
}
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
GSScanlineGlobalData gd;
GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
if(!GetScanlineGlobalData(gd))
scissor.z = std::min<int>(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
GSVector4i r = bbox.rintersect(scissor);
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r);
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
GSRasterizerData2* data2 = new GSRasterizerData2(this, fb_pages, zb_pages);
shared_ptr<GSRasterizerData> data(data2);
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
if(!GetScanlineGlobalData(*gd))
{
return;
}
if(!gd.sel.fwrite && !gd.sel.zwrite)
data->scissor = scissor;
data->bbox = bbox;
data->primclass = m_vt.m_primclass;
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later?
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here
data->count = m_count;
data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
//
if(gd->sel.fwrite)
{
return;
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
if(gd->sel.zwrite)
{
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
}
// set data->syncpoint
if(m_fzb != m_context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
data->syncpoint = true;
}
// - cross-check frame and z-buffer pages, they cannot overlap with each other or with previous batches in the queue
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
if(!data->syncpoint)
{
if(gd->sel.fwrite)
{
for(list<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer
{
data->syncpoint = true;
break;
}
}
}
}
if(!data->syncpoint)
{
if(gd->sel.zwrite)
{
for(list<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer
{
data->syncpoint = true;
break;
}
}
}
}
//
data2->UseTargetPages();
//
if(s_dump)
{
Sync(3);
uint64 frame = m_perfmon.GetFrame();
string s;
@ -158,7 +279,7 @@ void GSRendererSW::Draw()
{
s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);//GetFrameSize(1).cy);
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
}
if(s_savez && s_n >= s_saven)
@ -169,58 +290,16 @@ void GSRendererSW::Draw()
}
s_n++;
}
GSVector4i scissor(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p));
GSVector4i r = bbox.rintersect(scissor);
GSRasterizerData data;
m_rl->Queue(data);
data.scissor = scissor;
data.scissor.z = std::min<int>(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.scissor_test = !bbox.eq(r);
data.primclass = m_vt.m_primclass;
data.vertices = m_vertices;
data.count = m_count;
data.frame = m_perfmon.GetFrame();
data.param = &gd;
m_rl.Draw(&data, r.width(), r.height());
if(gd.sel.fwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
}
if(gd.sel.zwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
}
// By only syncing here we can do the two InvalidateVideoMem calls free if the other threads finish
// their drawings later than this one (they usually do because they start on an event).
m_rl.Sync();
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
{
uint64 frame = m_perfmon.GetFrame();
string s;
Sync(4);
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);//GetFrameSize(1).cy);
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
}
if(s_savez && s_n >= s_saven)
@ -232,20 +311,125 @@ void GSRendererSW::Draw()
s_n++;
}
else
{
m_rl->Queue(data);
}
int prims = 0;
switch(data->primclass)
{
case GS_POINT_CLASS: prims = data->count; break;
case GS_LINE_CLASS: prims = data->count / 2; break;
case GS_TRIANGLE_CLASS: prims = data->count / 3; break;
case GS_SPRITE_CLASS: prims = data->count / 2; break;
}
m_perfmon.Put(GSPerfMon::Prim, prims);
/*
if(0)//stats.ticks > 5000000)
{
printf("* [%lld | %012llx] ticks %lld prims %d (%d) pixels %d (%d)\n",
m_perfmon.GetFrame(), gd.sel.key,
m_perfmon.GetFrame(), gd->sel.key,
stats.ticks,
stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1,
stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1);
}
*/
}
void GSRendererSW::Sync(int reason)
{
	// Waits on the rasterizer (m_rl->Sync()) and then forgets which texture
	// pages were marked as in use. `reason` is only a caller tag for the
	// debug trace below and does not affect behavior.

	//printf("sync %d\n", reason);

	// Scoped timer registered under the GSPerfMon::Sync counter; it must be
	// constructed before the wait so it stays alive for the whole function.
	GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);

	m_rl->Sync();

	// NOTE: m_fzb_pages is refcounted, zeroing is done automatically.
	// Only the texture page bitmap has to be wiped by hand.
	for(size_t word = 0; word < sizeof(m_tex_pages) / sizeof(m_tex_pages[0]); word++)
	{
		m_tex_pages[word] = 0;
	}
}
// Invalidates the texture cache for the pages covered by rectangle `r` of the
// buffer described by BITBLTBUF.DBP/DBW/DPSM, then calls Sync(5) if any of
// those pages is currently tracked as a draw target (m_fzb_pages) or as a
// texture source (m_tex_pages).
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// NOTE(review): this call and the GetOffset/GetPages/InvalidatePages sequence
// right below it cover the same range; in this diff-style listing it looks
// like the pre-change line kept next to its replacement - confirm that only
// one of the two is meant to remain.
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
list<uint32>* pages = o->GetPages(r);
m_tc->InvalidatePages(pages, o->psm);
// check whether any of the changing pages is in use, either as a texture or as a target
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 page = *i;
// m_fzb_pages[page] is non-zero while the page has outstanding target users;
// m_tex_pages is a bitmap with one bit per page (word = page >> 5, bit = page & 31)
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31))))
{
// one sync is enough for the whole rectangle, so stop scanning
Sync(5);
break;
}
}
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
list<uint32>* pages = o->GetPages(r);
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
if(m_fzb_pages[*i])
{
Sync(6);
break;
}
}
}
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset)
{
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
}
}
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
{
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
}
}
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
{
	// Registers texture `t` as a source for the batch being prepared:
	// 1) if any of its pages still has outstanding target users, sync first;
	// 2) record its pages in m_tex_pages so that InvalidateVideoMem can tell
	//    the texture is in use and sync before its pages are overwritten.

	for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
	{
		if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
		{
			Sync(7);

			// Do not return here: Sync() has just cleared m_tex_pages, and the
			// batch that is about to be queued still samples this texture, so
			// its pages must be recorded below like in the no-sync case.
			break;
		}
	}

	for(size_t i = 0; i < countof(t->m_pages.bm); i++)
	{
		m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used
	}
}
#include "GSTextureSW.h"
@ -257,7 +441,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
gd.vm = m_mem.m_vm8;
gd.dimx = env.dimx;
gd.fbr = context->offset.fb->pixel.row;
gd.zbr = context->offset.zb->pixel.row;
@ -315,6 +498,11 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
bool fwrite = fm != 0xffffffff;
bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
bool zwrite = zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
if(!fwrite && !zwrite) return false;
gd.sel.fwrite = fwrite;
gd.sel.ftest = ftest;
@ -329,13 +517,20 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(PRIM->TME)
{
gd.clut = m_mem.m_clut;
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear();
gd.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
gd.sel.tlu = 1;
gd.clut = (uint32*)_aligned_malloc(sizeof(uint32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
memcpy(gd.clut, (const uint32*)m_mem.m_clut, sizeof(uint32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
}
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
@ -346,13 +541,17 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.tfx = TFX_DECAL;
}
GSTextureCacheSW::Texture* t = m_tc->Lookup(context->TEX0, env.TEXA);
if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf);
const GSTextureCacheSW::Texture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
if(t == NULL) {ASSERT(0); return false;}
if(!t->Update(r)) {ASSERT(0); return false;}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
{
@ -360,7 +559,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
string s;
if(s_save && s_n >= s_saven && PRIM->TME)
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp1\\_%05d_f%lld_tex32_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
@ -380,7 +579,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// 011 p tri
// 100 l round
// 101 l tri
if(m_vt.m_lod.x > 0)
{
gd.sel.ltf = context->TEX1.MMIN >> 2;
@ -456,29 +655,29 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
switch(i)
{
case 1:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP1;
MIP_TEX0.TBW = context->MIPTBP1.TBW1;
case 1:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP1;
MIP_TEX0.TBW = context->MIPTBP1.TBW1;
break;
case 2:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP2;
MIP_TEX0.TBW = context->MIPTBP1.TBW2;
case 2:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP2;
MIP_TEX0.TBW = context->MIPTBP1.TBW2;
break;
case 3:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP3;
MIP_TEX0.TBW = context->MIPTBP1.TBW3;
case 3:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP3;
MIP_TEX0.TBW = context->MIPTBP1.TBW3;
break;
case 4:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP4;
MIP_TEX0.TBW = context->MIPTBP2.TBW4;
case 4:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP4;
MIP_TEX0.TBW = context->MIPTBP2.TBW4;
break;
case 5:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP5;
MIP_TEX0.TBW = context->MIPTBP2.TBW5;
case 5:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP5;
MIP_TEX0.TBW = context->MIPTBP2.TBW5;
break;
case 6:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP6;
MIP_TEX0.TBW = context->MIPTBP2.TBW6;
case 6:
MIP_TEX0.TBP0 = context->MIPTBP2.TBP6;
MIP_TEX0.TBW = context->MIPTBP2.TBW6;
break;
default:
__assume(0);
@ -495,13 +694,17 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t);
GSVector4i r;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
const GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;}
if(!t->Update(r)) {ASSERT(0); return false;}
gd.tex[i] = t->m_buff;
@ -699,11 +902,16 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.colclamp = env.COLCLAMP.CLAMP;
gd.sel.fba = context->FBA.FBA;
gd.sel.dthe = env.DTHE.DTHE;
}
bool zwrite = zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
if(env.DTHE.DTHE)
{
gd.sel.dthe = 1;
gd.dimx = (GSVector4i*)_aligned_malloc(sizeof(env.dimx), 32);
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
}
}
gd.sel.zwrite = zwrite;
gd.sel.ztest = ztest;
@ -782,25 +990,24 @@ void GSRendererSW::VertexKick(bool skip)
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(prim);
if(!m_dump)
{
GSVector4 pmin, pmax;
switch(prim)
switch(primclass)
{
case GS_POINTLIST:
case GS_POINT_CLASS:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLE_CLASS:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
@ -810,21 +1017,17 @@ if(!m_dump)
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
switch(primclass)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
test |= pmin.ceil() == pmax.ceil();
break;
}
switch(prim)
switch(primclass)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLE_CLASS:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
@ -836,42 +1039,26 @@ if(!m_dump)
return;
}
}
switch(prim)
switch(primclass)
{
case GS_POINTLIST:
case GS_POINT_CLASS:
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_LINE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE:
case GS_SPRITE_CLASS:
break;
}
if(m_count < 30 && m_count >= 3)
{
GSVertexSW* v = &m_vertices[m_count - 3];
int tl = 0;
int br = 0;
bool isquad = false;
switch(prim)
{
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLELIST:
isquad = GSVertexSW::IsQuad(v, tl, br);
break;
}
if(isquad)
if(primclass == GS_TRIANGLE_CLASS && GSVertexSW::IsQuad(&m_vertices[m_count - 3], tl, br))
{
m_count -= 3;

View File

@ -27,12 +27,92 @@
class GSRendererSW : public GSRendererT<GSVertexSW>
{
// Per-batch rasterizer payload for the software renderer.
//
// Owns the GSScanlineGlobalData block stored in `param` (plus the optional
// clut/dimx copies hanging off it) and keeps the parent's per-page target
// counters balanced: UseTargetPages() acquires the frame/z pages this batch
// writes, and the pages are released again when the object is destroyed.
class GSRasterizerData2 : public GSRasterizerData
{
	GSRendererSW* m_parent;
	const list<uint32>* m_fb_pages; // pages covered by the frame buffer write
	const list<uint32>* m_zb_pages; // pages covered by the z-buffer write
	bool m_using_pages;             // true between UseTargetPages() and ReleaseTargetPages()

public:
	GSRasterizerData2(GSRendererSW* parent, const list<uint32>* fb_pages, const list<uint32>* zb_pages)
		: m_parent(parent)
		, m_fb_pages(fb_pages)
		, m_zb_pages(zb_pages)
		, m_using_pages(false)
	{
		GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);

		// Only the fields the destructor and the page helpers look at are
		// initialized here; the rest is filled in by the renderer.
		gd->sel.key = 0;
		gd->clut = NULL;
		gd->dimx = NULL;

		param = gd;
	}

	virtual ~GSRasterizerData2()
	{
		// The batch can be dropped before UseTargetPages() was ever called
		// (Draw() returns early when GetScanlineGlobalData() fails), so only
		// release what was actually acquired. Calling ReleaseTargetPages()
		// unconditionally would trip its ASSERT on that path.
		if(m_using_pages)
		{
			ReleaseTargetPages();
		}

		GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;

		if(gd->clut) _aligned_free(gd->clut);
		if(gd->dimx) _aligned_free(gd->dimx);

		_aligned_free(gd);

		// `pixels` is not declared in this class - presumably inherited from
		// GSRasterizerData and filled in while the batch is drawn (TODO confirm).
		m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
	}

	// Marks the frame and/or z-buffer pages of this batch as in use, depending
	// on which of sel.fwrite / sel.zwrite is set. Must be called at most once.
	void UseTargetPages()
	{
		if(m_using_pages) {ASSERT(0); return;}

		GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;

		if(gd->sel.fwrite)
		{
			m_parent->UseTargetPages(m_fb_pages, 0);
		}

		if(gd->sel.zwrite)
		{
			m_parent->UseTargetPages(m_zb_pages, 1);
		}

		m_using_pages = true;
	}

	// Undoes UseTargetPages(). Calling it without a matching UseTargetPages()
	// is a programming error and asserts.
	void ReleaseTargetPages()
	{
		if(!m_using_pages) {ASSERT(0); return;}

		GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;

		if(gd->sel.fwrite)
		{
			m_parent->ReleaseTargetPages(m_fb_pages, 0);
		}

		if(gd->sel.zwrite)
		{
			m_parent->ReleaseTargetPages(m_zb_pages, 1);
		}

		m_using_pages = false;
	}
};
protected:
GSRasterizerList m_rl;
IRasterizer* m_rl;
GSTextureCacheSW* m_tc;
GSTexture* m_texture[2];
uint8* m_output;
bool m_reset;
GSPixelOffset4* m_fzb;
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint32 m_tex_pages[16];
void Reset();
void VSync(int field);
@ -40,7 +120,13 @@ protected:
GSTexture* GetOutput(int i);
void Draw();
void Sync(int reason);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UseTargetPages(const list<uint32>* pages, int offset);
void ReleaseTargetPages(const list<uint32>* pages, int offset);
void UseSourcePages(const GSTextureCacheSW::Texture* t);
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
@ -48,6 +134,6 @@ public:
GSRendererSW(int threads);
virtual ~GSRendererSW();
template<uint32 prim, uint32 tme, uint32 fst>
template<uint32 prim, uint32 tme, uint32 fst>
void VertexKick(bool skip);
};

View File

@ -107,15 +107,14 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
{
GSScanlineSelector sel;
// - the data of vm, tex, clut, dimx may change, multi-threaded drawing must be finished before that happens (an idea: remember which pages are used, sync when something needs to read or write them)
// - the data of vm, tex may change, multi-threaded drawing must be finished before that happens, clut and dimx are copies
// - tex is a cached texture, it may be recycled to free up memory, its absolute address cannot be compiled into code
// - row and column pointers are allocated once and are never changed or freed, so their addresses can be used directly
// - if in the future drawing does not have to be synchronized per batch, the rest of GSRasterizerData should be copied here, too (scissor, prim type, vertices)
void* vm;
const void* tex[7];
const uint32* clut;
const GSVector4i* dimx;
uint32* clut;
GSVector4i* dimx;
const int* fbr;
const int* zbr;

View File

@ -130,8 +130,8 @@ void GSSettingsDlg::OnInit()
SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("msaa", 0), 0));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 1));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("swthreads", 1), 0));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("extrathreads", 0), 0));
UpdateControls();
}
@ -256,7 +256,11 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
theApp.SetConfig("upscale_multiplier", 1);
}
theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED));
if(GetId() == IDD_CONFIG) // TODO: other options may not be present in IDD_CONFIG2 as well
{
theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED));
}
theApp.SetConfig("filter", (int)IsDlgButtonChecked(m_hWnd, IDC_FILTER));
theApp.SetConfig("paltex", (int)IsDlgButtonChecked(m_hWnd, IDC_PALTEX));
theApp.SetConfig("vsync", (int)IsDlgButtonChecked(m_hWnd, IDC_VSYNC));
@ -266,7 +270,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
theApp.SetConfig("nativeres", (int)IsDlgButtonChecked(m_hWnd, IDC_NATIVERES));
theApp.SetConfig("resx", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESX), UDM_GETPOS, 0, 0));
theApp.SetConfig("resy", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_GETPOS, 0, 0));
theApp.SetConfig("swthreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0));
theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0));
theApp.SetConfig("msaa", (int)SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_GETPOS, 0, 0));
// Hacks
theApp.SetConfig("UserHacks_AlphaHack", (int)IsDlgButtonChecked(m_hWnd, IDC_ALPHAHACK));

View File

@ -22,11 +22,6 @@
#include "stdafx.h"
#include "GSState.h"
//#define DISABLE_BITMASKING
//#define DISABLE_COLCLAMP
//#define DISABLE_DATE
//see stdafx.h for #define HW_NO_TEXTURE_CACHE and #define NO_CRC_HACKS
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
//#define Offset_UV // Fixes / breaks various titles
@ -391,12 +386,12 @@ float GSState::GetFPS()
// GIFPackedRegHandler*
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
{
// ASSERT(0);
}
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
{
#if _M_SSE >= 0x301
@ -423,7 +418,7 @@ __forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
m_v.RGBAQ.Q = m_q;
}
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
{
#if defined(_M_AMD64)
@ -450,7 +445,7 @@ __forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
#endif
}
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
{
#if _M_SSE >= 0x200
@ -470,7 +465,7 @@ __forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
#endif
}
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{
m_v.XYZ.X = r->XYZF2.X;
m_v.XYZ.Y = r->XYZF2.Y;
@ -480,7 +475,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
VertexKick(r->XYZF2.ADC);
}
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{
m_v.XYZ.X = r->XYZ2.X;
m_v.XYZ.Y = r->XYZ2.Y;
@ -489,23 +484,23 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
VertexKick(r->XYZ2.ADC);
}
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
{
m_v.FOG.F = r->FOG.F;
}
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
{
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
}
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r)
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
{
}
// GIFRegHandler*
void GSState::GIFRegHandlerNull(const GIFReg* r)
void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
{
// ASSERT(0);
}
@ -536,19 +531,19 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
ResetPrim();
}
void GSState::GIFRegHandlerPRIM(const GIFReg* r)
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
{
ALIGN_STACK(32);
ApplyPRIM(r->PRIM);
}
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* r)
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
{
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
}
__forceinline void GSState::GIFRegHandlerST(const GIFReg* r)
__forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
{
m_v.ST = (GSVector4i)r->ST;
@ -559,7 +554,7 @@ __forceinline void GSState::GIFRegHandlerST(const GIFReg* r)
#endif
}
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* r)
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
{
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
@ -569,7 +564,7 @@ __forceinline void GSState::GIFRegHandlerUV(const GIFReg* r)
#endif
}
void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
{
/*
m_v.XYZ.X = r->XYZF.X;
@ -584,7 +579,7 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
VertexKick(false);
}
void GSState::GIFRegHandlerXYZ2(const GIFReg* r)
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
@ -597,7 +592,11 @@ void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);
if(wt || PRIM->CTXT == i && TEX0 != m_env.CTXT[i].TEX0)
// clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this)
uint64 mask = 0x1f78001c3fffffffull; // TBP0 TBW PSM TW TCC TFX CPSM CSA
if(wt || PRIM->CTXT == i && ((TEX0.u64 ^ m_env.CTXT[i].TEX0.u64) & mask))
{
Flush();
}
@ -618,11 +617,24 @@ void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
if(wt)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = TEX0.CBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = TEX0.CSM;
GSVector4i r = GSVector4i::zero();
r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x;
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y;
InvalidateLocalMem(BITBLTBUF, r, true);
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
}
}
template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
{
GIFRegTEX0 TEX0 = r->TEX0;
@ -673,7 +685,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
}
}
template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
{
@ -683,12 +695,12 @@ template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
m_env.CTXT[i].CLAMP = (GSVector4i)r->CLAMP;
}
void GSState::GIFRegHandlerFOG(const GIFReg* r)
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
{
m_v.FOG = (GSVector4i)r->FOG;
}
void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
{
/*
m_v.XYZ.X = r->XYZF.X;
@ -703,18 +715,18 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
VertexKick(true);
}
void GSState::GIFRegHandlerXYZ3(const GIFReg* r)
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(true);
}
void GSState::GIFRegHandlerNOP(const GIFReg* r)
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
{
}
template<int i> void GSState::GIFRegHandlerTEX1(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
{
@ -724,7 +736,7 @@ template<int i> void GSState::GIFRegHandlerTEX1(const GIFReg* r)
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
}
template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
{
// m_env.CTXT[i].TEX2 = r->TEX2; // not used
@ -735,13 +747,15 @@ template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* r)
// TFX, TCC, TH, TW, TBW, and TBP0
uint64 mask = 0xFFFFFFE003F00000ull; // TEX2 bits
GIFRegTEX0 TEX0;
TEX0.u64 = (m_env.CTXT[i].TEX0.u64 & ~mask) | (r->u64 & mask);
ApplyTEX0(i, TEX0);
}
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
{
GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
@ -755,7 +769,7 @@ template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
m_env.CTXT[i].UpdateScissor();
}
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
{
if(r->PRMODECONT != m_env.PRMODECONT)
{
@ -773,7 +787,7 @@ void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
UpdateVertexKick();
}
void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
{
if(!m_env.PRMODECONT.AC)
{
@ -789,7 +803,7 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
UpdateVertexKick();
}
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r)
{
if(r->TEXCLUT != m_env.TEXCLUT)
{
@ -799,7 +813,7 @@ void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
m_env.TEXCLUT = (GSVector4i)r->TEXCLUT;
}
void GSState::GIFRegHandlerSCANMSK(const GIFReg* r)
void GSState::GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r)
{
if(r->SCANMSK != m_env.SCANMSK)
{
@ -809,7 +823,7 @@ void GSState::GIFRegHandlerSCANMSK(const GIFReg* r)
m_env.SCANMSK = (GSVector4i)r->SCANMSK;
}
template<int i> void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
{
@ -819,7 +833,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r)
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
}
template<int i> void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
{
@ -829,7 +843,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2;
}
void GSState::GIFRegHandlerTEXA(const GIFReg* r)
void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r)
{
if(r->TEXA != m_env.TEXA)
{
@ -839,7 +853,7 @@ void GSState::GIFRegHandlerTEXA(const GIFReg* r)
m_env.TEXA = (GSVector4i)r->TEXA;
}
void GSState::GIFRegHandlerFOGCOL(const GIFReg* r)
void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
{
if(r->FOGCOL != m_env.FOGCOL)
{
@ -849,12 +863,12 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* r)
m_env.FOGCOL = (GSVector4i)r->FOGCOL;
}
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
{
// TRACE(_T("TEXFLUSH\n"));
}
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
{
@ -866,7 +880,7 @@ template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
m_env.CTXT[i].UpdateScissor();
}
template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
{
ASSERT(r->ALPHA.A != 3);
ASSERT(r->ALPHA.B != 3);
@ -885,7 +899,7 @@ template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* r)
m_env.CTXT[i].ALPHA.u32[0] = ((~m_env.CTXT[i].ALPHA.u32[0] >> 1) | 0xAA) & m_env.CTXT[i].ALPHA.u32[0];
}
void GSState::GIFRegHandlerDIMX(const GIFReg* r)
void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r)
{
bool update = false;
@ -904,7 +918,7 @@ void GSState::GIFRegHandlerDIMX(const GIFReg* r)
}
}
void GSState::GIFRegHandlerDTHE(const GIFReg* r)
void GSState::GIFRegHandlerDTHE(const GIFReg* RESTRICT r)
{
if(r->DTHE != m_env.DTHE)
{
@ -914,7 +928,7 @@ void GSState::GIFRegHandlerDTHE(const GIFReg* r)
m_env.DTHE = (GSVector4i)r->DTHE;
}
void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* r)
void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r)
{
if(r->COLCLAMP != m_env.COLCLAMP)
{
@ -927,7 +941,7 @@ void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* r)
#endif
}
template<int i> void GSState::GIFRegHandlerTEST(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST)
{
@ -940,7 +954,7 @@ template<int i> void GSState::GIFRegHandlerTEST(const GIFReg* r)
#endif
}
void GSState::GIFRegHandlerPABE(const GIFReg* r)
void GSState::GIFRegHandlerPABE(const GIFReg* RESTRICT r)
{
if(r->PABE != m_env.PABE)
{
@ -950,7 +964,7 @@ void GSState::GIFRegHandlerPABE(const GIFReg* r)
m_env.PABE = (GSVector4i)r->PABE;
}
template<int i> void GSState::GIFRegHandlerFBA(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA)
{
@ -960,7 +974,7 @@ template<int i> void GSState::GIFRegHandlerFBA(const GIFReg* r)
m_env.CTXT[i].FBA = (GSVector4i)r->FBA;
}
template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
{
if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME)
{
@ -980,7 +994,7 @@ template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* r)
#endif
}
template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
{
GIFRegZBUF ZBUF = r->ZBUF;
@ -1015,7 +1029,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* r)
m_env.CTXT[i].ZBUF = (GSVector4i)ZBUF;
}
void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* r)
void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r)
{
if(r->BITBLTBUF != m_env.BITBLTBUF)
{
@ -1035,7 +1049,7 @@ void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* r)
}
}
void GSState::GIFRegHandlerTRXPOS(const GIFReg* r)
void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r)
{
if(r->TRXPOS != m_env.TRXPOS)
{
@ -1045,7 +1059,7 @@ void GSState::GIFRegHandlerTRXPOS(const GIFReg* r)
m_env.TRXPOS = (GSVector4i)r->TRXPOS;
}
void GSState::GIFRegHandlerTRXREG(const GIFReg* r)
void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r)
{
if(r->TRXREG != m_env.TRXREG)
{
@ -1055,7 +1069,7 @@ void GSState::GIFRegHandlerTRXREG(const GIFReg* r)
m_env.TRXREG = (GSVector4i)r->TRXREG;
}
void GSState::GIFRegHandlerTRXDIR(const GIFReg* r)
void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
{
Flush();
@ -1075,17 +1089,19 @@ void GSState::GIFRegHandlerTRXDIR(const GIFReg* r)
case 3:
ASSERT(0);
break;
default:
__assume(0);
}
}
void GSState::GIFRegHandlerHWREG(const GIFReg* r)
void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
{
ASSERT(m_env.TRXDIR.XDIR == 0); // host => local
Write((uint8*)r, 8); // haunting ground
}
void GSState::GIFRegHandlerSIGNAL(const GIFReg* r)
void GSState::GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r)
{
m_regs->SIGLBLID.SIGID = (m_regs->SIGLBLID.SIGID & ~r->SIGNAL.IDMSK) | (r->SIGNAL.ID & r->SIGNAL.IDMSK);
@ -1093,13 +1109,13 @@ void GSState::GIFRegHandlerSIGNAL(const GIFReg* r)
if(!m_regs->IMR.SIGMSK && m_irq) m_irq();
}
void GSState::GIFRegHandlerFINISH(const GIFReg* r)
void GSState::GIFRegHandlerFINISH(const GIFReg* RESTRICT r)
{
if(m_regs->CSR.wFINISH) m_regs->CSR.rFINISH = 1;
if(!m_regs->IMR.FINISHMSK && m_irq) m_irq();
}
void GSState::GIFRegHandlerLABEL(const GIFReg* r)
void GSState::GIFRegHandlerLABEL(const GIFReg* RESTRICT r)
{
m_regs->SIGLBLID.LBLID = (m_regs->SIGLBLID.LBLID & ~r->LABEL.IDMSK) | (r->LABEL.ID & r->LABEL.IDMSK);
}
@ -1119,7 +1135,16 @@ void GSState::FlushWrite()
if(len <= 0) return;
int y = m_tr.y;
GSVector4i r;
r.left = m_env.TRXPOS.DSAX;
r.top = m_env.TRXPOS.DSAY;
r.right = r.left + m_env.TRXREG.RRW;
r.bottom = r.top + m_env.TRXREG.RRH;
InvalidateVideoMem(m_env.BITBLTBUF, r);
//int y = m_tr.y;
GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi;
@ -1129,6 +1154,7 @@ void GSState::FlushWrite()
m_perfmon.Put(GSPerfMon::Swizzle, len);
/*
GSVector4i r;
r.left = m_env.TRXPOS.DSAX;
@ -1137,6 +1163,7 @@ void GSState::FlushWrite()
r.bottom = std::min<int>(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1);
InvalidateVideoMem(m_env.BITBLTBUF, r);
*/
/*
static int n = 0;
string s;
@ -1174,12 +1201,6 @@ void GSState::Write(const uint8* mem, int len)
// printf("%d >= %d\n", len, m_tr.total);
(m_mem.*psm.wi)(m_tr.x, m_tr.y, mem, m_tr.total, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
m_tr.start = m_tr.end = m_tr.total;
m_perfmon.Put(GSPerfMon::Swizzle, len);
GSVector4i r;
r.left = m_env.TRXPOS.DSAX;
@ -1188,6 +1209,12 @@ void GSState::Write(const uint8* mem, int len)
r.bottom = r.top + m_env.TRXREG.RRH;
InvalidateVideoMem(m_env.BITBLTBUF, r);
(m_mem.*psm.wi)(m_tr.x, m_tr.y, mem, m_tr.total, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
m_tr.start = m_tr.end = m_tr.total;
m_perfmon.Put(GSPerfMon::Swizzle, len);
}
else
{
@ -1456,7 +1483,7 @@ void GSState::SoftReset(uint32 mask)
void GSState::ReadFIFO(uint8* mem, int size)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
Flush();
@ -1477,7 +1504,7 @@ template void GSState::Transfer<3>(const uint8* mem, uint32 size);
template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
{
GSPerfMonAutoTimer pmat(m_perfmon);
GSPerfMonAutoTimer pmat(&m_perfmon);
const uint8* start = mem;
@ -2546,6 +2573,9 @@ bool GSC_SimpsonsGame(const GSFrameInfo& fi, int& skip)
bool GSC_Genji(const GSFrameInfo& fi, int& skip)
{
if( !skip && fi.TME && (fi.FBP == 0x700 || fi.FBP == 0x0) && fi.TBP0 == 0x1500 && fi.TPSM )
skip=1;
if(skip == 0)
{
if(fi.TME && fi.FBP == 0x01500 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00e00 && fi.TPSM == PSM_PSMZ16)
@ -3211,9 +3241,7 @@ bool GSC_JamesBondEverythingOrNothing(const GSFrameInfo& fi, int& skip)
return true;
}
//#define USE_DYNAMIC_CRC_HACK
#ifdef USE_DYNAMIC_CRC_HACK
#ifdef ENABLE_DYNAMIC_CRC_HACK
#define DYNA_DLL_PATH "c:/dev/pcsx2/trunk/tools/dynacrchack/DynaCrcHack.dll"
@ -3443,7 +3471,7 @@ bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw)
GetSkipCount gsc = map[m_game.title];
g_crc_region = m_game.region;
#ifdef USE_DYNAMIC_CRC_HACK
#ifdef ENABLE_DYNAMIC_CRC_HACK
bool res=false; if(IsInvokedDynamicCrcHack(fi, skip, g_crc_region, res)){ if( !res ) return false; } else
#endif
if(gsc && !gsc(fi, skip))

View File

@ -37,70 +37,72 @@
class GSState : public GSAlignedClass<32>
{
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r);
// RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there)
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r);
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
void GIFPackedRegHandlerNull(const GIFPackedReg* r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* r);
void GIFPackedRegHandlerUV(const GIFPackedReg* r);
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* r);
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* r);
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
typedef void (GSState::*GIFRegHandler)(const GIFReg* r);
typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r);
GIFRegHandler m_fpGIFRegHandlers[256];
void ApplyTEX0(int i, GIFRegTEX0& TEX0);
void ApplyPRIM(const GIFRegPRIM& PRIM);
void GIFRegHandlerNull(const GIFReg* r);
void GIFRegHandlerPRIM(const GIFReg* r);
void GIFRegHandlerRGBAQ(const GIFReg* r);
void GIFRegHandlerST(const GIFReg* r);
void GIFRegHandlerUV(const GIFReg* r);
void GIFRegHandlerXYZF2(const GIFReg* r);
void GIFRegHandlerXYZ2(const GIFReg* r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* r);
void GIFRegHandlerFOG(const GIFReg* r);
void GIFRegHandlerXYZF3(const GIFReg* r);
void GIFRegHandlerXYZ3(const GIFReg* r);
void GIFRegHandlerNOP(const GIFReg* r);
template<int i> void GIFRegHandlerTEX1(const GIFReg* r);
template<int i> void GIFRegHandlerTEX2(const GIFReg* r);
template<int i> void GIFRegHandlerXYOFFSET(const GIFReg* r);
void GIFRegHandlerPRMODECONT(const GIFReg* r);
void GIFRegHandlerPRMODE(const GIFReg* r);
void GIFRegHandlerTEXCLUT(const GIFReg* r);
void GIFRegHandlerSCANMSK(const GIFReg* r);
template<int i> void GIFRegHandlerMIPTBP1(const GIFReg* r);
template<int i> void GIFRegHandlerMIPTBP2(const GIFReg* r);
void GIFRegHandlerTEXA(const GIFReg* r);
void GIFRegHandlerFOGCOL(const GIFReg* r);
void GIFRegHandlerTEXFLUSH(const GIFReg* r);
template<int i> void GIFRegHandlerSCISSOR(const GIFReg* r);
template<int i> void GIFRegHandlerALPHA(const GIFReg* r);
void GIFRegHandlerDIMX(const GIFReg* r);
void GIFRegHandlerDTHE(const GIFReg* r);
void GIFRegHandlerCOLCLAMP(const GIFReg* r);
template<int i> void GIFRegHandlerTEST(const GIFReg* r);
void GIFRegHandlerPABE(const GIFReg* r);
template<int i> void GIFRegHandlerFBA(const GIFReg* r);
template<int i> void GIFRegHandlerFRAME(const GIFReg* r);
template<int i> void GIFRegHandlerZBUF(const GIFReg* r);
void GIFRegHandlerBITBLTBUF(const GIFReg* r);
void GIFRegHandlerTRXPOS(const GIFReg* r);
void GIFRegHandlerTRXREG(const GIFReg* r);
void GIFRegHandlerTRXDIR(const GIFReg* r);
void GIFRegHandlerHWREG(const GIFReg* r);
void GIFRegHandlerSIGNAL(const GIFReg* r);
void GIFRegHandlerFINISH(const GIFReg* r);
void GIFRegHandlerLABEL(const GIFReg* r);
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
void GIFRegHandlerPRIM(const GIFReg* RESTRICT r);
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r);
void GIFRegHandlerNOP(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r);
void GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r);
void GIFRegHandlerPRMODE(const GIFReg* RESTRICT r);
void GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r);
void GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r);
void GIFRegHandlerTEXA(const GIFReg* RESTRICT r);
void GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r);
void GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerALPHA(const GIFReg* RESTRICT r);
void GIFRegHandlerDIMX(const GIFReg* RESTRICT r);
void GIFRegHandlerDTHE(const GIFReg* RESTRICT r);
void GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEST(const GIFReg* RESTRICT r);
void GIFRegHandlerPABE(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerFBA(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerFRAME(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerZBUF(const GIFReg* RESTRICT r);
void GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r);
void GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r);
void GIFRegHandlerTRXREG(const GIFReg* RESTRICT r);
void GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r);
void GIFRegHandlerHWREG(const GIFReg* RESTRICT r);
void GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r);
void GIFRegHandlerFINISH(const GIFReg* RESTRICT r);
void GIFRegHandlerLABEL(const GIFReg* RESTRICT r);
int m_version;
int m_sssize;
@ -206,7 +208,7 @@ public:
virtual void FlushPrim() = 0;
virtual void ResetPrim() = 0;
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}
void Move();
void Write(const uint8* mem, int len);

View File

@ -88,7 +88,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
Target* dst = NULL;
#ifdef HW_NO_TEXTURE_CACHE
#ifdef DISABLE_HW_TEXTURE_CACHE
if( 0 )
#else
if(src == NULL)
@ -97,11 +97,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM;
// This should get looked at if you feel like hackfixing the texture cache.
// Checking for type < 1 (so no only RenderTarget, not DepthStencil get checked), it fixes the fog in Arc the Lad.
// Simply not doing this code at all makes a lot of previsouly missing stuff show (but breaks pretty much everything
// else.
for(int type = 0; type < 2 && dst == NULL; type++)
// Arc the Lad finds the wrong surface here when looking for a depth stencil.
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
// (Simply not doing this code at all makes a lot of previously missing stuff show, but breaks pretty much everything
// else.)
//for(int type = 0; type < 2 && dst == NULL; type++)
for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil
{
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{
@ -191,7 +194,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
if(multiplier > 1) // it's limited to a maximum of 4 on reading the config
{
#if 0 //#ifdef USE_UPSCALE_HACKS //not happy with this yet..
#if 0 //#ifdef ENABLE_UPSCALE_HACKS //not happy with this yet..
float x = 1.0f;
float y = 1.0f;
@ -289,19 +292,14 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
return dst;
}
void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target)
void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, bool target)
{
// Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
if (!o) return;
if(!o) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
uint32 bp = o->bp;
uint32 bw = o->bw;
uint32 psm = o->psm;
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
if(!target)
{
const list<Source*>& m = m_src.m_map[bp >> 5];
@ -319,60 +317,56 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
}
}
GSVector4i r;
list<uint32>* pages = o->GetPages(rect, &r);
bool found = false;
for(int y = r.top; y < r.bottom; y += bs.y)
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
{
uint32 base = o->block.row[y >> 3];
uint32 page = *p;
for(int x = r.left; x < r.right; x += bs.x)
const list<Source*>& m = m_src.m_map[page];
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
list<Source*>::const_iterator j = i++;
if(page < MAX_PAGES)
Source* s = *j;
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
const list<Source*>& m = m_src.m_map[page];
bool b = bp == s->m_TEX0.TBP0;
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
if(!s->m_target)
{
list<Source*>::const_iterator j = i++;
Source* s = *j;
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
if(s->m_repeating)
{
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
if(s->m_repeating)
{
list<GSVector2i>& l = s->m_p2t[page];
list<GSVector2i>& l = s->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
s->m_valid[k->x] &= ~k->y;
}
}
else
{
s->m_valid[page] = 0;
}
s->m_complete = false;
found = b;
}
else
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
// TODO
if(b)
{
m_src.RemoveAt(s);
}
s->m_valid[k->x] &= k->y;
}
}
else
{
s->m_valid[page] = 0;
}
s->m_complete = false;
found = b;
}
else
{
// TODO
if(b)
{
m_src.RemoveAt(s);
}
}
}
}
@ -425,7 +419,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
}
}
void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r)
void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r)
{
uint32 bp = o->bp;
uint32 psm = o->psm;

View File

@ -127,14 +127,14 @@ protected:
// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
// TODO: virtual void Write(Target* t, const GSVector4i& r) = 0;
#ifndef HW_NO_TEXTURE_CACHE
#ifndef DISABLE_HW_TEXTURE_CACHE
virtual void Read(Target* t, const GSVector4i& r) = 0;
#endif
public:
GSTextureCache(GSRenderer* r);
virtual ~GSTextureCache();
#ifdef HW_NO_TEXTURE_CACHE
#ifdef DISABLE_HW_TEXTURE_CACHE
virtual void Read(Target* t, const GSVector4i& r) = 0;
#endif
void RemoveAll();
@ -143,8 +143,8 @@ public:
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r);
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(GSOffset* o, const GSVector4i& r);
void IncAge();
bool UserHacks_HalfPixelOffset;

View File

@ -32,7 +32,7 @@ GSTextureCacheSW::~GSTextureCacheSW()
RemoveAll();
}
const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0)
GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
@ -70,116 +70,48 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
if(t == NULL)
{
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
t = new Texture(m_state, o, tw0, TEX0, TEXA);
t = new Texture(m_state, tw0, TEX0, TEXA);
m_textures.insert(t);
__aligned(uint32, 16) pages[16];
((GSVector4i*)pages)[0] = GSVector4i::zero();
((GSVector4i*)pages)[1] = GSVector4i::zero();
((GSVector4i*)pages)[2] = GSVector4i::zero();
((GSVector4i*)pages)[3] = GSVector4i::zero();
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
for(int y = 0; y < th; y += bs.y)
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
pages[page >> 5] |= 1 << (page & 31);
}
}
m_map[*i].push_front(t);
}
for(int i = 0; i < countof(pages); i++)
{
uint32 p = pages[i];
if(p != 0)
{
list<Texture*>* m = &m_map[i << 5];
unsigned long j;
while(_BitScanForward(&j, p))
{
p ^= 1 << j;
m[j].push_front(t);
}
}
}
}
if(!t->Update(r))
{
printf("!@#$\n"); // memory allocation may fail if the game is too hungry (tales of legendia fight transition/scene)
RemoveAt(t);
t = NULL;
}
return t;
}
void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect)
void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
{
uint32 bp = o->bp;
uint32 bw = o->bw;
uint32 psm = o->psm;
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
for(list<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
{
uint32 base = o->block.row[y >> 3];
uint32 page = *p;
for(int x = r.left; x < r.right; x += bs.x)
const list<Texture*>& map = m_map[page];
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
Texture* t = *i;
if(page < MAX_PAGES)
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
const list<Texture*>& map = m_map[page];
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
if(t->m_repeating)
{
Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if(t->m_repeating)
{
list<GSVector2i>& l = t->m_p2t[page];
list<GSVector2i>& l = t->m_p2t[page];
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[j->x] &= ~j->y;
}
}
else
{
t->m_valid[page] = 0;
}
t->m_complete = false;
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[j->x] &= j->y;
}
}
else
{
t->m_valid[page] = 0;
}
t->m_complete = false;
}
}
}
@ -233,9 +165,8 @@ void GSTextureCacheSW::IncAge()
//
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
: m_state(state)
, m_offset(offset)
, m_buff(NULL)
, m_tw(tw0)
, m_age(0)
@ -246,7 +177,20 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
m_TEXA = TEXA;
memset(m_valid, 0, sizeof(m_valid));
memset(m_pages.bm, 0, sizeof(m_pages.bm));
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 page = *i;
m_pages.bm[page >> 5] |= 1 << (page & 31);
m_pages.n.push_back(page);
}
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
if(m_repeating)

View File

@ -30,7 +30,7 @@ public:
{
public:
GSState* m_state;
const GSOffset* m_offset;
GSOffset* m_offset;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
void* m_buff;
@ -39,13 +39,14 @@ public:
bool m_complete;
bool m_repeating;
list<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
uint32 m_valid[MAX_PAGES];
struct {uint32 bm[16]; list<uint32> n;} m_pages;
// m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8))
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
virtual ~Texture();
bool Update(const GSVector4i& r);
@ -61,9 +62,9 @@ public:
GSTextureCacheSW(GSState* state);
virtual ~GSTextureCacheSW();
const Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0 = 0);
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
void InvalidatePages(const list<uint32>* pages, uint32 psm);
void RemoveAll();
void RemoveAt(Texture* t);

View File

@ -22,6 +22,44 @@
#include "stdafx.h"
#include "GSThread.h"
#ifdef _WINDOWS
// Entry points of the kernel32.dll condition variable / SRW lock API, filled in by InitCondVar below.
// They start out NULL and remain NULL when GetProcAddress cannot find the export, so a NULL pointer
// means "this API is not available on the running system".
InitializeConditionVariablePtr pInitializeConditionVariable = NULL;
WakeConditionVariablePtr pWakeConditionVariable = NULL;
WakeAllConditionVariablePtr pWakeAllConditionVariable = NULL;
SleepConditionVariableSRWPtr pSleepConditionVariableSRW = NULL;
InitializeSRWLockPtr pInitializeSRWLock = NULL;
AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive = NULL;
ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive = NULL;
class InitCondVar
{
HMODULE m_kernel32;
public:
InitCondVar()
{
m_kernel32 = LoadLibrary("kernel32.dll"); // should not call LoadLibrary from DllMain, but kernel32.dll is the only one guaranteed to be loaded already
pInitializeConditionVariable = (InitializeConditionVariablePtr)GetProcAddress(m_kernel32, "InitializeConditionVariable");
pWakeConditionVariable = (WakeConditionVariablePtr)GetProcAddress(m_kernel32, "WakeConditionVariable");
pWakeAllConditionVariable = (WakeAllConditionVariablePtr)GetProcAddress(m_kernel32, "WakeAllConditionVariable");
pSleepConditionVariableSRW = (SleepConditionVariableSRWPtr)GetProcAddress(m_kernel32, "SleepConditionVariableSRW");
pInitializeSRWLock = (InitializeSRWLockPtr)GetProcAddress(m_kernel32, "InitializeSRWLock");
pAcquireSRWLockExclusive = (AcquireSRWLockExclusivePtr)GetProcAddress(m_kernel32, "AcquireSRWLockExclusive");
pReleaseSRWLockExclusive = (ReleaseSRWLockExclusivePtr)GetProcAddress(m_kernel32, "ReleaseSRWLockExclusive");
}
virtual ~InitCondVar()
{
FreeLibrary(m_kernel32);
}
};
static InitCondVar s_icv;
#endif
GSThread::GSThread()
{
#ifdef _WINDOWS

View File

@ -23,6 +23,22 @@
#ifdef _WINDOWS
typedef void (WINAPI * InitializeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
typedef void (WINAPI * WakeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
typedef void (WINAPI * WakeAllConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
typedef void (WINAPI * SleepConditionVariableSRWPtr)(CONDITION_VARIABLE* ConditionVariable, SRWLOCK* SRWLock, DWORD dwMilliseconds, ULONG Flags);
typedef void (WINAPI * InitializeSRWLockPtr)(SRWLOCK* SRWLock);
typedef void (WINAPI * AcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);
typedef void (WINAPI * ReleaseSRWLockExclusivePtr)(SRWLOCK* SRWLock);
extern InitializeConditionVariablePtr pInitializeConditionVariable;
extern WakeConditionVariablePtr pWakeConditionVariable;
extern WakeAllConditionVariablePtr pWakeAllConditionVariable;
extern SleepConditionVariableSRWPtr pSleepConditionVariableSRW;
extern InitializeSRWLockPtr pInitializeSRWLock;;
extern AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive;
extern ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
class GSThread
{
DWORD m_ThreadId;
@ -54,37 +70,51 @@ public:
void Unlock() {LeaveCriticalSection(&m_cs);}
};
class GSAutoResetEvent
class GSEvent
{
protected:
HANDLE m_hEvent;
public:
GSAutoResetEvent() {m_hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);}
~GSAutoResetEvent() {CloseHandle(m_hEvent);}
GSEvent(bool manual = false, bool initial = false) {m_hEvent = CreateEvent(NULL, manual, initial, NULL);}
~GSEvent() {CloseHandle(m_hEvent);}
void Set() {SetEvent(m_hEvent);}
void Reset() {ResetEvent(m_hEvent);}
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
};
/*
class GSAutoResetEvent
class GSCondVarLock
{
protected:
long m_sync;
SRWLOCK m_lock;
public:
GSAutoResetEvent() {m_sync = 0;}
~GSAutoResetEvent() {}
GSCondVarLock() {pInitializeSRWLock(&m_lock);}
void Set() {_interlockedbittestandset(&m_sync, 0);}
bool Wait() {while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); return true;}
void Lock() {pAcquireSRWLockExclusive(&m_lock);}
void Unlock() {pReleaseSRWLockExclusive(&m_lock);}
operator SRWLOCK* () {return &m_lock;}
};
class GSCondVar
{
CONDITION_VARIABLE m_cv;
public:
GSCondVar() {pInitializeConditionVariable(&m_cv);}
void Set() {pWakeConditionVariable(&m_cv);}
void Wait(GSCondVarLock& lock) {pSleepConditionVariableSRW(&m_cv, lock, INFINITE, 0);}
operator CONDITION_VARIABLE* () {return &m_cv;}
};
*/
#else
#include <pthread.h>
#include <semaphore.h>
#include "GSdx.h"
class GSThread
{
@ -113,7 +143,7 @@ public:
GSCritSec()
{
pthread_mutexattr_init(&m_mutex_attr);
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_RECURSIVE);
pthread_mutex_init(&m_mutex, &m_mutex_attr);
}
@ -128,19 +158,67 @@ public:
void Unlock() {pthread_mutex_unlock(&m_mutex);}
};
class GSAutoResetEvent
class GSEvent
{
protected:
sem_t m_sem;
public:
GSAutoResetEvent() {sem_init(&m_sem, 0, 0);}
~GSAutoResetEvent() {sem_destroy(&m_sem);}
GSEvent() {sem_init(&m_sem, 0, 0);}
~GSEvent() {sem_destroy(&m_sem);}
void Set() {sem_post(&m_sem);}
bool Wait() {return sem_wait(&m_sem) == 0;}
};
// Mutex meant to be paired with GSCondVar.
// Note: apart from the mutex type (PTHREAD_MUTEX_NORMAL here), this is the same code as GSCritSec.
class GSCondVarLock
{
	pthread_mutexattr_t m_mutex_attr;
	pthread_mutex_t m_mutex;

public:
	GSCondVarLock()
	{
		// the attribute object is kept alive as a member and released in the destructor
		pthread_mutexattr_init(&m_mutex_attr);
		pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_NORMAL);
		pthread_mutex_init(&m_mutex, &m_mutex_attr);
	}

	virtual ~GSCondVarLock()
	{
		pthread_mutex_destroy(&m_mutex);
		pthread_mutexattr_destroy(&m_mutex_attr);
	}

	// Blocks until the mutex is acquired.
	void Lock()
	{
		pthread_mutex_lock(&m_mutex);
	}

	// Releases the mutex.
	void Unlock()
	{
		pthread_mutex_unlock(&m_mutex);
	}

	// Gives pthread functions direct access to the underlying mutex.
	operator pthread_mutex_t* ()
	{
		return &m_mutex;
	}
};
// Thin wrapper around a pthread condition variable created with default attributes.
class GSCondVar
{
	pthread_cond_t m_cv;
	pthread_condattr_t m_cv_attr;

public:
	GSCondVar()
	{
		pthread_condattr_init(&m_cv_attr);
		pthread_cond_init(&m_cv, &m_cv_attr);
	}

	virtual ~GSCondVar()
	{
		pthread_condattr_destroy(&m_cv_attr);
		pthread_cond_destroy(&m_cv);
	}

	// Signals the condition variable (pthread_cond_signal).
	void Set()
	{
		pthread_cond_signal(&m_cv);
	}

	// Waits on the condition variable with pthread_cond_wait, passing the mutex wrapped by 'lock'.
	void Wait(GSCondVarLock& lock)
	{
		pthread_cond_wait(&m_cv, lock);
	}

	// Gives pthread functions direct access to the underlying condition variable.
	operator pthread_cond_t* ()
	{
		return &m_cv;
	}
};
#endif
class GSAutoLock
@ -152,3 +230,196 @@ public:
GSAutoLock(GSCritSec* cs) {m_cs = cs; m_cs->Lock();}
~GSAutoLock() {m_cs->Unlock();}
};
// Event that is waited on by spinning instead of blocking.
// The signaled state is bit 0 of m_sync; Wait() polls it with _mm_pause() between attempts.
class GSEventSpin
{
protected:
	volatile long m_sync; // bit 0 set = signaled
	volatile bool m_manual; // true = Wait() does not clear the signal, false = Wait() consumes it

public:
	GSEventSpin(bool manual = false, bool initial = false)
		: m_sync(initial ? 1 : 0)
		, m_manual(manual)
	{
	}

	~GSEventSpin()
	{
	}

	// Marks the event as signaled.
	void Set()
	{
		_interlockedbittestandset(&m_sync, 0);
	}

	// Clears the signaled state.
	void Reset()
	{
		_interlockedbittestandreset(&m_sync, 0);
	}

	// Spins until the event is signaled; always returns true.
	bool Wait()
	{
		if(m_manual)
		{
			// manual reset: only observe the flag, leave it set
			while(m_sync == 0) _mm_pause();
		}
		else
		{
			// auto reset: test-and-clear in one interlocked step so the signal is consumed by this waiter
			while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause();
		}

		return true;
	}
};
// Job queue with one worker thread: items handed to Push() are taken from the front of the queue one
// at a time and passed to Process() on the thread started by CreateThread() in the constructor.
// Wait() blocks the caller until the queue has been drained.
//
// Two synchronization back-ends exist and one is selected at construction time:
//  - m_cv: condition variables + lock (used when m_cv.available is true)
//  - m_ev: critical section + event + interlocked item counter (fallback)
template<class T> class GSJobQueue : private GSThread
{
protected:
	int m_count; // incremented by every Push() and every Wait(), reported by GetCount()
	queue<T> m_queue;
	volatile bool m_exit; // set by the destructor to ask the worker to leave ThreadProc
	struct {GSCritSec lock; GSEvent notempty; volatile long count;} m_ev;
	struct {GSCondVar notempty, empty; GSCondVarLock lock; bool available;} m_cv;

	// Worker loop (presumably the GSThread entry point - confirm against GSThread).
	void ThreadProc()
	{
		if(m_cv.available)
		{
			m_cv.lock.Lock();

			while(true)
			{
				while(m_queue.empty())
				{
					m_cv.notempty.Wait(m_cv.lock);

					if(m_exit) {m_cv.lock.Unlock(); return;}
				}

				{
					// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue

					T item = m_queue.front();

					// the item stays in the queue while it is being processed, so Wait() keeps seeing a non-empty queue

					m_cv.lock.Unlock();

					Process(item);

					m_cv.lock.Lock();

					m_queue.pop();
				}

				if(m_queue.empty())
				{
					m_cv.empty.Set();
				}
			}
		}
		else
		{
			m_ev.lock.Lock();

			while(true)
			{
				while(m_queue.empty())
				{
					m_ev.lock.Unlock();

					m_ev.notempty.Wait();

					if(m_exit) {return;}

					m_ev.lock.Lock();
				}

				{
					// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue

					T item = m_queue.front();

					m_ev.lock.Unlock();

					Process(item);

					m_ev.lock.Lock();

					m_queue.pop();
				}

				// only decremented after the item has been destroyed, Wait() polls this counter

				_InterlockedDecrement(&m_ev.count);
			}
		}
	}

public:
	GSJobQueue()
		: m_count(0)
		, m_exit(false)
	{
		m_ev.count = 0;

		// Default to the event back-end so the flag is never left uninitialized on platforms
		// where neither _WINDOWS nor _LINUX is defined.

		m_cv.available = false;

		#ifdef _WINDOWS

		// condition variables are only used when kernel32.dll exported them (see pInitializeConditionVariable)

		m_cv.available = pInitializeConditionVariable != NULL;

		#elif defined(_LINUX)

		//m_cv.available = true;
		m_cv.available = !!theApp.GetConfig("condvar", 1);

		#endif

		CreateThread();
	}

	virtual ~GSJobQueue()
	{
		// NOTE(review): m_exit is written without holding m_cv.lock; a worker that has not reached
		// notempty.Wait() yet would miss this wake-up - confirm GSThread's shutdown copes with that.

		m_exit = true;

		if(m_cv.available)
		{
			m_cv.notempty.Set();
		}
		else
		{
			m_ev.notempty.Set();
		}
	}

	// Returns the number of Push() and Wait() calls made so far.
	int GetCount() const
	{
		return m_count;
	}

	// Appends a copy of 'item' to the queue and wakes the worker.
	virtual void Push(const T& item)
	{
		if(m_cv.available)
		{
			m_cv.lock.Lock();

			m_queue.push(item);

			m_cv.lock.Unlock();

			m_cv.notempty.Set();
		}
		else
		{
			GSAutoLock l(&m_ev.lock);

			m_queue.push(item);

			_InterlockedIncrement(&m_ev.count);

			m_ev.notempty.Set();
		}

		m_count++;
	}

	// Blocks until every queued item has been processed and removed.
	virtual void Wait()
	{
		if(m_cv.available)
		{
			m_cv.lock.Lock();

			while(!m_queue.empty())
			{
				m_cv.empty.Wait(m_cv.lock);
			}

			m_cv.lock.Unlock();
		}
		else
		{
			// NOTE: it is the safest to have our own counter because m_queue.pop() might decrement its own before the last item runs out of its scope and gets destroyed (implementation dependent)

			while(m_ev.count > 0) _mm_pause();
		}

		// NOTE(review): Wait() bumps the same counter as Push(); looks intentional (counts sync points too) - confirm with callers of GetCount()

		m_count++;
	}

	// Called on the worker thread for each queued item; implemented by the derived class.
	virtual void Process(T& item) = 0;
};

View File

@ -118,6 +118,8 @@
<Unit filename="GSRasterizer.h" />
<Unit filename="GSRenderer.cpp" />
<Unit filename="GSRenderer.h" />
<Unit filename="GSRendererHW.cpp" />
<Unit filename="GSRendererHW.h" />
<Unit filename="GSRendererNull.cpp" />
<Unit filename="GSRendererNull.h" />
<Unit filename="GSRendererSW.cpp" />

View File

@ -13,13 +13,11 @@
#undef APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
// English (U.S.) resources
// English (United States) resources
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
#ifdef _WIN32
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
#pragma code_page(1252)
#endif //_WIN32
#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
@ -58,7 +56,7 @@ IDR_CONVERT_FX RCDATA "res\\convert.fx"
IDR_TFX_FX RCDATA "res\\tfx.fx"
IDR_MERGE_FX RCDATA "res\\merge.fx"
IDR_INTERLACE_FX RCDATA "res\\interlace.fx"
IDR_FXAA_FX RCDATA "res\\fxaa.fx"
IDR_FXAA_FX RCDATA "res\\fxaa.fx"
/////////////////////////////////////////////////////////////////////////////
//
@ -104,9 +102,9 @@ BEGIN
CONTROL "",IDC_MSAA,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,109,261,11,14
LTEXT "HW Anti Aliasing",IDC_STATIC_TEXT_HWAA,18,261,53,8
GROUPBOX "D3D Enhancements (can cause glitches)",IDC_STATIC,7,117,175,66
LTEXT "SW rend. threads:",IDC_STATIC,7,189,60,8
EDITTEXT IDC_SWTHREADS_EDIT,71,187,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,190,11,14
LTEXT "Extra rendering threads:",IDC_STATIC,7,189,80,8
EDITTEXT IDC_SWTHREADS_EDIT,89,187,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,117,190,11,14
CONTROL "Texture filtering",IDC_FILTER,"Button",BS_AUTO3STATE | WS_TABSTOP,7,203,67,10
CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,204,58,10
CONTROL "Allow 8-bit textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,217,82,10
@ -144,23 +142,23 @@ FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
CONTROL 2021,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44
LTEXT "Resolution:",IDC_STATIC,7,59,37,8
COMBOBOX IDC_RESOLUTION,78,57,104,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_RESOLUTION,80,57,102,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Renderer:",IDC_STATIC,7,74,34,8
COMBOBOX IDC_RENDERER,78,72,104,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_RENDERER,80,72,102,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Texture Filter (Del):",IDC_STATIC,7,90,64,8
COMBOBOX IDC_FILTER,78,87,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_FILTER,80,87,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Dithering (End):",IDC_STATIC,7,105,52,8
COMBOBOX IDC_DITHERING,78,102,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_DITHERING,80,102,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Aspect Ratio (PgDn):",IDC_STATIC,7,120,68,8
COMBOBOX IDC_ASPECTRATIO,78,117,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Rendering Threads:",IDC_STATIC,7,157,64,8
EDITTEXT IDC_SWTHREADS_EDIT,78,155,35,13,ES_AUTOHSCROLL | ES_NUMBER
COMBOBOX IDC_ASPECTRATIO,80,117,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Extra Rend. Threads:",IDC_STATIC,7,157,70,8
EDITTEXT IDC_SWTHREADS_EDIT,80,155,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,161,11,14
DEFPUSHBUTTON "OK",IDOK,43,178,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,178,50,14
CONTROL 2022,IDC_LOGO11,"Static",SS_BITMAP,7,7,173,42
LTEXT "Internal Resolution:",IDC_STATIC,7,135,64,8
COMBOBOX IDC_SCALE,78,132,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_SCALE,80,132,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,129,157,49,10
END
@ -181,9 +179,9 @@ BEGIN
EDITTEXT IDC_RESY_EDIT,130,132,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_RESY,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,154,130,11,14
CONTROL "Native",IDC_NATIVERES,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,105,33,10
LTEXT "Rendering threads:",IDC_STATIC,19,214,63,8
EDITTEXT IDC_SWTHREADS_EDIT,87,212,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,115,215,11,14
LTEXT "Extra rendering threads:",IDC_STATIC,19,214,80,8
EDITTEXT IDC_SWTHREADS_EDIT,102,212,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,135,215,11,14
COMBOBOX IDC_UPSCALE_MULTIPLIER,92,117,74,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Or use Scaling:",IDC_STATIC,38,120,49,8
LTEXT "Original PS2 resolution :",IDC_STATIC,10,105,80,8
@ -215,7 +213,7 @@ END
//
#ifdef APSTUDIO_INVOKED
GUIDELINES DESIGNINFO
GUIDELINES DESIGNINFO
BEGIN
IDD_CONFIG, DIALOG
BEGIN
@ -241,7 +239,7 @@ BEGIN
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 182
VERTGUIDE, 78
VERTGUIDE, 80
VERTGUIDE, 182
TOPMARGIN, 7
BOTTOMMARGIN, 192
@ -298,7 +296,7 @@ BEGIN
END
END
#endif // English (U.S.) resources
#endif // English (United States) resources
/////////////////////////////////////////////////////////////////////////////
@ -312,7 +310,6 @@ END
#include "res/convert.fx"
#include "res/interlace.fx"
#include "res/merge.fx"
#include "res/fxaa.fx"
/////////////////////////////////////////////////////////////////////////////
#endif // not APSTUDIO_INVOKED

View File

@ -1618,6 +1618,7 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="config.h" />
<ClInclude Include="GPU.h" />
<ClInclude Include="GPUDrawingEnvironment.h" />
<ClInclude Include="GPUDrawScanline.h" />

View File

@ -644,6 +644,9 @@
<ClInclude Include="GSDeviceSDL.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="config.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="res\logo10.bmp">

23
plugins/GSdx/config.h Normal file
View File

@ -0,0 +1,23 @@
// Compile-time feature switches for GSdx. stdafx.h includes this header before
// anything else; a switch is turned on by un-commenting its #define.

#pragma once
// When defined on _WINDOWS builds, stdafx.h includes <JITProfiling.h> and links jitprofiling.lib.
//#define ENABLE_VTUNE
// NOTE(review): the effect of the next two switches is not visible from this header - see their users.
#define ENABLE_JIT_RASTERIZER
//#define ENABLE_DYNAMIC_CRC_HACK
#define ENABLE_UPSCALE_HACKS // Hacks intended to fix upscaling / rendering glitches in HW renderers
//#define DISABLE_HW_TEXTURE_CACHE // Slow but fixes a lot of bugs
//#define DISABLE_CRC_HACKS // Disable all game specific hacks
// Disabling the HW texture cache also forces the CRC hacks off (unless they were already disabled).
#if defined(DISABLE_HW_TEXTURE_CACHE) && !defined(DISABLE_CRC_HACKS)
#define DISABLE_CRC_HACKS
#endif
// NOTE(review): presumably debugging switches that turn individual features off - confirm at their use sites.
//#define DISABLE_BITMASKING
//#define DISABLE_COLCLAMP
//#define DISABLE_DATE

View File

@ -80,7 +80,7 @@
#define IDR_MERGE_FX 10002
#define IDR_INTERLACE_FX 10003
#define IDD_CONFIG2 10004
#define IDR_FXAA_FX 10005
#define IDR_FXAA_FX 10005
// Next default values for new objects
//

View File

@ -25,6 +25,8 @@
#pragma once
#include "config.h"
#ifdef _WINDOWS
// The following macros define the minimum required platform. The minimum required platform
@ -69,6 +71,17 @@
#endif
// put these into vc9/common7/ide/usertype.dat to have them highlighted
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long uint64;
typedef signed long long int64;
// stdc
#include <stddef.h>
@ -85,40 +98,106 @@
#include <list>
#include <map>
#include <set>
#include <queue>
#include <algorithm>
using namespace std;
#if defined(_MSC_VER) && _MSC_VER >= 1500 && _MSC_VER < 1600
#include <memory>
using namespace std::tr1;
#endif
#ifdef __GNUC__
#include <memory>
#endif
#ifdef _WINDOWS
#include <hash_map>
#include <hash_set>
#include <hash_map>
#include <hash_set>
using namespace stdext;
using namespace stdext;
#define vsnprintf _vsnprintf
#define snprintf _snprintf
// hashing algorithms at: http://www.cris.com/~Ttwang/tech/inthash.htm
// default hash_compare does ldiv and other crazy stuff to reduce speed
#define DIRECTORY_SEPARATOR '\\'
// Specialisation of hash_compare for 32-bit keys: replaces the default hash with a
// short shift/add/xor integer mix and orders keys with plain operator<.
template<> class hash_compare<uint32>
{
public:
enum {bucket_size = 1};
// Hash function: mixes the bits of 'key' through a fixed sequence of shift, add and xor steps.
// The order of the steps matters; do not reorder.
size_t operator()(uint32 key) const
{
key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return (size_t)key;
}
// Ordering predicate: true when a sorts before b.
bool operator()(uint32 a, uint32 b) const
{
return a < b;
}
};
// Specialisation of hash_compare for 64-bit keys: replaces the default hash with a
// short shift/add/xor integer mix and orders keys with plain operator<.
template<> class hash_compare<uint64>
{
public:
enum {bucket_size = 1};
// Hash function: mixes the bits of 'key' through a fixed sequence of shift, add and xor steps,
// then converts the result to size_t. The order of the steps matters; do not reorder.
size_t operator()(uint64 key) const
{
key += ~(key << 32);
key ^= (key >> 22);
key += ~(key << 13);
key ^= (key >> 8);
key += (key << 3);
key ^= (key >> 15);
key += ~(key << 27);
key ^= (key >> 31);
return (size_t)key;
}
// Ordering predicate: true when a sorts before b.
bool operator()(uint64 a, uint64 b) const
{
return a < b;
}
};
#define vsnprintf _vsnprintf
#define snprintf _snprintf
#define DIRECTORY_SEPARATOR '\\'
#else
#define _BACKWARD_BACKWARD_WARNING_H
#define _BACKWARD_BACKWARD_WARNING_H
#define hash_map map
#define hash_set set
#define hash_map map
#define hash_set set
//#include <ext/hash_map>
//#include <ext/hash_set>
//#include <ext/hash_map>
//#include <ext/hash_set>
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glx.h>
#include <GL/glext.h>
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glx.h>
#include <GL/glext.h>
//using namespace __gnu_cxx;
//using namespace __gnu_cxx;
#define DIRECTORY_SEPARATOR '/'
#define DIRECTORY_SEPARATOR '/'
#endif
@ -157,19 +236,6 @@ struct aligned_free_object {template<class T> void operator()(T& p) {_aligned_fr
struct aligned_free_first {template<class T> void operator()(T& p) {_aligned_free(p.first);}};
struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_free(p.second);}};
// syntactic sugar
// put these into vc9/common7/ide/usertype.dat to have them highlighted
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long uint64;
typedef signed long long int64;
#define countof(a) (sizeof(a) / sizeof(a[0]))
#define ALIGN_STACK(n) __aligned(int, n) __dummy;
@ -281,61 +347,100 @@ typedef signed long long int64;
#if !defined(_MSC_VER)
#if !defined(HAVE_ALIGNED_MALLOC)
#if !defined(HAVE_ALIGNED_MALLOC)
extern void* _aligned_malloc(size_t size, size_t alignment);
extern void _aligned_free(void* p);
extern void* _aligned_malloc(size_t size, size_t alignment);
extern void _aligned_free(void* p);
#endif
#endif
// http://svn.reactos.org/svn/reactos/trunk/reactos/include/crt/mingw32/intrin_x86.h?view=markup
// - the other intrin_x86.h of pcsx2 is not up to date, its _interlockedbittestandreset simply does not work.
// http://svn.reactos.org/svn/reactos/trunk/reactos/include/crt/mingw32/intrin_x86.h?view=markup
// - the other intrin_x86.h of pcsx2 is not up to date, its _interlockedbittestandreset simply does not work.
// GCC stand-in for the MSVC _BitScanForward intrinsic: runs BSF on Mask and stores the
// result in *Index. Returns 1 when Mask is non-zero, 0 otherwise; *Index should only be
// relied upon when the return value is 1.
__forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask)
{
__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
return Mask ? 1 : 0;
}
// GCC stand-in for the MSVC _BitScanForward intrinsic: runs BSF on Mask and stores the
// result in *Index. Returns 1 when Mask is non-zero, 0 otherwise; *Index should only be
// relied upon when the return value is 1.
__forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask)
{
__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
return Mask ? 1 : 0;
}
// GCC stand-in for the MSVC intrinsic: atomically (lock btr) clears bit 'b' of *a and
// returns the bit's previous value, captured from the carry flag by setb.
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
// GCC stand-in for the MSVC intrinsic: atomically (lock btr) clears bit 'b' of *a and
// returns the bit's previous value, captured from the carry flag by setb.
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
// GCC stand-in for the MSVC intrinsic: atomically (lock bts) sets bit 'b' of *a and
// returns the bit's previous value, captured from the carry flag by setc.
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
// GCC stand-in for the MSVC intrinsic: atomically (lock bts) sets bit 'b' of *a and
// returns the bit's previous value, captured from the carry flag by setc.
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
#ifdef __GNUC__
// GCC stand-in for the MSVC intrinsic: atomically adds Value to *Addend (lock xadd)
// and returns the value *Addend held before the addition.
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
	long retval = Value;

	// *Addend is a read-write operand ("+m") because xadd stores the sum back to memory, and
	// the asm is volatile so it is still emitted when the caller ignores the return value.
	__asm__ __volatile__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval), [Addend] "+m" (*Addend) : : "memory");

	return retval;
}
// 16-bit variant of _InterlockedExchangeAdd: atomically adds Value to *Addend (lock xaddw)
// and returns the value *Addend held before the addition.
__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
	// Must be a 16-bit variable: xaddw needs a 16-bit register operand, and only the
	// low 16 bits are exchanged with memory.
	short retval = Value;

	// *Addend is a read-write operand ("+m") because xadd stores the sum back to memory, and
	// the asm is volatile so it is still emitted when the caller ignores the return value.
	__asm__ __volatile__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval), [Addend] "+m" (*Addend) : : "memory");

	return retval;
}
// GCC stand-in for the MSVC __rdtsc intrinsic: executes RDTSC and returns the 64-bit counter value.
__forceinline unsigned long long __rdtsc()
{
#if defined(__amd64__) || defined(__x86_64__)
// 64-bit build: the two halves arrive in separate registers ("=a" / "=d") and are combined by hand.
unsigned long long low, high;
__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
return low | (high << 32);
#else
// 32-bit build: the "=A" constraint maps the 64-bit result onto the edx:eax register pair.
unsigned long long retval;
__asm__ __volatile__("rdtsc" : "=A"(retval));
return retval;
#endif
}
// Atomically decrements *lpAddend and returns the new (decremented) value.
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
{
	// The exchange-add yields the value before the addition; adjust it to the value after.
	const long previous = _InterlockedExchangeAdd(lpAddend, -1);

	return previous - 1;
}
// Atomically increments *lpAddend and returns the new (incremented) value.
__forceinline long _InterlockedIncrement(volatile long* const lpAddend)
{
	// The exchange-add yields the value before the addition; adjust it to the value after.
	const long previous = _InterlockedExchangeAdd(lpAddend, 1);

	return previous + 1;
}
// Atomically decrements the 16-bit *lpAddend and returns the new (decremented) value.
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
	// The exchange-add yields the value before the addition; adjust it to the value after.
	const long previous = _InterlockedExchangeAdd16(lpAddend, -1);

	return (short)(previous - 1);
}
// Atomically increments the 16-bit *lpAddend and returns the new (incremented) value.
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
	// The exchange-add yields the value before the addition; adjust it to the value after.
	const long previous = _InterlockedExchangeAdd16(lpAddend, 1);

	return (short)(previous + 1);
}
#endif
#ifdef __GNUC__
// GCC stand-in for the MSVC __rdtsc intrinsic: executes RDTSC and returns the 64-bit counter value.
__forceinline unsigned long long __rdtsc()
{
#if defined(__amd64__) || defined(__x86_64__)
// 64-bit build: the two halves arrive in separate registers ("=a" / "=d") and are combined by hand.
unsigned long long low, high;
__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
return low | (high << 32);
#else
// 32-bit build: the "=A" constraint maps the 64-bit result onto the edx:eax register pair.
unsigned long long retval;
__asm__ __volatile__("rdtsc" : "=A"(retval));
return retval;
#endif
}
#endif
#endif
extern void* vmalloc(size_t size, bool code);
extern void vmfree(void* ptr, size_t size);
#define USE_UPSCALE_HACKS // Hacks intended to fix upscaling / rendering glitches in HW renderers
//#define HW_NO_TEXTURE_CACHE // Slow but fixes a lot of bugs
//#define NO_CRC_HACKS // Disable all game specific hacks
#ifdef HW_NO_TEXTURE_CACHE
#define NO_CRC_HACKS
#ifdef _WINDOWS
#ifdef ENABLE_VTUNE
#include <JITProfiling.h>
#pragma comment(lib, "jitprofiling.lib")
#endif
#endif

View File

@ -14,7 +14,7 @@
<WarningLevel>Level4</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<DisableSpecificWarnings>4996;4995;4324;4100;4101;4201;4556;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<AdditionalIncludeDirectories>$(DXSDK_DIR)include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(DXSDK_DIR)include;$(VTUNE_AMPLIFIER_XE_2011_DIR)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<Link>
@ -23,6 +23,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Windows</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<AdditionalLibraryDirectories>$(VTUNE_AMPLIFIER_XE_2011_DIR)lib32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
<PostBuildEvent>
<Command>.\postBuild.cmd "$(TargetPath)" "$(TargetName)" $(TargetExt) $(PcsxSubsection)</Command>

View File

@ -1,26 +0,0 @@
#!/bin/sh
curdir=`pwd`
# Build one plugin, if its directory exists under ${curdir}.
#   $1 - plugin directory name (relative to ${curdir})
#   $2 - optional option forwarded to the plugin's own build.sh
# Exits the whole script with status 1 when the plugin build fails.
buildplugin() {
	if [ -d "${curdir}/$1" ]
	then
		# Quote the path so directories containing whitespace work, and stop
		# rather than run build.sh from the wrong directory if cd fails.
		cd "${curdir}/$1" || exit 1

		# Forward the option only when one was given (no empty argument otherwise).
		sh build.sh ${2:+"$2"}

		if [ $? -ne 0 ]
		then
			exit 1
		fi
	fi
}
buildplugin zerogs $@
buildplugin zzogl $@
buildplugin zzogl-pg $@
buildplugin zeropad $@
buildplugin PeopsSPU2 $@
buildplugin CDVDisoEFP $@
buildplugin CDVDlinuz $@