gsdx-ogl: linux only (merge from trunk 5022:5068)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5069 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-01-15 14:51:49 +00:00
commit 0ad5982364
124 changed files with 42486 additions and 36408 deletions

View File

@ -43,7 +43,7 @@ SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
@ -60,14 +60,14 @@ INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_TODOLIST = NO
GENERATE_TESTLIST = NO
GENERATE_BUGLIST = NO
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = NO
SHOW_DIRECTORIES = YES
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
@ -83,15 +83,14 @@ WARN_LOGFILE =
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = doc/src \
src \
include \
test
examples
FILE_PATTERNS = *.h \
*.c \
*.cpp \
*.dox
RECURSIVE = YES
EXCLUDE =
EXCLUDE = src/hostapi/wasapi/mingw-include
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
@ -104,7 +103,7 @@ FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES

241
3rdparty/portaudio/Doxyfile.developer vendored Normal file
View File

@ -0,0 +1,241 @@
# Doxyfile 1.4.6
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = PortAudio
PROJECT_NUMBER = 2.0
OUTPUT_DIRECTORY = ./doc/
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF = "The $name class" \
"The $name widget" \
"The $name file" \
is \
provides \
specifies \
contains \
represents \
a \
an \
the
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = YES
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS = INTERNAL
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = YES
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = doc/src \
include \
examples \
src \
test \
qa
FILE_PATTERNS = *.h \
*.c \
*.cpp \
*.dox
RECURSIVE = YES
EXCLUDE = src/hostapi/wasapi/mingw-include
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH = doc/src/images
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = NO
HIDE_UNDOC_RELATIONS = NO
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = YES
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 1000
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

View File

@ -18,7 +18,7 @@ libdir = @libdir@
includedir = @includedir@
CC = @CC@
CXX = @CXX@
CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/os/unix @CFLAGS@ @DEFS@
CFLAGS = @CFLAGS@ @DEFS@
LIBS = @LIBS@
AR = @AR@
RANLIB = @RANLIB@
@ -67,9 +67,23 @@ LOOPBACK_OBJS = \
qa/loopback/src/write_wav.o \
qa/loopback/src/paqa.o
TESTS = \
EXAMPLES = \
bin/pa_devs \
bin/pa_fuzz \
bin/paex_pink \
bin/paex_read_write_wire \
bin/paex_record \
bin/paex_saw \
bin/paex_sine \
bin/paex_write_sine \
bin/paex_write_sine_nonint
SELFTESTS = \
bin/paqa_devs \
bin/paqa_errs \
bin/paqa_latency
TESTS = \
bin/patest1 \
bin/patest_buffer \
bin/patest_callbackstop \
@ -85,15 +99,9 @@ TESTS = \
bin/patest_mono \
bin/patest_multi_sine \
bin/patest_out_underflow \
bin/patest_pink \
bin/patest_prime \
bin/patest_read_record \
bin/patest_read_write_wire \
bin/patest_record \
bin/patest_ringmix \
bin/patest_saw \
bin/patest_sine8 \
bin/patest_sine \
bin/patest_sine_channelmaps \
bin/patest_sine_formats \
bin/patest_sine_time \
@ -105,10 +113,6 @@ TESTS = \
bin/patest_two_rates \
bin/patest_underflow \
bin/patest_wire \
bin/patest_write_sine \
bin/patest_write_sine_nonint \
bin/pa_devs \
bin/pa_fuzz \
bin/pa_minlat
# Most of these don't compile yet. Put them in TESTS, above, if
@ -151,10 +155,14 @@ SRC_DIRS = \
SUBDIRS =
@ENABLE_CXX_TRUE@SUBDIRS += bindings/cpp
all: lib/$(PALIB) all-recursive tests
all: lib/$(PALIB) all-recursive tests examples selftests
tests: bin-stamp $(TESTS)
examples: bin-stamp $(EXAMPLES)
selftests: bin-stamp $(SELFTESTS)
loopback: bin-stamp bin/paloopback
# With ASIO enabled we must link libportaudio and all test programs with CXX
@ -166,6 +174,14 @@ $(ALL_TESTS): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) test/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS)
$(EXAMPLES): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) examples/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/examples/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/examples/$*.c lib/$(PALIB) $(LIBS)
$(SELFTESTS): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) qa/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/qa/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/qa/$*.c lib/$(PALIB) $(LIBS)
bin/paloopback: lib/$(PALIB) $(MAKEFILE) $(PAINC) $(LOOPBACK_OBJS)
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS)

File diff suppressed because it is too large Load Diff

View File

@ -1,78 +0,0 @@
# Project: portaudio-dll
# Makefile created by Dev-C++ 4.9.8.2
CPP = g++.exe
CC = gcc.exe
WINDRES = windres.exe
RES =
OBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LINKOBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LIBS = -L"C:/Dev-CPP/lib" -fmessage-length=0 --no-export-all-symbols --add-stdcall-alias ../../../asiosdk2/asiosdk2.a -lkernel32 -luser32 -lgdi32 -lwinspool -lcomdlg32 -ladvapi32 -lshell32 -lole32 -loleaut32 -luuid -lodbc32 -lodbccp32 -lwinmm -O3 -s
INCS = -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
CXXINCS = -I"C:/Dev-CPP/include/c++" -I"C:/Dev-CPP/include/c++/mingw32" -I"C:/Dev-CPP/include/c++/backward" -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
BIN = portaudio-dll.dll
CXXFLAGS = $(CXXINCS)-O3 -fmessage-length=0 -Wall
CFLAGS = $(INCS)-DBUILDING_DLL=1 -O3 -fmessage-length=0 -Wall
.PHONY: all all-before all-after clean clean-custom
all: all-before portaudio-dll.dll all-after
clean: clean-custom
rm -f $(OBJ) $(BIN)
DLLWRAP=dllwrap.exe
DEFFILE=libportaudio-dll.def
STATICLIB=libportaudio-dll.a
$(BIN): $(LINKOBJ)
$(DLLWRAP) --output-def $(DEFFILE) --driver-name c++ --implib $(STATICLIB) $(LINKOBJ) $(LIBS) -o $(BIN)
./pa_hostapi_skeleton.o: ../../hostapi/skeleton/pa_hostapi_skeleton.c
$(CPP) -c ../../hostapi/skeleton/pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CXXFLAGS)
./pa_stream.o: ../../common/pa_stream.c
$(CPP) -c ../../common/pa_stream.c -o ./pa_stream.o $(CXXFLAGS)
./pa_trace.o: ../../common/pa_trace.c
$(CPP) -c ../../common/pa_trace.c -o ./pa_trace.o $(CXXFLAGS)
./pa_allocation.o: ../../common/pa_allocation.c
$(CPP) -c ../../common/pa_allocation.c -o ./pa_allocation.o $(CXXFLAGS)
./pa_converters.o: ../../common/pa_converters.c
$(CPP) -c ../../common/pa_converters.c -o ./pa_converters.o $(CXXFLAGS)
./pa_cpuload.o: ../../common/pa_cpuload.c
$(CPP) -c ../../common/pa_cpuload.c -o ./pa_cpuload.o $(CXXFLAGS)
./pa_dither.o: ../../common/pa_dither.c
$(CPP) -c ../../common/pa_dither.c -o ./pa_dither.o $(CXXFLAGS)
./pa_front.o: ../../common/pa_front.c
$(CPP) -c ../../common/pa_front.c -o ./pa_front.o $(CXXFLAGS)
./pa_process.o: ../../common/pa_process.c
$(CPP) -c ../../common/pa_process.c -o ./pa_process.o $(CXXFLAGS)
./pa_asio.o: ../../pa_asio/pa_asio.cpp
$(CPP) -c ../../pa_asio/pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
./pa_win_util.o: ../pa_win_util.c
$(CPP) -c ../pa_win_util.c -o ./pa_win_util.o $(CXXFLAGS)
./pa_win_hostapis.o: ../pa_win_hostapis.c
$(CPP) -c ../pa_win_hostapis.c -o ./pa_win_hostapis.o $(CXXFLAGS)
./pa_win_ds.o: ../../pa_win_ds/pa_win_ds.c
$(CPP) -c ../../pa_win_ds/pa_win_ds.c -o ./pa_win_ds.o $(CXXFLAGS)
./dsound_wrapper.o: ../../pa_win_ds/dsound_wrapper.c
$(CPP) -c ../../pa_win_ds/dsound_wrapper.c -o ./dsound_wrapper.o $(CXXFLAGS)
./pa_win_wmme.o: ../../pa_win_wmme/pa_win_wmme.c
$(CPP) -c ../../pa_win_wmme/pa_win_wmme.c -o ./pa_win_wmme.o $(CXXFLAGS)
./iasiothiscallresolver.o: ../../pa_asio/iasiothiscallresolver.cpp
$(CPP) -c ../../pa_asio/iasiothiscallresolver.cpp -o ./iasiothiscallresolver.o $(CXXFLAGS)

View File

@ -1,75 +0,0 @@
# Project: portaudio-static
# Makefile created by Dev-C++ 4.9.8.2
CPP = g++.exe
CC = gcc.exe
WINDRES = windres.exe
RES =
OBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LINKOBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LIBS = -L"C:/Dev-CPP/lib" -fmessage-length=0 -O3 -s
INCS = -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
CXXINCS = -I"C:/Dev-CPP/include/c++" -I"C:/Dev-CPP/include/c++/mingw32" -I"C:/Dev-CPP/include/c++/backward" -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
BIN = portaudio-static.a
CXXFLAGS = $(CXXINCS)-O3 -fmessage-length=0 -Wall
CFLAGS = $(INCS)-O3 -fmessage-length=0 -Wall
.PHONY: all all-before all-after clean clean-custom
all: all-before portaudio-static.a all-after
clean: clean-custom
rm -f $(OBJ) $(BIN)
$(BIN): $(LINKOBJ)
ar r $(BIN) $(LINKOBJ)
ranlib $(BIN)
./pa_hostapi_skeleton.o: ../../hostapi/skeleton/pa_hostapi_skeleton.c
$(CPP) -c ../../hostapi/skeleton/pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CXXFLAGS)
./pa_stream.o: ../../common/pa_stream.c
$(CPP) -c ../../common/pa_stream.c -o ./pa_stream.o $(CXXFLAGS)
./pa_trace.o: ../../common/pa_trace.c
$(CPP) -c ../../common/pa_trace.c -o ./pa_trace.o $(CXXFLAGS)
./pa_allocation.o: ../../common/pa_allocation.c
$(CPP) -c ../../common/pa_allocation.c -o ./pa_allocation.o $(CXXFLAGS)
./pa_converters.o: ../../common/pa_converters.c
$(CPP) -c ../../common/pa_converters.c -o ./pa_converters.o $(CXXFLAGS)
./pa_cpuload.o: ../../common/pa_cpuload.c
$(CPP) -c ../../common/pa_cpuload.c -o ./pa_cpuload.o $(CXXFLAGS)
./pa_dither.o: ../../common/pa_dither.c
$(CPP) -c ../../common/pa_dither.c -o ./pa_dither.o $(CXXFLAGS)
./pa_front.o: ../../common/pa_front.c
$(CPP) -c ../../common/pa_front.c -o ./pa_front.o $(CXXFLAGS)
./pa_process.o: ../../common/pa_process.c
$(CPP) -c ../../common/pa_process.c -o ./pa_process.o $(CXXFLAGS)
./pa_asio.o: ../../pa_asio/pa_asio.cpp
$(CPP) -c ../../pa_asio/pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
./pa_win_util.o: ../pa_win_util.c
$(CPP) -c ../pa_win_util.c -o ./pa_win_util.o $(CXXFLAGS)
./pa_win_hostapis.o: ../pa_win_hostapis.c
$(CPP) -c ../pa_win_hostapis.c -o ./pa_win_hostapis.o $(CXXFLAGS)
./pa_win_ds.o: ../../pa_win_ds/pa_win_ds.c
$(CPP) -c ../../pa_win_ds/pa_win_ds.c -o ./pa_win_ds.o $(CXXFLAGS)
./dsound_wrapper.o: ../../pa_win_ds/dsound_wrapper.c
$(CPP) -c ../../pa_win_ds/dsound_wrapper.c -o ./dsound_wrapper.o $(CXXFLAGS)
./pa_win_wmme.o: ../../pa_win_wmme/pa_win_wmme.c
$(CPP) -c ../../pa_win_wmme/pa_win_wmme.c -o ./pa_win_wmme.o $(CXXFLAGS)
./iasiothiscallresolver.o: ../../pa_asio/iasiothiscallresolver.cpp
$(CPP) -c ../../pa_asio/iasiothiscallresolver.cpp -o ./iasiothiscallresolver.o $(CXXFLAGS)

View File

@ -1,209 +0,0 @@
[Project]
FileName=portaudio-dll.dev
Name=portaudio-dll
UnitCount=16
Type=3
Ver=1
ObjFiles=
Includes=..\..\..\asiosdk2;..\..\..\asiosdk2\common;..\..\..\asiosdk2\host;..\..\..\asiosdk2\host\pc;..\..\common
Libs=
PrivateResource=
ResourceIncludes=
MakeIncludes=
Compiler=-DBUILDING_DLL=1_@@_-O3_@@_
CppCompiler=-O3_@@_
Linker=--no-export-all-symbols --add-stdcall-alias_@@_../../../asiosdk2/asiosdk2.a_@@_-lkernel32 -luser32 -lgdi32 -lwinspool -lcomdlg32 -ladvapi32 -lshell32 -lole32 -loleaut32 -luuid -lodbc32 -lodbccp32 -lwinmm_@@_-O3 -s_@@_
IsCpp=1
Icon=
ExeOutput=.
ObjectOutput=.
OverrideOutput=0
OverrideOutputName=portaudio.a
HostApplication=
Folders=
CommandLine=
IncludeVersionInfo=0
SupportXPThemes=0
CompilerSet=0
CompilerSettings=0000000000000000000
UseCustomMakefile=0
CustomMakefile=
[Unit1]
FileName=..\..\hostapi\skeleton\pa_hostapi_skeleton.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CFLAGS)
[Unit2]
FileName=..\..\common\pa_stream.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_stream.c -o ./pa_stream.o $(CFLAGS)
[Unit3]
FileName=..\..\common\pa_trace.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_trace.c -o ./pa_trace.o $(CFLAGS)
[Unit4]
FileName=..\..\common\pa_allocation.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_allocation.c -o ./pa_allocation.o $(CFLAGS)
[Unit5]
FileName=..\..\common\pa_converters.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_converters.c -o ./pa_converters.o $(CFLAGS)
[Unit6]
FileName=..\..\common\pa_cpuload.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_cpuload.c -o ./pa_cpuload.o $(CFLAGS)
[Unit7]
FileName=..\..\common\pa_dither.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_dither.c -o ./pa_dither.o $(CFLAGS)
[Unit8]
FileName=..\..\common\pa_front.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_front.c -o ./pa_front.o $(CFLAGS)
[Unit9]
FileName=..\..\common\pa_process.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_process.c -o ./pa_process.o $(CFLAGS)
[VersionInfo]
Major=0
Minor=1
Release=1
Build=1
LanguageID=1033
CharsetID=1252
CompanyName=
FileVersion=
FileDescription=Developed using the Dev-C++ IDE
InternalName=
LegalCopyright=
LegalTrademarks=
OriginalFilename=
ProductName=
ProductVersion=
AutoIncBuildNr=0
[Unit10]
FileName=..\..\pa_asio\pa_asio.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CPP) -c pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
[Unit11]
FileName=..\pa_win_util.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_util.c -o ./pa_win_util.o $(CFLAGS)
[Unit12]
FileName=..\pa_win_hostapis.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_hostapis.c -o ./pa_win_hostapis.o $(CFLAGS)
[Unit13]
FileName=..\..\pa_win_ds\pa_win_ds.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_ds.c -o ./pa_win_ds.o $(CFLAGS)
[Unit14]
FileName=..\..\pa_win_ds\dsound_wrapper.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c dsound_wrapper.c -o ./dsound_wrapper.o $(CFLAGS)
[Unit15]
FileName=..\..\pa_win_wmme\pa_win_wmme.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_wmme.c -o ./pa_win_wmme.o $(CFLAGS)
[Unit16]
FileName=..\..\pa_asio\iasiothiscallresolver.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=

View File

@ -1,209 +0,0 @@
[Project]
FileName=portaudio-static.dev
Name=portaudio-static
UnitCount=16
Type=2
Ver=1
ObjFiles=
Includes=..\..\..\asiosdk2;..\..\..\asiosdk2\common;..\..\..\asiosdk2\host;..\..\..\asiosdk2\host\pc;..\..\common
Libs=
PrivateResource=
ResourceIncludes=
MakeIncludes=
Compiler=-O3_@@_
CppCompiler=-O3_@@_
Linker=-O3 -s_@@_
IsCpp=1
Icon=
ExeOutput=.
ObjectOutput=.
OverrideOutput=0
OverrideOutputName=portaudio.a
HostApplication=
Folders=
CommandLine=
IncludeVersionInfo=0
SupportXPThemes=0
CompilerSet=0
CompilerSettings=0000000000000000000
UseCustomMakefile=0
CustomMakefile=
[Unit1]
FileName=..\..\hostapi\skeleton\pa_hostapi_skeleton.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CFLAGS)
[Unit2]
FileName=..\..\common\pa_stream.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_stream.c -o ./pa_stream.o $(CFLAGS)
[Unit3]
FileName=..\..\common\pa_trace.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_trace.c -o ./pa_trace.o $(CFLAGS)
[Unit4]
FileName=..\..\common\pa_allocation.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_allocation.c -o ./pa_allocation.o $(CFLAGS)
[Unit5]
FileName=..\..\common\pa_converters.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_converters.c -o ./pa_converters.o $(CFLAGS)
[Unit6]
FileName=..\..\common\pa_cpuload.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_cpuload.c -o ./pa_cpuload.o $(CFLAGS)
[Unit7]
FileName=..\..\common\pa_dither.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_dither.c -o ./pa_dither.o $(CFLAGS)
[Unit8]
FileName=..\..\common\pa_front.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_front.c -o ./pa_front.o $(CFLAGS)
[Unit9]
FileName=..\..\common\pa_process.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_process.c -o ./pa_process.o $(CFLAGS)
[VersionInfo]
Major=0
Minor=1
Release=1
Build=1
LanguageID=1033
CharsetID=1252
CompanyName=
FileVersion=
FileDescription=Developed using the Dev-C++ IDE
InternalName=
LegalCopyright=
LegalTrademarks=
OriginalFilename=
ProductName=
ProductVersion=
AutoIncBuildNr=0
[Unit10]
FileName=..\..\pa_asio\pa_asio.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CPP) -c pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
[Unit11]
FileName=..\..\pa_win\pa_win_util.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_util.c -o ./pa_win_util.o $(CFLAGS)
[Unit12]
FileName=..\..\pa_win\pa_win_hostapis.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_hostapis.c -o ./pa_win_hostapis.o $(CFLAGS)
[Unit13]
FileName=..\..\pa_win_ds\pa_win_ds.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_ds.c -o ./pa_win_ds.o $(CFLAGS)
[Unit14]
FileName=..\..\pa_win_ds\dsound_wrapper.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c dsound_wrapper.c -o ./dsound_wrapper.o $(CFLAGS)
[Unit15]
FileName=..\..\pa_win_wmme\pa_win_wmme.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_wmme.c -o ./pa_win_wmme.o $(CFLAGS)
[Unit16]
FileName=..\..\pa_asio\iasiothiscallresolver.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=

View File

@ -1,23 +0,0 @@
From: "Peter L Jones"
Sent: Wednesday, September 17, 2003 5:18 AM
Subject: Dev-C++ project files
I attach two project files intended for portaudio/pa_win/dev-cpp (i.e. in
parallel with the msvc directory), if you want them. One is for a static
library build and one for a DLL. I've used the static library (in building
a single monolithic DLL) but I can't guarantee the DLL version will build a
working library (I think it's mostly there, though!).
I also attach the resulting makefiles, which may be of use to other MinGW
users.
They're rooted in the directory given above and drop their object and
library files in the same place. They assume the asiosdk2 files are in the
same directory as portaudio/ in a sub-directory called asiosdk2/. Oh! The
DLL is built against a static asiosdk2.a library... maybe not the best way
to do it... I ought to figure out how to link against a "home made" dll in
Dev-C++, I guess ;-)
Cheers,
-- Peter

View File

@ -1,10 +1,10 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
# Free Software Foundation, Inc.
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011 Free Software Foundation, Inc.
timestamp='2009-12-30'
timestamp='2011-05-11'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@ -57,7 +57,7 @@ GNU config.guess ($timestamp)
Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
@ -270,7 +270,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
exit ;;
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
exit $exitcode ;;
Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead
@ -552,7 +555,7 @@ EOF
echo rs6000-ibm-aix3.2
fi
exit ;;
*:AIX:*:[456])
*:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
@ -879,7 +882,13 @@ EOF
then
echo ${UNAME_MACHINE}-unknown-linux-gnu
else
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
echo ${UNAME_MACHINE}-unknown-linux-gnueabi
else
echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
fi
fi
exit ;;
avr32*:Linux:*:*)
@ -968,6 +977,9 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
tile*:Linux:*:*)
echo ${UNAME_MACHINE}-tilera-linux-gnu
exit ;;
vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;;
@ -1231,6 +1243,9 @@ EOF
*:QNX:*:4*)
echo i386-pc-qnx
exit ;;
NEO-?:NONSTOP_KERNEL:*:*)
echo neo-tandem-nsk${UNAME_RELEASE}
exit ;;
NSE-?:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE}
exit ;;

View File

@ -1,10 +1,10 @@
#! /bin/sh
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
# Free Software Foundation, Inc.
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011 Free Software Foundation, Inc.
timestamp='2010-01-22'
timestamp='2011-03-23'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@ -76,7 +76,7 @@ version="\
GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
@ -124,8 +124,9 @@ esac
# Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
@ -282,11 +283,13 @@ case $basic_machine in
| moxie \
| mt \
| msp430 \
| nds32 | nds32le | nds32be \
| nios | nios2 \
| ns16k | ns32k \
| open8 \
| or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
| rx \
| score \
@ -294,15 +297,24 @@ case $basic_machine in
| sh64 | sh64le \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
| spu | strongarm \
| tahoe | thumb | tic4x | tic80 | tron \
| spu \
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \
| v850 | v850e \
| we32k \
| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
basic_machine=$basic_machine-unknown
;;
c54x)
basic_machine=tic54x-unknown
;;
c55x)
basic_machine=tic55x-unknown
;;
c6x)
basic_machine=tic6x-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12 | picochip)
# Motorola 68HC11/12.
basic_machine=$basic_machine-unknown
@ -314,6 +326,18 @@ case $basic_machine in
basic_machine=mt-unknown
;;
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
xscaleeb)
basic_machine=armeb-unknown
;;
xscaleel)
basic_machine=armel-unknown
;;
# We use `pc' rather than `unknown'
# because (1) that's what they normally are, and
# (2) the word "unknown" tends to confuse beginning users.
@ -334,7 +358,7 @@ case $basic_machine in
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \
| elxsi-* \
@ -368,26 +392,28 @@ case $basic_machine in
| mmix-* \
| mt-* \
| msp430-* \
| nds32-* | nds32le-* | nds32be-* \
| nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \
| romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tile-* | tilegx-* \
| tron-* \
| ubicom32-* \
| v850-* | v850e-* | vax-* \
| we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
| x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \
| ymp-* \
| z8k-* | z80-*)
@ -482,6 +508,15 @@ case $basic_machine in
basic_machine=powerpc-ibm
os=-cnk
;;
c54x-*)
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c55x-*)
basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c6x-*)
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c90)
basic_machine=c90-cray
os=-unicos
@ -518,7 +553,7 @@ case $basic_machine in
basic_machine=craynv-cray
os=-unicosmp
;;
cr16)
cr16 | cr16-*)
basic_machine=cr16-unknown
os=-elf
;;
@ -841,6 +876,12 @@ case $basic_machine in
np1)
basic_machine=np1-gould
;;
neo-tandem)
basic_machine=neo-tandem
;;
nse-tandem)
basic_machine=nse-tandem
;;
nsr-tandem)
basic_machine=nsr-tandem
;;
@ -923,9 +964,10 @@ case $basic_machine in
;;
power) basic_machine=power-ibm
;;
ppc) basic_machine=powerpc-unknown
ppc | ppcbe) basic_machine=powerpc-unknown
;;
ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ppcle | powerpclittle | ppc-le | powerpc-little)
basic_machine=powerpcle-unknown
@ -1019,6 +1061,9 @@ case $basic_machine in
basic_machine=i860-stratus
os=-sysv4
;;
strongarm-* | thumb-*)
basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
sun2)
basic_machine=m68000-sun
;;
@ -1075,18 +1120,6 @@ case $basic_machine in
basic_machine=t90-cray
os=-unicos
;;
tic54x | c54x*)
basic_machine=tic54x-unknown
os=-coff
;;
tic55x | c55x*)
basic_machine=tic55x-unknown
os=-coff
;;
tic6x | c6x*)
basic_machine=tic6x-unknown
os=-coff
;;
# This must be matched before tile*.
tilegx*)
basic_machine=tilegx-unknown
@ -1163,6 +1196,9 @@ case $basic_machine in
xps | xps100)
basic_machine=xps100-honeywell
;;
xscale-* | xscalee[bl]-*)
basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
;;
ymp)
basic_machine=ymp-cray
os=-unicos
@ -1301,7 +1337,8 @@ case $os in
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
| -mingw32* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@ -1484,6 +1521,15 @@ case $basic_machine in
c4x-* | tic4x-*)
os=-coff
;;
tic54x-*)
os=-coff
;;
tic55x-*)
os=-coff
;;
tic6x-*)
os=-coff
;;
# This must come before the *-dec entry.
pdp10-*)
os=-tops20

File diff suppressed because it is too large Load Diff

View File

@ -97,6 +97,10 @@ AC_ARG_WITH(host_os, [], host_os=$withval)
dnl Checks for programs.
AC_PROG_CC
dnl ASIO and CXX bindings need a C++ compiler
if [[ "$with_asio" = "yes" ] || [ "$enable_cxx" = "yes" ]] ; then
AC_PROG_CXX
fi
AC_LIBTOOL_WIN32_DLL
AC_PROG_LIBTOOL
AC_PROG_INSTALL
@ -191,13 +195,16 @@ add_objects()
INCLUDES=portaudio.h
dnl Include directories needed by all implementations
CFLAGS="$CFLAGS -I\$(top_srcdir)/include -I\$(top_srcdir)/src/common"
case "${host_os}" in
darwin* )
dnl Mac OS X configuration
AC_DEFINE(PA_USE_COREAUDIO)
AC_DEFINE(PA_USE_COREAUDIO,1)
CFLAGS="$CFLAGS -Werror"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix -Werror"
LIBS="-framework CoreAudio -framework AudioToolbox -framework AudioUnit -framework Carbon"
if test "x$enable_mac_universal" = "xyes" ; then
@ -235,7 +242,7 @@ case "${host_os}" in
PADLL="portaudio.dll"
THREAD_CFLAGS="-mthreads"
SHARED_FLAGS="-shared"
CFLAGS="$CFLAGS -I\$(top_srcdir)/include -DPA_USE_WMME=0 -DPA_USE_ASIO=0 -DPA_USE_WDMKS=0 -DPA_USE_DS=0 -DPA_USE_WASAPI=0"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -DPA_USE_WMME=0 -DPA_USE_ASIO=0 -DPA_USE_WDMKS=0 -DPA_USE_DS=0 -DPA_USE_WASAPI=0"
if [[ "x$with_directx" = "xyes" ]]; then
DXDIR="$with_dxdir"
@ -244,7 +251,7 @@ case "${host_os}" in
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -ldsound -lole32"
#VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\""
#CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -I$DXDIR/include -UPA_USE_DS -DPA_USE_DS=1"
CFLAGS="$CFLAGS -I$DXDIR/include -UPA_USE_DS -DPA_USE_DS=1"
fi
if [[ "x$with_asio" = "xyes" ]]; then
@ -252,7 +259,14 @@ case "${host_os}" in
add_objects src/hostapi/asio/pa_asio.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/hostapi/asio/iasiothiscallresolver.o $ASIODIR/common/asio.o $ASIODIR/host/asiodrivers.o $ASIODIR/host/pc/asiolist.o
LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -lole32 -luuid"
CFLAGS="$CFLAGS -ffast-math -fomit-frame-pointer -I\$(top_srcdir)/src/common -I\$(top_srcdir)/src/hostapi/asio -I$ASIODIR/host/pc -I$ASIODIR/common -I$ASIODIR/host -UPA_USE_ASIO -DPA_USE_ASIO=1 -DWINDOWS"
CFLAGS="$CFLAGS -ffast-math -fomit-frame-pointer -I\$(top_srcdir)/src/hostapi/asio -I$ASIODIR/host/pc -I$ASIODIR/common -I$ASIODIR/host -UPA_USE_ASIO -DPA_USE_ASIO=1 -DWINDOWS"
dnl Setting the windows version flags below resolves a conflict between Interlocked*
dnl definitions in mingw winbase.h and Interlocked* hacks in ASIO SDK combase.h
dnl combase.h is included by asiodrvr.h
dnl PortAudio does not actually require Win XP (winver 501) APIs
CFLAGS="$CFLAGS -D_WIN32_WINNT=0x0501 -DWINVER=0x0501"
CXXFLAGS="$CFLAGS"
fi
@ -263,21 +277,21 @@ case "${host_os}" in
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -luuid -lsetupapi -lole32"
#VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\""
#CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -I$DXDIR/include -UPA_USE_WDMKS -DPA_USE_WDMKS=1"
CFLAGS="$CFLAGS -I$DXDIR/include -UPA_USE_WDMKS -DPA_USE_WDMKS=1"
fi
if [[ "x$with_wmme" = "xyes" ]]; then
add_objects src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o
LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -UPA_USE_WMME -DPA_USE_WMME=1"
CFLAGS="$CFLAGS -UPA_USE_WMME -DPA_USE_WMME=1"
fi
if [[ "x$with_wasapi" = "xyes" ]]; then
add_objects src/hostapi/wasapi/pa_win_wasapi.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/os/win/pa_win_waveformat.o
LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm -lole32"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -I\$(top_srcdir)/src/hostapi/wasapi/mingw-include -UPA_USE_WASAPI -DPA_USE_WASAPI=1"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/hostapi/wasapi/mingw-include -UPA_USE_WASAPI -DPA_USE_WASAPI=1"
fi
;;
@ -285,7 +299,7 @@ case "${host_os}" in
dnl Cygwin configuration
OTHER_OBJS="src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o"
CFLAGS="$CFLAGS -DPA_USE_DS=0 -DPA_USE_WDMKS=0 -DPA_USE_ASIO=0 -DPA_USE_WASAPI=0 -DPA_USE_WMME=1"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -DPA_USE_DS=0 -DPA_USE_WDMKS=0 -DPA_USE_ASIO=0 -DPA_USE_WASAPI=0 -DPA_USE_WMME=1"
LIBS="-lwinmm -lm"
PADLL="portaudio.dll"
THREAD_CFLAGS="-mthreads"
@ -304,7 +318,9 @@ case "${host_os}" in
dnl See the '#ifdef PA_USE_SGI' in file pa_unix/pa_unix_hostapis.c
dnl which selects the appropriate PaXXX_Initialize() function.
dnl
AC_DEFINE(PA_USE_SGI)
AC_DEFINE(PA_USE_SGI,1)
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix"
dnl The _REENTRANT option for pthread safety. Perhaps not necessary but it'll do no harm.
dnl
@ -322,6 +338,8 @@ case "${host_os}" in
*)
dnl Unix configuration
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix"
AC_CHECK_LIB(pthread, pthread_create,[have_pthread="yes"],
AC_MSG_ERROR([libpthread not found!]))
@ -330,7 +348,7 @@ case "${host_os}" in
LIBS="$LIBS -lasound"
OTHER_OBJS="$OTHER_OBJS src/hostapi/alsa/pa_linux_alsa.o"
INCLUDES="$INCLUDES pa_linux_alsa.h"
AC_DEFINE(PA_USE_ALSA)
AC_DEFINE(PA_USE_ALSA,1)
fi
if [[ "$have_jack" = "yes" ] && [ "$with_jack" != "no" ]] ; then
@ -338,7 +356,7 @@ case "${host_os}" in
CFLAGS="$CFLAGS $JACK_CFLAGS"
OTHER_OBJS="$OTHER_OBJS src/hostapi/jack/pa_jack.o src/common/pa_ringbuffer.o"
INCLUDES="$INCLUDES pa_jack.h"
AC_DEFINE(PA_USE_JACK)
AC_DEFINE(PA_USE_JACK,1)
fi
if [[ "$with_oss" != "no" ]] ; then
@ -347,14 +365,14 @@ case "${host_os}" in
DLL_LIBS="$DLL_LIBS -lossaudio"
LIBS="$LIBS -lossaudio"
fi
AC_DEFINE(PA_USE_OSS)
AC_DEFINE(PA_USE_OSS,1)
fi
if [[ "$have_asihpi" = "yes" ] && [ "$with_asihpi" != "no" ]] ; then
LIBS="$LIBS -lhpi"
DLL_LIBS="$DLL_LIBS -lhpi"
OTHER_OBJS="$OTHER_OBJS src/hostapi/asihpi/pa_linux_asihpi.o"
AC_DEFINE(PA_USE_ASIHPI)
AC_DEFINE(PA_USE_ASIHPI,1)
fi
DLL_LIBS="$DLL_LIBS -lm -lpthread"
@ -368,7 +386,7 @@ case "${host_os}" in
THREAD_CFLAGS="-mt"
;;
*)
SHARED_FLAGS="-shared -fPIC"
SHARED_FLAGS="-fPIC"
THREAD_CFLAGS="-pthread"
;;
esac

View File

@ -1,7 +1,7 @@
/** @page License PortAudio License
PortAudio Portable Real-Time Audio Library <br>
Copyright (c) 1999-2006 Ross Bencina, Phil Burk
Copyright (c) 1999-2011 Ross Bencina, Phil Burk

View File

@ -1,35 +1,60 @@
/* doxygen index page */
/** @mainpage
@section overview Overview
PortAudio is a cross-platform, open-source C language library for real-time audio input and output. The library provides functions that allow your software to acquire and output real-time audio streams from your computer's hardware audio interfaces. It is designed to simplify writing cross-platform audio applications, and also to simplify the development of audio software in general by hiding the complexities of dealing directly with each native audio API. PortAudio is used to implement sound recording, editing and mixing applications, software synthesizers, effects processors, music players, internet telephony applications, software defined radios and more. Supported platforms include MS Windows, Mac OS X and Linux. Third-party language bindings make it possible to call PortAudio from other programming languages including C++, C#, Python, PureBasic, FreePascal and Lazarus.
See the PortAudio website for further information http://www.portaudio.com
Read the @ref api_overview for a top-down view of the PortAudio API, its capabilities, functions and data structures. The documentation for PortAudio's main header file portaudio.h details the individual data types and functions that make up the API.
@section start_here Start here
To get started writing code check out the tutorials on the PortAudio Wiki:
http://www.portaudio.com/trac/wiki/TutorialDir/TutorialStart
- @ref api_overview<br>
A top-down view of the PortAudio API, its capabilities, functions and data structures
- <a href="http://www.portaudio.com/trac/wiki/TutorialDir/TutorialStart">PortAudio Tutorials</a><br>
Get started writing code with PortAudio tutorials
- @ref examples_src "Examples"<br>
Simple example programs demonstrating PortAudio usage
- @ref License<br>
PortAudio is licenced under the MIT Expat open source licence. We make a non-binding request for you to contribute your changes back to the project.
@section reference API Reference
- portaudio.h Portable API<br>
Detailed documentation for each portable API function and data type
- @ref public_header "Host API Specific Extensions"<br>
Documentation for non-portable platform-specific host API extensions
@section resources Resources
- <a href="http://www.portaudio.com">The PortAudio website</a>
- <a href="http://music.columbia.edu/mailman/listinfo/portaudio/">Our mailing list for users and developers</a><br>
- <a href="http://www.portaudio.com/trac">The PortAudio wiki</a>
@section developer_resources Developer Resources
@if INTERNAL
- @ref srcguide
@endif
- <a href="http://www.portaudio.com/trac">Our Trac wiki and issue tracking system</a>
- <a href="http://www.portaudio.com/docs/proposals/014-StyleGuide.html">Coding guidelines</a>
If you're interested in helping out with PortAudio development we're more than happy for you to be involved. Just drop by the PortAudio mailing list and ask how you can help. Or <a href="http://www.portaudio.com/trac/report/3">check out the starter tickets in Trac</a>.
@section older_api_versions Older API Versions
This documentation covers the current API version: PortAudio V19, API version 2.0. API 2.0 differs in a number of ways from previous versions (most often encountered in PortAudio V18); please consult the enhancement proposals for details of what was added/changed for V19:
http://www.portaudio.com/docs/proposals/index.html
You might also be interested in:
- @ref srcguide
- The @ref License
- Our mailing list for users and developers:
http://music.columbia.edu/mailman/listinfo/portaudio/
- Our issue tracking system:
http://www.portaudio.com/trac
- Coding guidelines:
http://www.portaudio.com/docs/proposals/014-StyleGuide.html
If you're interested in helping out with PortAudio development we're more than happy for you to be involved. Just drop by the PortAudio mailing list and ask how you can help.
*/

View File

@ -7,33 +7,45 @@
*/
/**
@internal
@defgroup common_src Source code common to all implementations
*/
/**
@internal
@defgroup win_src Source code common to all Windows implementations
*/
/**
@internal
@defgroup unix_src Source code common to all Unix implementations
*/
/**
@internal
@defgroup macosx_src Source code common to all Macintosh implementations
*/
/**
@internal
@defgroup hostapi_src Source code for specific Host APIs
*/
/**
@defgroup test_src Test and example programs
@internal
@defgroup test_src Test programs
*/
/**
@page srcguide A guide to the PortAudio sources.
@defgroup examples_src Example programs demonstrating PortAudio usage
*/
/**
@internal
@page srcguide A guide to the PortAudio sources
- \ref public_header
- \ref examples_src
- \ref common_src
- \ref win_src
- \ref unix_src

View File

@ -61,24 +61,21 @@ typedef struct PaWinDirectSoundStreamInfo{
PaHostApiTypeId hostApiType; /**< paDirectSound */
unsigned long version; /**< 2 */
unsigned long flags;
unsigned long flags; /**< enable other features of this struct */
/* low-level latency setting support
Control the size of host buffers in order to set latency. They will
be used instead of the generic parameters to Pa_OpenStream() if
flags contains the paWinDirectSoundUseLowLevelLatencyParameters
flag.
/**
low-level latency setting support
Sets the size of the DirectSound host buffer.
When flags contains the paWinDirectSoundUseLowLevelLatencyParameters flag,
this size will be used instead of interpreting the generic latency
parameters to Pa_OpenStream(). If the flag is not set this value is ignored.
If PaWinDirectSoundStreamInfo structures with paWinDirectSoundUseLowLevelLatencyParameters
are supplied for both input and output in a full duplex stream, then the
input and output framesPerBuffer must be the same, or the larger of the
two must be a multiple of the smaller, otherwise a
paIncompatibleHostApiSpecificStreamInfo error will be returned from
Pa_OpenStream().
If the stream is a full duplex stream the implementation requires that
the values of framesPerBuffer for input and output match (if both are specified).
*/
unsigned long framesPerBuffer; /* NOT IMPLEMENTED see http://www.portaudio.com/trac/ticket/129 */
unsigned long framesPerBuffer;
/*
/**
support for WAVEFORMATEXTENSIBLE channel masks. If flags contains
paWinDirectSoundUseChannelMask this allows you to specify which speakers
to address in a multichannel stream. Constants for channelMask

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* $Id: pa_linux_alsa.c 1691 2011-05-26 20:19:19Z aknudsen $
* $Id: pa_linux_alsa.c 1798 2011-12-08 19:43:29Z alan_horstmann $
* PortAudio Portable Real-Time Audio Library
* Latest Version at: http://www.portaudio.com
* ALSA implementation by Joshua Haberman and Arve Knudsen
@ -957,10 +957,10 @@ typedef struct
HwDevInfo predefinedNames[] = {
{ "center_lfe", NULL, 0, 1, 0 },
/* { "default", NULL, 0, 1, 0 }, */
/* { "dmix", NULL, 0, 1, 0 }, */
/* { "default", NULL, 0, 1, 1 }, */
{ "dmix", NULL, 0, 1, 0 },
/* { "dpl", NULL, 0, 1, 0 }, */
/* { "dsnoop", NULL, 0, 1, 0 }, */
/* { "dsnoop", NULL, 0, 0, 1 }, */
{ "front", NULL, 0, 1, 0 },
{ "iec958", NULL, 0, 1, 0 },
/* { "modem", NULL, 0, 1, 0 }, */

View File

@ -1,12 +1,16 @@
/*
* $Id:$
* PortAudio Portable Real-Time Audio Library
* Latest Version at: http://www.portaudio.com
* AudioScience HPI implementation by Fred Gleason, Ludwig Schwardt and
* Eliot Blennerhassett
*
* PortAudio v18 version of AudioScience HPI driver by Fred Gleason <fredg@salemradiolabs.com>
* PortAudio v19 version of AudioScience HPI driver by Ludwig Schwardt <schwardt@sun.ac.za>
* Copyright (c) 2003 Fred Gleason <fredg@salemradiolabs.com>
* Copyright (c) 2005,2006 Ludwig Schwardt <schwardt@sun.ac.za>
* Copyright (c) 2011 Eliot Blennerhassett <eblennerhassett@audioscience.com>
*
* Copyright (c) 2003 Fred Gleason
* Copyright (c) 2005,2006 Ludwig Schwardt
* Based on the Open Source API proposed by Ross Bencina
* Copyright (c) 1999-2008 Ross Bencina, Phil Burk
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
@ -61,7 +65,7 @@
Documentation for the HPI API can be found at:
http://www.audioscience.com/internet/download/sdk/spchpi.pdf
http://www.audioscience.com/internet/download/sdk/hpi_usermanual_html/html/index.html
The Linux HPI driver itself (a kernel module + library) can be downloaded from:
@ -134,8 +138,6 @@
Output buffer priming via the user callback (i.e. paPrimeOutputBuffersUsingStreamCallback
and friends) is not implemented yet. All output is primed with silence.
Please send bug reports etc. to Ludwig Schwardt <schwardt@sun.ac.za>
*/
#include <unistd.h>
@ -193,7 +195,7 @@
/** Check return value of HPI function, and map it to PaError */
#define PA_ASIHPI_UNLESS_(expr, paError) \
do { \
HW16 hpiError = (expr); \
hpi_err_t hpiError = (expr); \
/* If HPI error occurred */ \
if( UNLIKELY( hpiError ) ) \
{ \
@ -265,8 +267,6 @@ typedef struct PaAsiHpiHostApiRepresentation
/* implementation specific data goes here */
PaHostApiIndex hostApiIndex;
/** HPI subsystem pointer */
HPI_HSUBSYS *subSys;
}
PaAsiHpiHostApiRepresentation;
@ -280,20 +280,18 @@ typedef struct PaAsiHpiDeviceInfo
/* implementation specific data goes here */
/** HPI subsystem (required for most HPI calls) */
HPI_HSUBSYS *subSys;
/** Adapter index */
HW16 adapterIndex;
uint16_t adapterIndex;
/** Adapter model number (hex) */
HW16 adapterType;
uint16_t adapterType;
/** Adapter HW/SW version */
HW16 adapterVersion;
uint16_t adapterVersion;
/** Adapter serial number */
HW32 adapterSerialNumber;
uint32_t adapterSerialNumber;
/** Stream number */
HW16 streamIndex;
uint16_t streamIndex;
/** 0=Input, 1=Output (HPI streams are either input or output but not both) */
HW16 streamIsOutput;
uint16_t streamIsOutput;
}
PaAsiHpiDeviceInfo;
@ -328,27 +326,25 @@ typedef struct PaAsiHpiStreamComponent
{
/** Device information (HPI handles, etc) */
PaAsiHpiDeviceInfo *hpiDevice;
/** Stream handle, as passed to HPI interface.
HACK: we assume types HPI_HISTREAM and HPI_HOSTREAM are the same...
(both are HW32 up to version 3.00 of ASIHPI, and hopefully they stay that way) */
HPI_HISTREAM hpiStream;
/** Stream handle, as passed to HPI interface. */
hpi_handle_t hpiStream;
/** Stream format, as passed to HPI interface */
HPI_FORMAT hpiFormat;
struct hpi_format hpiFormat;
/** Number of bytes per frame, derived from hpiFormat and saved for convenience */
HW32 bytesPerFrame;
uint32_t bytesPerFrame;
/** Size of hardware (on-card) buffer of stream in bytes */
HW32 hardwareBufferSize;
uint32_t hardwareBufferSize;
/** Size of host (BBM) buffer of stream in bytes (if used) */
HW32 hostBufferSize;
uint32_t hostBufferSize;
/** Upper limit on the utilization of output stream buffer (both hardware and host).
This prevents large latencies in an output-only stream with a potentially huge buffer
and a fast data generator, which would otherwise keep the hardware buffer filled to
capacity. See also the "Hardware Buffering=off" option in the AudioScience WAV driver. */
HW32 outputBufferCap;
uint32_t outputBufferCap;
/** Sample buffer (halfway station between HPI and buffer processor) */
HW8 *tempBuffer;
uint8_t *tempBuffer;
/** Sample buffer size, in bytes */
HW32 tempBufferSize;
uint32_t tempBufferSize;
}
PaAsiHpiStreamComponent;
@ -369,7 +365,7 @@ typedef struct PaAsiHpiStream
PaAsiHpiStreamComponent *input, *output;
/** Polling interval (in milliseconds) */
HW32 pollingInterval;
uint32_t pollingInterval;
/** Are we running in callback mode? */
int callbackMode;
/** Number of frames to transfer at a time to/from HPI */
@ -401,23 +397,23 @@ PaAsiHpiStream;
typedef struct PaAsiHpiStreamInfo
{
/** HPI stream state (HPI_STATE_STOPPED, HPI_STATE_PLAYING, etc.) */
HW16 state;
uint16_t state;
/** Size (in bytes) of recording/playback data buffer in HPI driver */
HW32 bufferSize;
uint32_t bufferSize;
/** Amount of data (in bytes) available in the buffer */
HW32 dataSize;
uint32_t dataSize;
/** Number of frames played/recorded since last stream reset */
HW32 frameCounter;
uint32_t frameCounter;
/** Amount of data (in bytes) in hardware (on-card) buffer.
This differs from dataSize if bus mastering (BBM) is used, which introduces another
driver-level buffer to which dataSize/bufferSize then refers. */
HW32 auxDataSize;
uint32_t auxDataSize;
/** Total number of data frames currently buffered by HPI driver (host + hw buffers) */
HW32 totalBufferedData;
uint32_t totalBufferedData;
/** Size of immediately available data (for input) or space (for output) in frames.
This only checks the first-level buffer (typically host buffer). This amount can be
transferred immediately. */
HW32 availableFrames;
uint32_t availableFrames;
/** Indicates that hardware buffer is getting too full */
int overflow;
/** Indicates that hardware buffer is getting too empty */
@ -479,21 +475,21 @@ static void *CallbackThreadFunc( void *userData );
/* Functions specific to this API */
static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostApi );
static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat );
static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat );
static uint16_t PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat );
static PaSampleFormat PaAsiHpi_HpiToPaFormat( uint16_t hpiFormat );
static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi,
const PaStreamParameters *parameters, double sampleRate,
PaAsiHpiDeviceInfo **hpiDevice, HPI_FORMAT *hpiFormat );
PaAsiHpiDeviceInfo **hpiDevice, struct hpi_format *hpiFormat );
static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat,
HPI_HISTREAM *hpiStream );
const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
hpi_handle_t *hpiStream );
static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat,
HPI_HOSTREAM *hpiStream );
const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
hpi_handle_t *hpiStream );
static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info );
static void PaAsiHpi_StreamComponentDump( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStream *stream );
static void PaAsiHpi_StreamDump( PaAsiHpiStream *stream );
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32 pollingInterval,
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, uint32_t pollingInterval,
unsigned long framesPerPaHostBuffer, PaTime suggestedLatency );
static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream );
static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed );
@ -529,43 +525,38 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
PaUtilHostApiRepresentation *hostApi = &hpiHostApi->baseHostApiRep;
PaHostApiInfo *baseApiInfo = &hostApi->info;
PaAsiHpiDeviceInfo *hpiDeviceList;
HW16 adapterList[ HPI_MAX_ADAPTERS ];
HW16 numAdapters;
HW16 hpiError = 0;
int numAdapters;
hpi_err_t hpiError = 0;
int i, j, deviceCount = 0, deviceIndex = 0;
assert( hpiHostApi );
assert( hpiHostApi->subSys );
/* Look for adapters (not strictly necessary, as AdapterOpen can do the same, but this */
/* way we have fewer errors since we do not try to open adapters we know aren't there) */
/* Errors not considered critical here (subsystem may report 0 devices), but report them */
/* in debug mode. */
PA_ASIHPI_UNLESS_( HPI_SubSysFindAdapters( hpiHostApi->subSys, &numAdapters,
adapterList, HPI_MAX_ADAPTERS ), paNoError );
PA_ASIHPI_UNLESS_( HPI_SubSysGetNumAdapters( NULL, &numAdapters), paNoError );
/* First open and count the number of devices (= number of streams), to ease memory allocation */
for( i=0; i < HPI_MAX_ADAPTERS; ++i )
for( i=0; i < numAdapters; ++i )
{
HW16 inStreams, outStreams;
HW16 version;
HW32 serial;
HW16 type;
uint16_t inStreams, outStreams;
uint16_t version;
uint32_t serial;
uint16_t type;
uint32_t idx;
/* If no adapter found at this index, skip it */
if( adapterList[i] == 0 )
hpiError = HPI_SubSysGetAdapter(NULL, i, &idx, &type);
if (hpiError)
continue;
/* Try to open adapter */
hpiError = HPI_AdapterOpen( hpiHostApi->subSys, i );
hpiError = HPI_AdapterOpen( NULL, idx );
/* Report error and skip to next device on failure */
if( hpiError )
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
continue;
}
hpiError = HPI_AdapterGetInfo( hpiHostApi->subSys, i,
&outStreams, &inStreams, &version, &serial, &type );
hpiError = HPI_AdapterGetInfo( NULL, idx, &outStreams, &inStreams,
&version, &serial, &type );
/* Skip to next device on failure */
if( hpiError )
{
@ -597,19 +588,20 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
paInsufficientMemory );
/* Now query devices again for information */
for( i=0; i < HPI_MAX_ADAPTERS; ++i )
for( i=0; i < numAdapters; ++i )
{
HW16 inStreams, outStreams;
HW16 version;
HW32 serial;
HW16 type;
uint16_t inStreams, outStreams;
uint16_t version;
uint32_t serial;
uint16_t type;
uint32_t idx;
/* If no adapter found at this index, skip it */
if( adapterList[i] == 0 )
hpiError = HPI_SubSysGetAdapter( NULL, i, &idx, &type );
if (hpiError)
continue;
/* Assume adapter is still open from previous round */
hpiError = HPI_AdapterGetInfo( hpiHostApi->subSys, i,
hpiError = HPI_AdapterGetInfo( NULL, idx,
&outStreams, &inStreams, &version, &serial, &type );
/* Report error and skip to next device on failure */
if( hpiError )
@ -620,7 +612,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
else
{
PA_DEBUG(( "Found HPI Adapter ID=%4X Idx=%d #In=%d #Out=%d S/N=%d HWver=%c%d DSPver=%03d\n",
type, i, inStreams, outStreams, serial,
type, idx, inStreams, outStreams, serial,
((version>>3)&0xf)+'A', /* Hw version major */
version&0x7, /* Hw version minor */
((version>>13)*100)+((version>>7)&0x3f) /* DSP code version */
@ -637,8 +629,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) );
/* Set implementation-specific device details */
hpiDevice->subSys = hpiHostApi->subSys;
hpiDevice->adapterIndex = i;
hpiDevice->adapterIndex = idx;
hpiDevice->adapterType = type;
hpiDevice->adapterVersion = version;
hpiDevice->adapterSerialNumber = serial;
@ -680,8 +671,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) );
/* Set implementation-specific device details */
hpiDevice->subSys = hpiHostApi->subSys;
hpiDevice->adapterIndex = i;
hpiDevice->adapterIndex = idx;
hpiDevice->adapterType = type;
hpiDevice->adapterVersion = version;
hpiDevice->adapterSerialNumber = serial;
@ -740,32 +730,31 @@ PaError PaAsiHpi_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiInd
PaAsiHpiHostApiRepresentation *hpiHostApi = NULL;
PaHostApiInfo *baseApiInfo;
/* Try to initialize HPI subsystem */
if (!HPI_SubSysCreate())
{
/* the V19 development docs say that if an implementation
* detects that it cannot be used, it should return a NULL
* interface and paNoError */
PA_DEBUG(( "Could not open HPI interface\n" ));
*hostApi = NULL;
return paNoError;
}
else
{
uint32_t hpiVersion;
PA_ASIHPI_UNLESS_( HPI_SubSysGetVersionEx( NULL, &hpiVersion ), paUnanticipatedHostError );
PA_DEBUG(( "HPI interface v%d.%02d.%02d\n",
hpiVersion >> 16, (hpiVersion >> 8) & 0x0F, (hpiVersion & 0x0F) ));
}
/* Allocate host API structure */
PA_UNLESS_( hpiHostApi = (PaAsiHpiHostApiRepresentation*) PaUtil_AllocateMemory(
sizeof(PaAsiHpiHostApiRepresentation) ), paInsufficientMemory );
PA_UNLESS_( hpiHostApi->allocations = PaUtil_CreateAllocationGroup(), paInsufficientMemory );
hpiHostApi->hostApiIndex = hostApiIndex;
hpiHostApi->subSys = NULL;
/* Try to initialize HPI subsystem */
if( ( hpiHostApi->subSys = HPI_SubSysCreate() ) == NULL)
{
/* the V19 development docs say that if an implementation
* detects that it cannot be used, it should return a NULL
* interface and paNoError */
PA_DEBUG(( "Could not open HPI interface\n" ));
result = paNoError;
*hostApi = NULL;
goto error;
}
else
{
HW32 hpiVersion;
PA_ASIHPI_UNLESS_( HPI_SubSysGetVersion( hpiHostApi->subSys, &hpiVersion ), paUnanticipatedHostError );
PA_DEBUG(( "HPI interface v%d.%02d\n",
hpiVersion >> 8, 10*((hpiVersion & 0xF0) >> 4) + (hpiVersion & 0x0F) ));
}
*hostApi = &hpiHostApi->baseHostApiRep;
baseApiInfo = &((*hostApi)->info);
@ -799,8 +788,8 @@ PaError PaAsiHpi_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiInd
return result;
error:
/* Clean up memory */
Terminate( (PaUtilHostApiRepresentation *)hpiHostApi );
if (hpiHostApi)
PaUtil_FreeMemory( hpiHostApi );
return result;
}
@ -820,9 +809,7 @@ static void Terminate( struct PaUtilHostApiRepresentation *hostApi )
if( hpiHostApi )
{
/* Get rid of HPI-specific structures */
if( hpiHostApi->subSys )
{
HW16 lastAdapterIndex = HPI_MAX_ADAPTERS;
uint16_t lastAdapterIndex = HPI_MAX_ADAPTERS;
/* Iterate through device list and close adapters */
for( i=0; i < hostApi->info.deviceCount; ++i )
{
@ -831,14 +818,13 @@ static void Terminate( struct PaUtilHostApiRepresentation *hostApi )
if( hpiDevice->adapterIndex != lastAdapterIndex )
{
/* Ignore errors (report only during debugging) */
PA_ASIHPI_UNLESS_( HPI_AdapterClose( hpiHostApi->subSys,
PA_ASIHPI_UNLESS_( HPI_AdapterClose( NULL,
hpiDevice->adapterIndex ), paNoError );
lastAdapterIndex = hpiDevice->adapterIndex;
}
}
/* Finally dismantle HPI subsystem */
HPI_SubSysFree( hpiHostApi->subSys );
}
HPI_SubSysFree( NULL );
if( hpiHostApi->allocations )
{
@ -859,7 +845,7 @@ error:
@return HPI sample format
*/
static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat )
static uint16_t PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat )
{
/* Ignore interleaving flag */
switch( paFormat & ~paNonInterleaved )
@ -893,7 +879,7 @@ static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat )
@return PortAudio sample format
*/
static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat )
static PaSampleFormat PaAsiHpi_HpiToPaFormat( uint16_t hpiFormat )
{
switch( hpiFormat )
{
@ -938,11 +924,11 @@ static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat )
*/
static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi,
const PaStreamParameters *parameters, double sampleRate,
PaAsiHpiDeviceInfo **hpiDevice, HPI_FORMAT *hpiFormat )
PaAsiHpiDeviceInfo **hpiDevice, struct hpi_format *hpiFormat )
{
int maxChannelCount = 0;
PaSampleFormat hostSampleFormat = 0;
HW16 hpiError = 0;
hpi_err_t hpiError = 0;
/* Unless alternate device specification is supported, reject the use of
paUseHostApiSpecificDeviceSpecification */
@ -979,9 +965,9 @@ static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostAp
hostSampleFormat = PaUtil_SelectClosestAvailableFormat(PA_ASIHPI_AVAILABLE_FORMATS_,
parameters->sampleFormat );
/* Setup format + info objects */
hpiError = HPI_FormatCreate( hpiFormat, (HW16)parameters->channelCount,
hpiError = HPI_FormatCreate( hpiFormat, (uint16_t)parameters->channelCount,
PaAsiHpi_PaToHpiFormat( hostSampleFormat ),
(HW32)sampleRate, 0, 0 );
(uint32_t)sampleRate, 0, 0 );
if( hpiError )
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
@ -1016,25 +1002,25 @@ static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostAp
@return PortAudio error code (typically indicating a problem with stream format or device)
*/
static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat,
HPI_HISTREAM *hpiStream )
const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
hpi_handle_t *hpiStream )
{
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaError result = paNoError;
HW16 hpiError = 0;
hpi_err_t hpiError = 0;
/* Catch misplaced output devices, as they typically have 0 input channels */
PA_UNLESS_( !hpiDevice->streamIsOutput, paInvalidChannelCount );
/* Try to open input stream */
PA_ASIHPI_UNLESS_( HPI_InStreamOpen( hpiHostApi->subSys, hpiDevice->adapterIndex,
PA_ASIHPI_UNLESS_( HPI_InStreamOpen( NULL, hpiDevice->adapterIndex,
hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable );
/* Set input format (checking it in the process) */
/* Could also use HPI_InStreamQueryFormat, but this economizes the process */
hpiError = HPI_InStreamSetFormat( hpiHostApi->subSys, *hpiStream, (HPI_FORMAT*)hpiFormat );
hpiError = HPI_InStreamSetFormat( NULL, *hpiStream, (struct hpi_format*)hpiFormat );
if( hpiError )
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
PA_ASIHPI_UNLESS_( HPI_InStreamClose( hpiHostApi->subSys, *hpiStream ), paNoError );
PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL, *hpiStream ), paNoError );
switch( hpiError )
{
case HPI_ERROR_INVALID_FORMAT:
@ -1071,25 +1057,25 @@ error:
@return PortAudio error code (typically indicating a problem with stream format or device)
*/
static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat,
HPI_HOSTREAM *hpiStream )
const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
hpi_handle_t *hpiStream )
{
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaError result = paNoError;
HW16 hpiError = 0;
hpi_err_t hpiError = 0;
/* Catch misplaced input devices, as they typically have 0 output channels */
PA_UNLESS_( hpiDevice->streamIsOutput, paInvalidChannelCount );
/* Try to open output stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamOpen( hpiHostApi->subSys, hpiDevice->adapterIndex,
PA_ASIHPI_UNLESS_( HPI_OutStreamOpen( NULL, hpiDevice->adapterIndex,
hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable );
/* Check output format (format is set on first write to output stream) */
hpiError = HPI_OutStreamQueryFormat( hpiHostApi->subSys, *hpiStream, (HPI_FORMAT*)hpiFormat );
hpiError = HPI_OutStreamQueryFormat( NULL, *hpiStream, (struct hpi_format*)hpiFormat );
if( hpiError )
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( hpiHostApi->subSys, *hpiStream ), paNoError );
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL, *hpiStream ), paNoError );
switch( hpiError )
{
case HPI_ERROR_INVALID_FORMAT:
@ -1135,12 +1121,12 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
PaError result = paFormatIsSupported;
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaAsiHpiDeviceInfo *hpiDevice = NULL;
HPI_FORMAT hpiFormat;
struct hpi_format hpiFormat;
/* Input stream */
if( inputParameters )
{
HPI_HISTREAM hpiStream;
hpi_handle_t hpiStream;
PA_DEBUG(( "%s: Checking input params: dev=%d, sr=%d, chans=%d, fmt=%d\n",
__FUNCTION__, inputParameters->device, (int)sampleRate,
inputParameters->channelCount, inputParameters->sampleFormat ));
@ -1150,13 +1136,13 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
/* Open stream to further check format */
PA_ENSURE_( PaAsiHpi_OpenInput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) );
/* Close stream again */
PA_ASIHPI_UNLESS_( HPI_InStreamClose( hpiHostApi->subSys, hpiStream ), paNoError );
PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL, hpiStream ), paNoError );
}
/* Output stream */
if( outputParameters )
{
HPI_HOSTREAM hpiStream;
hpi_handle_t hpiStream;
PA_DEBUG(( "%s: Checking output params: dev=%d, sr=%d, chans=%d, fmt=%d\n",
__FUNCTION__, outputParameters->device, (int)sampleRate,
outputParameters->channelCount, outputParameters->sampleFormat ));
@ -1166,7 +1152,7 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
/* Open stream to further check format */
PA_ENSURE_( PaAsiHpi_OpenOutput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) );
/* Close stream again */
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( hpiHostApi->subSys, hpiStream ), paNoError );
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL, hpiStream ), paNoError );
}
error:
@ -1188,9 +1174,9 @@ error:
static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info )
{
PaError result = paDeviceUnavailable;
HW16 state;
HW32 bufferSize, dataSize, frameCounter, auxDataSize, threshold;
HW32 hwBufferSize, hwDataSize;
uint16_t state;
uint32_t bufferSize, dataSize, frameCounter, auxDataSize, threshold;
uint32_t hwBufferSize, hwDataSize;
assert( streamComp );
assert( info );
@ -1212,14 +1198,14 @@ static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAs
/* Obtain detailed stream info (either input or output) */
if( streamComp->hpiDevice->streamIsOutput )
{
PA_ASIHPI_UNLESS_( HPI_OutStreamGetInfoEx( streamComp->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamGetInfoEx( NULL,
streamComp->hpiStream,
&state, &bufferSize, &dataSize, &frameCounter,
&auxDataSize ), paUnanticipatedHostError );
}
else
{
PA_ASIHPI_UNLESS_( HPI_InStreamGetInfoEx( streamComp->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_InStreamGetInfoEx( NULL,
streamComp->hpiStream,
&state, &bufferSize, &dataSize, &frameCounter,
&auxDataSize ), paUnanticipatedHostError );
@ -1479,7 +1465,7 @@ static void PaAsiHpi_StreamDump( PaAsiHpiStream *stream )
@return PortAudio error code (possibly paBufferTooBig or paInsufficientMemory)
*/
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32 pollingInterval,
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, uint32_t pollingInterval,
unsigned long framesPerPaHostBuffer, PaTime suggestedLatency )
{
PaError result = paNoError;
@ -1499,8 +1485,8 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
/* Check if BBM (background bus mastering) is to be enabled */
if( PA_ASIHPI_USE_BBM_ )
{
HW32 bbmBufferSize = 0, preLatencyBufferSize = 0;
HW16 hpiError = 0;
uint32_t bbmBufferSize = 0, preLatencyBufferSize = 0;
hpi_err_t hpiError = 0;
PaTime pollingOverhead;
/* Check overhead of Pa_Sleep() call (minimum sleep duration in ms -> OS dependent) */
@ -1510,7 +1496,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
PA_DEBUG(( "polling overhead = %f ms (length of 0-second sleep)\n", pollingOverhead ));
/* Obtain minimum recommended size for host buffer (in bytes) */
PA_ASIHPI_UNLESS_( HPI_StreamEstimateBufferSize( &streamComp->hpiFormat,
pollingInterval + (HW32)ceil( pollingOverhead ),
pollingInterval + (uint32_t)ceil( pollingOverhead ),
&bbmBufferSize ), paUnanticipatedHostError );
/* BBM places more stringent requirements on buffer size (see description */
/* of HPI_StreamEstimateBufferSize in HPI API document) */
@ -1528,27 +1514,26 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
{
/* Save old buffer size, to be retried if new size proves too big */
preLatencyBufferSize = bbmBufferSize;
bbmBufferSize = (HW32)ceil( suggestedLatency * streamComp->bytesPerFrame
bbmBufferSize = (uint32_t)ceil( suggestedLatency * streamComp->bytesPerFrame
* streamComp->hpiFormat.dwSampleRate );
}
}
/* Choose closest memory block boundary (HPI API document states that
"a buffer size of Nx4096 - 20 makes the best use of memory"
(under the entry for HPI_StreamEstimateBufferSize)) */
bbmBufferSize = ((HW32)ceil((bbmBufferSize + 20)/4096.0))*4096 - 20;
bbmBufferSize = ((uint32_t)ceil((bbmBufferSize + 20)/4096.0))*4096 - 20;
streamComp->hostBufferSize = bbmBufferSize;
/* Allocate BBM host buffer (this enables bus mastering transfers in background) */
if( streamComp->hpiDevice->streamIsOutput )
hpiError = HPI_OutStreamHostBufferAllocate( streamComp->hpiDevice->subSys,
hpiError = HPI_OutStreamHostBufferAllocate( NULL,
streamComp->hpiStream,
bbmBufferSize );
else
hpiError = HPI_InStreamHostBufferAllocate( streamComp->hpiDevice->subSys,
hpiError = HPI_InStreamHostBufferAllocate( NULL,
streamComp->hpiStream,
bbmBufferSize );
if( hpiError )
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
/* Indicate that BBM is disabled */
streamComp->hostBufferSize = 0;
/* Retry with smaller buffer size (transfers will still work, but not via BBM) */
@ -1561,11 +1546,11 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
preLatencyBufferSize, bbmBufferSize ));
bbmBufferSize = preLatencyBufferSize;
if( streamComp->hpiDevice->streamIsOutput )
hpiError = HPI_OutStreamHostBufferAllocate( streamComp->hpiDevice->subSys,
hpiError = HPI_OutStreamHostBufferAllocate( NULL,
streamComp->hpiStream,
bbmBufferSize );
else
hpiError = HPI_InStreamHostBufferAllocate( streamComp->hpiDevice->subSys,
hpiError = HPI_InStreamHostBufferAllocate( NULL,
streamComp->hpiStream,
bbmBufferSize );
/* Another round of error checking */
@ -1598,8 +1583,10 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
}
/* If BBM not supported, foreground transfers will be used, but not a show-stopper */
/* Anything else is an error */
else if( hpiError != HPI_ERROR_INVALID_OPERATION )
else if (( hpiError != HPI_ERROR_INVALID_OPERATION ) &&
( hpiError != HPI_ERROR_INVALID_FUNC ))
{
PA_ASIHPI_REPORT_ERROR_( hpiError );
result = paUnanticipatedHostError;
goto error;
}
@ -1623,7 +1610,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
PaTime latency = suggestedLatency > 0.0 ? suggestedLatency :
streamComp->hpiDevice->baseDeviceInfo.defaultHighOutputLatency;
streamComp->outputBufferCap =
(HW32)ceil( latency * streamComp->bytesPerFrame * streamComp->hpiFormat.dwSampleRate );
(uint32_t)ceil( latency * streamComp->bytesPerFrame * streamComp->hpiFormat.dwSampleRate );
/* The cap should not be too small, to prevent underflow */
if( streamComp->outputBufferCap < 4*paHostBufferSize )
streamComp->outputBufferCap = 4*paHostBufferSize;
@ -1635,7 +1622,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
/* Temp buffer size should be multiple of PA host buffer size (or 1x, if using fixed blocks) */
streamComp->tempBufferSize = paHostBufferSize;
/* Allocate temp buffer */
PA_UNLESS_( streamComp->tempBuffer = (HW8 *)PaUtil_AllocateMemory( streamComp->tempBufferSize ),
PA_UNLESS_( streamComp->tempBuffer = (uint8_t *)PaUtil_AllocateMemory( streamComp->tempBufferSize ),
paInsufficientMemory );
error:
return result;
@ -1725,7 +1712,7 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
By keeping the frames a multiple of 4, this is ensured even for 8-bit mono sound. */
framesPerHostBuffer = (framesPerHostBuffer / 4) * 4;
/* Polling is based on time length (in milliseconds) of user-requested block size */
stream->pollingInterval = (HW32)ceil( 1000.0*framesPerHostBuffer/sampleRate );
stream->pollingInterval = (uint32_t)ceil( 1000.0*framesPerHostBuffer/sampleRate );
assert( framesPerHostBuffer > 0 );
/* Open underlying streams, check formats and allocate buffers */
@ -1890,7 +1877,7 @@ static PaError CloseStream( PaStream *s )
/* Close HPI stream (freeing BBM host buffer in the process, if used) */
if( stream->input->hpiStream )
{
PA_ASIHPI_UNLESS_( HPI_InStreamClose( stream->input->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL,
stream->input->hpiStream ), paUnanticipatedHostError );
}
/* Free temp buffer and stream component */
@ -1902,7 +1889,7 @@ static PaError CloseStream( PaStream *s )
/* Close HPI stream (freeing BBM host buffer in the process, if used) */
if( stream->output->hpiStream )
{
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( stream->output->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL,
stream->output->hpiStream ), paUnanticipatedHostError );
}
/* Free temp buffer and stream component */
@ -1933,9 +1920,6 @@ static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream )
PaAsiHpiStreamComponent *out;
PaUtilZeroer *zeroer;
PaSampleFormat outputFormat;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
HPI_DATA data;
#endif
assert( stream );
out = stream->output;
/* Only continue if stream has output channels */
@ -1944,28 +1928,19 @@ static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream )
assert( out->tempBuffer );
/* Clear all existing data in hardware playback buffer */
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( out->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( NULL,
out->hpiStream ), paUnanticipatedHostError );
/* Fill temp buffer with silence */
outputFormat = PaAsiHpi_HpiToPaFormat( out->hpiFormat.wFormat );
zeroer = PaUtil_SelectZeroer( outputFormat );
zeroer(out->tempBuffer, 1, out->tempBufferSize / Pa_GetSampleSize(outputFormat) );
/* Write temp buffer to hardware fifo twice, to get started */
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( out->hpiDevice->subSys, out->hpiStream,
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL, out->hpiStream,
out->tempBuffer, out->tempBufferSize, &out->hpiFormat),
paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( out->hpiDevice->subSys, out->hpiStream,
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL, out->hpiStream,
out->tempBuffer, out->tempBufferSize, &out->hpiFormat),
paUnanticipatedHostError );
#else
PA_ASIHPI_UNLESS_( HPI_DataCreate( &data, &out->hpiFormat, out->tempBuffer, out->tempBufferSize ),
paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( out->hpiDevice->subSys,
out->hpiStream, &data ), paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( out->hpiDevice->subSys,
out->hpiStream, &data ), paUnanticipatedHostError );
#endif
error:
return result;
}
@ -1989,7 +1964,7 @@ static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed )
if( stream->input )
{
PA_ASIHPI_UNLESS_( HPI_InStreamStart( stream->input->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_InStreamStart( NULL,
stream->input->hpiStream ), paUnanticipatedHostError );
}
if( stream->output )
@ -1999,7 +1974,7 @@ static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed )
/* Buffer isn't primed, so load stream with silence */
PA_ENSURE_( PaAsiHpi_PrimeOutputWithSilence( stream ) );
}
PA_ASIHPI_UNLESS_( HPI_OutStreamStart( stream->output->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamStart( NULL,
stream->output->hpiStream ), paUnanticipatedHostError );
}
stream->state = paAsiHpiActiveState;
@ -2071,7 +2046,7 @@ static PaError PaAsiHpi_StopStream( PaAsiHpiStream *stream, int abort )
/* Input channels */
if( stream->input )
{
PA_ASIHPI_UNLESS_( HPI_InStreamReset( stream->input->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_InStreamReset( NULL,
stream->input->hpiStream ), paUnanticipatedHostError );
}
/* Output channels */
@ -2097,7 +2072,7 @@ static PaError PaAsiHpi_StopStream( PaAsiHpiStream *stream, int abort )
Pa_Sleep( (long)ceil( timeLeft ) );
}
}
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( stream->output->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( NULL,
stream->output->hpiStream ), paUnanticipatedHostError );
}
@ -2315,7 +2290,7 @@ static PaError PaAsiHpi_WaitForFrames( PaAsiHpiStream *stream, unsigned long *fr
PaError result = paNoError;
double sampleRate;
unsigned long framesTarget;
HW32 outputData = 0, outputSpace = 0, inputData = 0, framesLeft = 0;
uint32_t outputData = 0, outputSpace = 0, inputData = 0, framesLeft = 0;
assert( stream );
assert( stream->input || stream->output );
@ -2485,10 +2460,7 @@ static PaError PaAsiHpi_BeginProcessing( PaAsiHpiStream *stream, unsigned long *
{
PaAsiHpiStreamInfo info;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
HPI_DATA data;
#endif
HW32 framesToGet = *numFrames;
uint32_t framesToGet = *numFrames;
/* Check for overflows and underflows yet again */
PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->input, &info ) );
@ -2513,22 +2485,12 @@ static PaError PaAsiHpi_BeginProcessing( PaAsiHpiStream *stream, unsigned long *
stream->input->tempBufferSize / Pa_GetSampleSize(inputFormat) );
}
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
/* Read block of data into temp buffer */
PA_ASIHPI_UNLESS_( HPI_InStreamReadBuf( stream->input->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_InStreamReadBuf( NULL,
stream->input->hpiStream,
stream->input->tempBuffer,
framesToGet * stream->input->bytesPerFrame),
paUnanticipatedHostError );
#else
/* Setup HPI data structure around temp buffer */
HPI_DataCreate( &data, &stream->input->hpiFormat, stream->input->tempBuffer,
framesToGet * stream->input->bytesPerFrame );
/* Read block of data into temp buffer */
PA_ASIHPI_UNLESS_( HPI_InStreamRead( stream->input->hpiDevice->subSys,
stream->input->hpiStream, &data ),
paUnanticipatedHostError );
#endif
/* Register temp buffer with buffer processor (always FULL buffer) */
PaUtil_SetInputFrameCount( &stream->bufferProcessor, *numFrames );
/* HPI interface only allows interleaved channels */
@ -2572,9 +2534,6 @@ static PaError PaAsiHpi_EndProcessing( PaAsiHpiStream *stream, unsigned long num
if( stream->output )
{
PaAsiHpiStreamInfo info;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
HPI_DATA data;
#endif
/* Check for underflows after the (potentially time-consuming) callback */
PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->output, &info ) );
if( info.underflow )
@ -2582,23 +2541,13 @@ static PaError PaAsiHpi_EndProcessing( PaAsiHpiStream *stream, unsigned long num
*cbFlags |= paOutputUnderflow;
}
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
/* Write temp buffer to HPI stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( stream->output->hpiDevice->subSys,
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL,
stream->output->hpiStream,
stream->output->tempBuffer,
numFrames * stream->output->bytesPerFrame,
&stream->output->hpiFormat),
paUnanticipatedHostError );
#else
/* Setup HPI data structure around temp buffer */
HPI_DataCreate( &data, &stream->output->hpiFormat, stream->output->tempBuffer,
numFrames * stream->output->bytesPerFrame );
/* Write temp buffer to HPI stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( stream->output->hpiDevice->subSys,
stream->output->hpiStream, &data ),
paUnanticipatedHostError );
#endif
}
error:

View File

@ -4,64 +4,56 @@ This document contains information to help you compile PortAudio with
ASIO support. If you find any omissions or errors in this document
please notify us on the PortAudio mailing list.
NOTE: The Macintosh sections of this document are provided for historical
reference. They refer to pre-OS X Macintosh. PortAudio no longer
supports pre-OS X Macintosh. Steinberg does not support ASIO on Mac OS X.
Building PortAudio with ASIO support
------------------------------------
To build PortAudio with ASIO support you need to compile and link with
pa_asio.c, and files from the ASIO SDK (see below), along with the common
files from src/common/ and platform specific files from src/os/win/ (for Win32)
or src/os/mac/ (for Macintosh).
PortAudio files from src/common/ and platform specific files from
src/os/win/ (for Win32).
If you are compiling with a non-Microsoft compiler on Windows, also
compile and link with iasiothiscallresolver.cpp (see below for
an explanation).
For some platforms (MingW, possibly Mac), you may simply
For some platforms (MingW, Cygwin/MingW), you may simply
be able to type:
./configure --with-host_os=mingw --with-winapi=asio [--with-asiodir=/usr/local/asiosdk2]
make
./configure --with-host_os=darwin --with-winapi=asio [--with-asiodir=/usr/local/asiosdk2]
make
and life will be good. Make sure you update the above with the correct local
path to the ASIO SDK.
For Microsoft Visual C++ there is a build tutorial here:
http://www.portaudio.com/trac/wiki/TutorialDir/Compile/WindowsASIOMSVC
and life will be good.
Obtaining the ASIO SDK
----------------------
In order to build PortAudio with ASIO support, you need to download
the ASIO SDK (version 2.0) from Steinberg. Steinberg makes the ASIO
the ASIO SDK (version 2.0 or later) from Steinberg. Steinberg makes the ASIO
SDK available to anyone free of charge, however they do not permit its
source code to be distributed.
NOTE: In some cases the ASIO SDK may require patching, see below
for further details.
http://www.steinberg.de/329+M52087573ab0.html
http://www.steinberg.net/en/company/developer.html
If the above link is broken search Google for:
"download steinberg ASIO SDK"
Building the ASIO SDK on Macintosh
----------------------------------
To build the ASIO SDK on Macintosh you need to compile and link with the
following files from the ASIO SDK:
host/asiodrivers.cpp
host/mac/asioshlib.cpp
host/mac/codefragements.cpp
You may also need to adjust your include paths to support inclusion of
header files from the above directories.
Building the ASIO SDK on Windows
--------------------------------
@ -103,12 +95,27 @@ If you use configure and make (see above), this should be handled
automatically for you.
For further information about the IASIO thiscall problem see this page:
http://www.audiomulch.com/~rossb/code/calliasio
http://www.rossbencina.com/code/iasio-thiscall-resolver
Macintosh ASIO SDK Bug Patch
----------------------------
Building the ASIO SDK on (Pre-OS X) Macintosh
---------------------------------------------
To build the ASIO SDK on Macintosh you need to compile and link with the
following files from the ASIO SDK:
host/asiodrivers.cpp
host/mac/asioshlib.cpp
host/mac/codefragements.cpp
You may also need to adjust your include paths to support inclusion of
header files from the above directories.
(Pre-OS X) Macintosh ASIO SDK Bug Patch
---------------------------------------
There is a bug in the ASIO SDK that causes the Macintosh version to
often fail during initialization. Below is a patch that you can apply.
@ -137,4 +144,4 @@ bool CodeFragments::getFrontProcessDirectory(void *specs)
}
---
###

View File

@ -1,5 +1,5 @@
/*
* $Id: pa_asio.cpp 1681 2011-05-10 15:58:15Z rossb $
* $Id: pa_asio.cpp 1778 2011-11-10 13:59:53Z rossb $
* Portable Audio I/O Library for ASIO Drivers
*
* Author: Stephane Letz
@ -1214,7 +1214,7 @@ PaError PaAsio_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex
if( foundDefaultSampleRate ){
/* calculate default latency values from bufferPreferredSize
for default low latency, and bufferPreferredSize * 3
for default low latency, and bufferMaxSize
for default high latency.
use the default sample rate to convert from samples to
seconds. Without knowing what sample rate the user will
@ -1227,14 +1227,8 @@ PaError PaAsio_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex
deviceInfo->defaultLowInputLatency = defaultLowLatency;
deviceInfo->defaultLowOutputLatency = defaultLowLatency;
long defaultHighLatencyBufferSize =
paAsioDriverInfo.bufferPreferredSize * 3;
if( defaultHighLatencyBufferSize > paAsioDriverInfo.bufferMaxSize )
defaultHighLatencyBufferSize = paAsioDriverInfo.bufferMaxSize;
double defaultHighLatency =
defaultHighLatencyBufferSize / deviceInfo->defaultSampleRate;
paAsioDriverInfo.bufferMaxSize / deviceInfo->defaultSampleRate;
if( defaultHighLatency < defaultLowLatency )
defaultHighLatency = defaultLowLatency; /* just in case the driver returns something strange */
@ -1629,33 +1623,60 @@ static void ZeroOutputBuffers( PaAsioStream *stream, long index )
}
static unsigned long SelectHostBufferSize( unsigned long suggestedLatencyFrames, unsigned long userFramesPerBuffer,
PaAsioDriverInfo *driverInfo )
/* Return the smallest power of two that is >= x.
   Returns the input parameter unchanged if it is already a power of two.

   Edge cases: x == 0 yields 0 (the initial decrement wraps to all-ones and
   the final increment wraps back to 0). Likewise, if the next power of two
   does not fit in an unsigned long the result wraps to 0.

http://stackoverflow.com/questions/364985/algorithm-for-finding-the-smallest-power-of-two-thats-greater-or-equal-to-a-giv
*/
static unsigned long NextPowerOfTwo( unsigned long x )
{
    /* Decrement first so that exact powers of two map to themselves. */
    --x;

    /* Smear the highest set bit into every lower bit position. */
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;

    /* Cover the upper 32 bits when unsigned long is 64 bits wide.
       The shift by 32 is written as two shifts by 16 because a single
       shift by 32 would be undefined where unsigned long is only 32 bits
       wide; on such platforms this expression evaluates to 0 and the
       statement is a no-op.
    */
    x |= (x >> 16) >> 16;

    /* x is now of the form 2^k - 1; adding one gives the power of two. */
    return x + 1;
}
static unsigned long SelectHostBufferSizeForUnspecifiedUserFramesPerBuffer(
unsigned long targetBufferingLatencyFrames, PaAsioDriverInfo *driverInfo )
{
/* Choose a host buffer size based only on targetBufferingLatencyFrames and the
device's supported buffer sizes. Always returns a valid value.
*/
unsigned long result;
if( suggestedLatencyFrames == 0 )
{
result = driverInfo->bufferPreferredSize;
}
else{
if( suggestedLatencyFrames <= (unsigned long)driverInfo->bufferMinSize )
if( targetBufferingLatencyFrames <= (unsigned long)driverInfo->bufferMinSize )
{
result = driverInfo->bufferMinSize;
}
else if( suggestedLatencyFrames >= (unsigned long)driverInfo->bufferMaxSize )
else if( targetBufferingLatencyFrames >= (unsigned long)driverInfo->bufferMaxSize )
{
result = driverInfo->bufferMaxSize;
}
else
{
if( driverInfo->bufferGranularity == -1 )
if( driverInfo->bufferGranularity == 0 ) /* single fixed host buffer size */
{
/* power-of-two */
result = 2;
/* The documentation states that bufferGranularity should be zero
when bufferMinSize, bufferMaxSize and bufferPreferredSize are the
same. We assume that is the case.
*/
while( result < suggestedLatencyFrames )
result *= 2;
result = driverInfo->bufferPreferredSize;
}
else if( driverInfo->bufferGranularity == -1 ) /* power-of-two */
{
/* We assume bufferMinSize and bufferMaxSize are powers of two. */
result = NextPowerOfTwo( targetBufferingLatencyFrames );
if( result < (unsigned long)driverInfo->bufferMinSize )
result = driverInfo->bufferMinSize;
@ -1663,36 +1684,150 @@ static unsigned long SelectHostBufferSize( unsigned long suggestedLatencyFrames,
if( result > (unsigned long)driverInfo->bufferMaxSize )
result = driverInfo->bufferMaxSize;
}
else if( driverInfo->bufferGranularity == 0 )
else /* modulo bufferGranularity */
{
/* the documentation states that bufferGranularity should be
zero when bufferMinSize, bufferMaxSize and
bufferPreferredSize are the same. We assume that is the case.
*/
/* round up to the next multiple of granularity */
unsigned long n = (targetBufferingLatencyFrames + driverInfo->bufferGranularity - 1)
/ driverInfo->bufferGranularity;
result = driverInfo->bufferPreferredSize;
}
else
{
/* modulo granularity */
result = n * driverInfo->bufferGranularity;
unsigned long remainder =
suggestedLatencyFrames % driverInfo->bufferGranularity;
if( remainder == 0 )
{
result = suggestedLatencyFrames;
}
else
{
result = suggestedLatencyFrames
+ (driverInfo->bufferGranularity - remainder);
if( result < (unsigned long)driverInfo->bufferMinSize )
result = driverInfo->bufferMinSize;
if( result > (unsigned long)driverInfo->bufferMaxSize )
result = driverInfo->bufferMaxSize;
}
}
return result;
}
static unsigned long SelectHostBufferSizeForSpecifiedUserFramesPerBuffer(
unsigned long targetBufferingLatencyFrames, unsigned long userFramesPerBuffer,
PaAsioDriverInfo *driverInfo )
{
/* Select a host buffer size conforming to targetBufferingLatencyFrames
and the device's supported buffer sizes.
The return value will always be a multiple of userFramesPerBuffer.
If a valid buffer size can not be found the function returns 0.
The current implementation uses a simple iterative search for clarity.
Feel free to suggest a closed form solution.
*/
unsigned long result = 0;
assert( userFramesPerBuffer != 0 );
if( driverInfo->bufferGranularity == 0 ) /* single fixed host buffer size */
{
/* The documentation states that bufferGranularity should be zero
when bufferMinSize, bufferMaxSize and bufferPreferredSize are the
same. We assume that is the case.
*/
if( (driverInfo->bufferPreferredSize % userFramesPerBuffer) == 0 )
result = driverInfo->bufferPreferredSize;
}
else if( driverInfo->bufferGranularity == -1 ) /* power-of-two */
{
/* We assume bufferMinSize and bufferMaxSize are powers of two. */
/* Search all powers of two in the range [bufferMinSize,bufferMaxSize]
for multiples of userFramesPerBuffer. We prefer the first multiple
that is equal or greater than targetBufferingLatencyFrames, or
failing that, the largest multiple less than
targetBufferingLatencyFrames.
*/
unsigned long x = (unsigned long)driverInfo->bufferMinSize;
do {
if( (x % userFramesPerBuffer) == 0 )
{
/* any power-of-two multiple of userFramesPerBuffer is acceptable */
result = x;
if( result >= targetBufferingLatencyFrames )
break; /* stop. a value >= to targetBufferingLatencyFrames is ideal. */
}
x *= 2;
} while( x <= (unsigned long)driverInfo->bufferMaxSize );
}
else /* modulo granularity */
{
/* We assume bufferMinSize is a multiple of bufferGranularity. */
/* Search all multiples of bufferGranularity in the range
[bufferMinSize,bufferMaxSize] for multiples of userFramesPerBuffer.
We prefer the first multiple that is equal or greater than
targetBufferingLatencyFrames, or failing that, the largest multiple
less than targetBufferingLatencyFrames.
*/
unsigned long x = (unsigned long)driverInfo->bufferMinSize;
do {
if( (x % userFramesPerBuffer) == 0 )
{
/* any permissible size (bufferMinSize + k * bufferGranularity) that is a multiple of userFramesPerBuffer is acceptable */
result = x;
if( result >= targetBufferingLatencyFrames )
break; /* stop. a value >= to targetBufferingLatencyFrames is ideal. */
}
x += driverInfo->bufferGranularity;
} while( x <= (unsigned long)driverInfo->bufferMaxSize );
}
return result;
}
/* Select the host buffer size for a stream.
Parameters:
targetBufferingLatencyFrames - buffering latency the result should meet or
exceed where possible (see requirement 3 in the comment below).
userFramesPerBuffer - user buffer size; 0 means "not specified".
driverInfo - passed through to the two helper functions below, which apply
the driver's buffer size constraints.
Returns the value produced by
SelectHostBufferSizeForSpecifiedUserFramesPerBuffer() when userFramesPerBuffer
is non-zero and that helper returns a non-zero size; otherwise the value
produced by SelectHostBufferSizeForUnspecifiedUserFramesPerBuffer().
*/
static unsigned long SelectHostBufferSize(
unsigned long targetBufferingLatencyFrames,
unsigned long userFramesPerBuffer, PaAsioDriverInfo *driverInfo )
{
unsigned long result = 0; /* 0 doubles as "no size selected yet" for the fallback test below */
/* We select a host buffer size based on the following requirements
(in priority order):
1. The host buffer size must be permissible according to the ASIO
driverInfo buffer size constraints (min, max, granularity or
powers-of-two).
2. If the user specifies a non-zero framesPerBuffer parameter
(userFramesPerBuffer here) the host buffer should be a multiple of
this (subject to the constraints in (1) above).
[NOTE: Where no permissible host buffer size is a multiple of
userFramesPerBuffer, we choose a value as if userFramesPerBuffer were
zero (i.e. we ignore it). This strategy is open for review ~ perhaps
there are still "more optimal" buffer sizes related to
userFramesPerBuffer that we could use.]
3. The host buffer size should be greater than or equal to
targetBufferingLatencyFrames, subject to (1) and (2) above. Where it
is not possible to select a host buffer size equal or greater than
targetBufferingLatencyFrames, the highest buffer size conforming to
(1) and (2) should be chosen.
*/
if( userFramesPerBuffer != 0 )
{
/* userFramesPerBuffer is specified, try to find a buffer size that's
a multiple of it */
result = SelectHostBufferSizeForSpecifiedUserFramesPerBuffer(
targetBufferingLatencyFrames, userFramesPerBuffer, driverInfo );
}
if( result == 0 )
{
/* either userFramesPerBuffer was not specified, or we couldn't find a
host buffer size that is a multiple of it. Select a host buffer size
according to targetBufferingLatencyFrames and the ASIO driverInfo
buffer size constraints.
*/
result = SelectHostBufferSizeForUnspecifiedUserFramesPerBuffer(
targetBufferingLatencyFrames, driverInfo );
}
return result;
@ -2101,10 +2236,29 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
}
else /* Using callback interface... */
{
framesPerHostBuffer = SelectHostBufferSize(
/* Select the host buffer size based on user framesPerBuffer and the
maximum of suggestedInputLatencyFrames and
suggestedOutputLatencyFrames.
We should subtract any fixed known driver latency from
suggestedLatencyFrames before computing the host buffer size.
However, the ASIO API doesn't provide a method for determining fixed
latencies independent of the host buffer size. ASIOGetLatencies()
only returns latencies after the buffer size has been configured, so
we can't reliably use it to determine fixed latencies here.
We could set the preferred buffer size and then subtract it from
the values returned from ASIOGetLatencies, but this would not be 100%
reliable, so we don't do it.
*/
unsigned long targetBufferingLatencyFrames =
(( suggestedInputLatencyFrames > suggestedOutputLatencyFrames )
? suggestedInputLatencyFrames : suggestedOutputLatencyFrames), framesPerBuffer,
driverInfo );
? suggestedInputLatencyFrames
: suggestedOutputLatencyFrames);
framesPerHostBuffer = SelectHostBufferSize( targetBufferingLatencyFrames,
framesPerBuffer, driverInfo );
}
@ -2254,7 +2408,8 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
stream->outputBufferConverter = 0;
}
/* Values returned by ASIOGetLatencies() include the latency introduced by
the ASIO double buffer. */
ASIOGetLatencies( &stream->asioInputLatencyFrames, &stream->asioOutputLatencyFrames );

View File

@ -990,18 +990,19 @@ static void UpdateTimeStampOffsets( PaMacCoreStream *stream )
}
/* ================================================================================= */
/* Query sample rate property. */
static OSStatus UpdateSampleRateFromDeviceProperty( PaMacCoreStream *stream, AudioDeviceID deviceID, Boolean isInput )
/* can be used to update from nominal or actual sample rate */
static OSStatus UpdateSampleRateFromDeviceProperty( PaMacCoreStream *stream, AudioDeviceID deviceID, Boolean isInput, AudioDevicePropertyID sampleRatePropertyID )
{
PaMacCoreDeviceProperties * deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties;
/* FIXME: not sure if this should be the sample rate of the output device or the output unit */
Float64 actualSampleRate = deviceProperties->sampleRate;
Float64 sampleRate = 0.0;
UInt32 propSize = sizeof(Float64);
OSStatus osErr = AudioDeviceGetProperty( deviceID, 0, isInput, kAudioDevicePropertyActualSampleRate, &propSize, &actualSampleRate);
if( (osErr == noErr) && (actualSampleRate > 1000.0) ) // avoid divide by zero if there's an error
OSStatus osErr = AudioDeviceGetProperty( deviceID, 0, isInput, sampleRatePropertyID, &propSize, &sampleRate);
if( (osErr == noErr) && (sampleRate > 1000.0) ) /* avoid divide by zero if there's an error */
{
deviceProperties->sampleRate = actualSampleRate;
deviceProperties->samplePeriod = 1.0 / actualSampleRate;
deviceProperties->sampleRate = sampleRate;
deviceProperties->samplePeriod = 1.0 / sampleRate;
}
return osErr;
}
@ -1013,7 +1014,7 @@ static OSStatus AudioDevicePropertyActualSampleRateListenerProc( AudioDeviceID i
// Make sure the callback is operating on a stream that is still valid!
assert( stream->streamRepresentation.magic == PA_STREAM_MAGIC );
OSStatus osErr = UpdateSampleRateFromDeviceProperty( stream, inDevice, isInput );
OSStatus osErr = UpdateSampleRateFromDeviceProperty( stream, inDevice, isInput, kAudioDevicePropertyActualSampleRate );
if( osErr == noErr )
{
UpdateTimeStampOffsets( stream );
@ -1077,9 +1078,6 @@ static OSStatus SetupDevicePropertyListeners( PaMacCoreStream *stream, AudioDevi
OSStatus osErr = noErr;
PaMacCoreDeviceProperties *deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties;
// Start with the current values for the device properties.
UpdateSampleRateFromDeviceProperty( stream, deviceID, isInput );
if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput,
kAudioDevicePropertyLatency, &deviceProperties->deviceLatency )) != noErr ) return osErr;
if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput,
@ -1579,7 +1577,7 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
double sampleRate,
UInt32 requestedFramesPerBuffer )
{
UInt32 suggested = 0;
UInt32 resultBufferSizeFrames = 0;
// Use maximum of suggested input and output latencies.
if( inputParameters )
{
@ -1588,28 +1586,25 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedInputLatency;
// Prevent negative latency.
variableLatencyFrames = MAX( variableLatencyFrames, 0 );
suggested = MAX( suggested, (UInt32) variableLatencyFrames );
resultBufferSizeFrames = MAX( resultBufferSizeFrames, (UInt32) variableLatencyFrames );
}
if( outputParameters )
{
UInt32 suggestedLatencyFrames = outputParameters->suggestedLatency * sampleRate;
SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedOutputLatency;
variableLatencyFrames = MAX( variableLatencyFrames, 0 );
suggested = MAX( suggested, (UInt32) variableLatencyFrames );
resultBufferSizeFrames = MAX( resultBufferSizeFrames, (UInt32) variableLatencyFrames );
}
VDBUG( ("Block Size unspecified. Based on Latency, the user wants a Block Size near: %ld.\n",
suggested ) );
if( requestedFramesPerBuffer != paFramesPerBufferUnspecified )
{
if( suggested > (requestedFramesPerBuffer + 1) )
{
// If the user asks for higher latency than the requested buffer size would provide
// then put multiple user buffers in one host buffer.
UInt32 userBuffersPerHostBuffer = (suggested + (requestedFramesPerBuffer - 1)) / requestedFramesPerBuffer;
suggested = userBuffersPerHostBuffer * requestedFramesPerBuffer;
}
// make host buffer the next highest integer multiple of user frames per buffer
UInt32 n = (resultBufferSizeFrames + requestedFramesPerBuffer - 1) / requestedFramesPerBuffer;
resultBufferSizeFrames = n * requestedFramesPerBuffer;
}else{
VDBUG( ("Block Size unspecified. Based on Latency, the user wants a Block Size near: %ld.\n",
resultBufferSizeFrames ) );
}
// Clip to the capabilities of the device.
@ -1617,16 +1612,16 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
{
ClipToDeviceBufferSize( auhalHostApi->devIds[inputParameters->device],
true, // In the old code isInput was false!
suggested, &suggested );
resultBufferSizeFrames, &resultBufferSizeFrames );
}
if( outputParameters )
{
ClipToDeviceBufferSize( auhalHostApi->devIds[outputParameters->device],
false, suggested, &suggested );
false, resultBufferSizeFrames, &resultBufferSizeFrames );
}
VDBUG(("After querying hardware, setting block size to %ld.\n", suggested));
VDBUG(("After querying hardware, setting block size to %ld.\n", resultBufferSizeFrames));
return suggested;
return resultBufferSizeFrames;
}
/* =================================================================================================== */
@ -1997,52 +1992,47 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
stream->streamRepresentation.streamInfo.sampleRate = sampleRate;
stream->sampleRate = sampleRate;
stream->outDeviceSampleRate = 0;
if( stream->outputUnit ) {
Float64 rate;
UInt32 size = sizeof( rate );
result = ERR( AudioDeviceGetProperty( stream->outputDevice,
0,
FALSE,
kAudioDevicePropertyNominalSampleRate,
&size, &rate ) );
if( result )
goto error;
stream->outDeviceSampleRate = rate;
}
stream->inDeviceSampleRate = 0;
if( stream->inputUnit ) {
Float64 rate;
UInt32 size = sizeof( rate );
result = ERR( AudioDeviceGetProperty( stream->inputDevice,
0,
TRUE,
kAudioDevicePropertyNominalSampleRate,
&size, &rate ) );
if( result )
goto error;
stream->inDeviceSampleRate = rate;
}
stream->userInChan = inputChannelCount;
stream->userOutChan = outputChannelCount;
// Setup property listeners for timestamp and latency calculations.
pthread_mutex_init( &stream->timingInformationMutex, NULL );
stream->timingInformationMutexIsInitialized = 1;
InitializeDeviceProperties( &stream->inputProperties );
InitializeDeviceProperties( &stream->outputProperties );
InitializeDeviceProperties( &stream->inputProperties ); // zeros the struct. doesn't actually init it to useful values
InitializeDeviceProperties( &stream->outputProperties ); // zeros the struct. doesn't actually init it to useful values
if( stream->outputUnit )
{
Boolean isInput = FALSE;
// Start with the current values for the device properties.
// Init with nominal sample rate. Use actual sample rate where available
result = ERR( UpdateSampleRateFromDeviceProperty(
stream, stream->outputDevice, isInput, kAudioDevicePropertyNominalSampleRate ) );
if( result )
goto error; /* fail if we can't even get a nominal device sample rate */
UpdateSampleRateFromDeviceProperty( stream, stream->outputDevice, isInput, kAudioDevicePropertyActualSampleRate );
SetupDevicePropertyListeners( stream, stream->outputDevice, isInput );
}
if( stream->inputUnit )
{
Boolean isInput = TRUE;
// as above
result = ERR( UpdateSampleRateFromDeviceProperty(
stream, stream->inputDevice, isInput, kAudioDevicePropertyNominalSampleRate ) );
if( result )
goto error;
UpdateSampleRateFromDeviceProperty( stream, stream->inputDevice, isInput, kAudioDevicePropertyActualSampleRate );
SetupDevicePropertyListeners( stream, stream->inputDevice, isInput );
}
UpdateTimeStampOffsets( stream );
// Setup copies to be used by audio callback.
// Setup timestamp copies to be used by audio callback.
stream->timestampOffsetCombined_ioProcCopy = stream->timestampOffsetCombined;
stream->timestampOffsetInputDevice_ioProcCopy = stream->timestampOffsetInputDevice;
stream->timestampOffsetOutputDevice_ioProcCopy = stream->timestampOffsetOutputDevice;

View File

@ -120,7 +120,11 @@ typedef struct PaMacCoreDeviceProperties
UInt32 bufferFrameSize;
// UInt32 streamLatency; // Seems to be the same as deviceLatency!?
UInt32 deviceLatency;
/* Current device sample rate. May change! */
/* Current device sample rate. May change!
These are initialized to the nominal device sample rate,
and updated with the actual sample rate, when/where available.
Note that these are the *device* sample rates, prior to any required
SR conversion. */
Float64 sampleRate;
Float64 samplePeriod; // reciprocal
}
@ -166,10 +170,6 @@ typedef struct PaMacCoreStream
ACTIVE = 3 /* The stream is active and running. */
} state;
double sampleRate;
//these may be different from the stream sample rate due to SR conversion:
double outDeviceSampleRate;
double inDeviceSampleRate;
PaMacCoreDeviceProperties inputProperties;
PaMacCoreDeviceProperties outputProperties;

View File

@ -1,5 +1,5 @@
/*
* $Id: pa_win_ds.c 1744 2011-08-25 15:59:32Z rossb $
* $Id: pa_win_ds.c 1794 2011-11-24 18:11:33Z rossb $
* Portable Audio I/O Library DirectSound implementation
*
* Authors: Phil Burk, Robert Marsanyi & Ross Bencina
@ -152,6 +152,13 @@ PA_THREAD_FUNC ProcessingThreadProc( void *pArg );
#define PA_DS_WIN_WDM_DEFAULT_LATENCY_ (.120)
/* we allow the polling period to range between 1 and 100ms.
prior to August 2011 we limited the minimum polling period to 10ms.
*/
#define PA_DS_MINIMUM_POLLING_PERIOD_SECONDS (0.001) /* 1ms */
#define PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS (0.100) /* 100ms */
#define PA_DS_POLLING_JITTER_SECONDS (0.001) /* 1ms */
#define SECONDS_PER_MSEC (0.001)
#define MSECS_PER_SECOND (1000)
@ -1336,6 +1343,13 @@ static PaError ValidateWinDirectSoundSpecificStreamInfo(
{
return paIncompatibleHostApiSpecificStreamInfo;
}
if( streamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
{
if( streamInfo->framesPerBuffer <= 0 )
return paIncompatibleHostApiSpecificStreamInfo;
}
}
return paNoError;
@ -1541,7 +1555,13 @@ static HRESULT InitFullDuplexInputOutputBuffers( PaWinDsStream *stream,
#endif /* PAWIN_USE_DIRECTSOUNDFULLDUPLEXCREATE */
static HRESULT InitInputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device, PaSampleFormat sampleFormat, unsigned long nFrameRate, WORD nChannels, int bytesPerBuffer, PaWinWaveFormatChannelMask channelMask )
static HRESULT InitInputBuffer( PaWinDsStream *stream,
PaWinDsDeviceInfo *device,
PaSampleFormat sampleFormat,
unsigned long nFrameRate,
WORD nChannels,
int bytesPerBuffer,
PaWinWaveFormatChannelMask channelMask )
{
DSCBUFFERDESC captureDesc;
PaWinWaveFormat waveFormat;
@ -1582,7 +1602,10 @@ static HRESULT InitInputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device
}
static HRESULT InitOutputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device, PaSampleFormat sampleFormat, unsigned long nFrameRate, WORD nChannels, int bytesPerBuffer, PaWinWaveFormatChannelMask channelMask )
static HRESULT InitOutputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device,
PaSampleFormat sampleFormat, unsigned long nFrameRate,
WORD nChannels, int bytesPerBuffer,
PaWinWaveFormatChannelMask channelMask )
{
HRESULT result;
HWND hWnd;
@ -1680,18 +1703,15 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
unsigned long suggestedOutputLatencyFrames,
double sampleRate, unsigned long userFramesPerBuffer )
{
/* we allow the polling period to range between 1 and 100ms.
prior to August 2011 we limited the minimum polling period to 10ms.
*/
unsigned long minimumPollingPeriodFrames = sampleRate / 1000; /* 1ms */
unsigned long maximumPollingPeriodFrames = sampleRate / 10; /* 100ms */
unsigned long pollingJitterFrames = sampleRate / 1000; /* 1ms */
unsigned long minimumPollingPeriodFrames = sampleRate * PA_DS_MINIMUM_POLLING_PERIOD_SECONDS;
unsigned long maximumPollingPeriodFrames = sampleRate * PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS;
unsigned long pollingJitterFrames = sampleRate * PA_DS_POLLING_JITTER_SECONDS;
if( userFramesPerBuffer == paFramesPerBufferUnspecified )
{
unsigned long suggestedLatencyFrames = max( suggestedInputLatencyFrames, suggestedOutputLatencyFrames );
unsigned long targetBufferingLatencyFrames = max( suggestedInputLatencyFrames, suggestedOutputLatencyFrames );
*pollingPeriodFrames = suggestedLatencyFrames / 4;
*pollingPeriodFrames = targetBufferingLatencyFrames / 4;
if( *pollingPeriodFrames < minimumPollingPeriodFrames )
{
*pollingPeriodFrames = minimumPollingPeriodFrames;
@ -1702,14 +1722,14 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
}
*hostBufferSizeFrames = *pollingPeriodFrames
+ max( *pollingPeriodFrames + pollingJitterFrames, suggestedLatencyFrames);
+ max( *pollingPeriodFrames + pollingJitterFrames, targetBufferingLatencyFrames);
}
else
{
unsigned long suggestedLatencyFrames = suggestedInputLatencyFrames;
unsigned long targetBufferingLatencyFrames = suggestedInputLatencyFrames;
if( isFullDuplex )
{
/* in full duplex streams we know that the buffer adapter adds userFramesPerBuffer
/* In full duplex streams we know that the buffer adapter adds userFramesPerBuffer
extra fixed latency. so we subtract it here as a fixed latency before computing
the buffer size. being careful not to produce an unrepresentable negative result.
@ -1723,21 +1743,21 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
suggestedOutputLatencyFrames - userFramesPerBuffer;
/* maximum of input and adjusted output suggested latency */
if( adjustedSuggestedOutputLatencyFrames > suggestedInputLatencyFrames )
suggestedLatencyFrames = adjustedSuggestedOutputLatencyFrames;
if( adjustedSuggestedOutputLatencyFrames > targetBufferingLatencyFrames )
targetBufferingLatencyFrames = adjustedSuggestedOutputLatencyFrames;
}
}
else
{
/* maximum of input and output suggested latency */
if( suggestedOutputLatencyFrames > suggestedInputLatencyFrames )
suggestedLatencyFrames = suggestedOutputLatencyFrames;
targetBufferingLatencyFrames = suggestedOutputLatencyFrames;
}
*hostBufferSizeFrames = userFramesPerBuffer
+ max( userFramesPerBuffer + pollingJitterFrames, suggestedLatencyFrames);
+ max( userFramesPerBuffer + pollingJitterFrames, targetBufferingLatencyFrames);
*pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), suggestedLatencyFrames / 16 );
*pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), targetBufferingLatencyFrames / 16 );
if( *pollingPeriodFrames > maximumPollingPeriodFrames )
{
@ -1747,6 +1767,23 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
}
/* Compute the polling period, in frames, for a host buffer whose size has
   already been fixed, and store it in *pollingPeriodFrames.

   hostBufferSizeFrames - host buffer size in frames.
   pollingPeriodFrames  - [out] receives the computed polling period.
   sampleRate           - stream sample rate; used to convert the maximum
                          polling period from seconds to frames.
   userFramesPerBuffer  - user buffer size in frames.

   The period is the larger of (userFramesPerBuffer / 4, but at least 1) and
   (hostBufferSizeFrames / 16), clipped to at most
   PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS worth of frames.
*/
static void CalculatePollingPeriodFrames( unsigned long hostBufferSizeFrames,
        unsigned long *pollingPeriodFrames,
        double sampleRate, unsigned long userFramesPerBuffer )
{
    /* Explicit cast: the floating point product is intentionally truncated
       to a whole number of frames. */
    unsigned long maximumPollingPeriodFrames =
            (unsigned long)(sampleRate * PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS);

    *pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), hostBufferSizeFrames / 16 );

    if( *pollingPeriodFrames > maximumPollingPeriodFrames )
    {
        *pollingPeriodFrames = maximumPollingPeriodFrames;
    }
}
static void SetStreamInfoLatencies( PaWinDsStream *stream,
unsigned long userFramesPerBuffer,
unsigned long pollingPeriodFrames,
@ -1808,6 +1845,8 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
int inputChannelCount, outputChannelCount;
PaSampleFormat inputSampleFormat, outputSampleFormat;
PaSampleFormat hostInputSampleFormat, hostOutputSampleFormat;
int userRequestedHostInputBufferSizeFrames = 0;
int userRequestedHostOutputBufferSizeFrames = 0;
unsigned long suggestedInputLatencyFrames, suggestedOutputLatencyFrames;
PaWinDirectSoundStreamInfo *inputStreamInfo, *outputStreamInfo;
PaWinWaveFormatChannelMask inputChannelMask, outputChannelMask;
@ -1840,6 +1879,9 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
result = ValidateWinDirectSoundSpecificStreamInfo( inputParameters, inputStreamInfo );
if( result != paNoError ) return result;
if( inputStreamInfo && inputStreamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
userRequestedHostInputBufferSizeFrames = inputStreamInfo->framesPerBuffer;
if( inputStreamInfo && inputStreamInfo->flags & paWinDirectSoundUseChannelMask )
inputChannelMask = inputStreamInfo->channelMask;
else
@ -1877,6 +1919,9 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
result = ValidateWinDirectSoundSpecificStreamInfo( outputParameters, outputStreamInfo );
if( result != paNoError ) return result;
if( outputStreamInfo && outputStreamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
userRequestedHostOutputBufferSizeFrames = outputStreamInfo->framesPerBuffer;
if( outputStreamInfo && outputStreamInfo->flags & paWinDirectSoundUseChannelMask )
outputChannelMask = outputStreamInfo->channelMask;
else
@ -1889,6 +1934,16 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
suggestedOutputLatencyFrames = 0;
}
/*
If low level host buffer size is specified for both input and output
the current code requires the sizes to match.
*/
if( (userRequestedHostInputBufferSizeFrames > 0 && userRequestedHostOutputBufferSizeFrames > 0)
&& userRequestedHostInputBufferSizeFrames != userRequestedHostOutputBufferSizeFrames )
return paIncompatibleHostApiSpecificStreamInfo;
/*
IMPLEMENT ME:
@ -2027,14 +2082,34 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
/* set up i/o parameters */
if( userRequestedHostInputBufferSizeFrames > 0 || userRequestedHostOutputBufferSizeFrames > 0 )
{
/* use low level parameters */
/* since we use the same host buffer size for input and output
we choose the highest user specified value.
*/
stream->hostBufferSizeFrames = max( userRequestedHostInputBufferSizeFrames, userRequestedHostOutputBufferSizeFrames );
CalculatePollingPeriodFrames(
stream->hostBufferSizeFrames, &pollingPeriodFrames,
sampleRate, framesPerBuffer );
}
else
{
CalculateBufferSettings( &stream->hostBufferSizeFrames, &pollingPeriodFrames,
/* isFullDuplex = */ (inputParameters && outputParameters),
suggestedInputLatencyFrames,
suggestedOutputLatencyFrames,
sampleRate, framesPerBuffer );
}
stream->pollingPeriodSeconds = pollingPeriodFrames / sampleRate;
DBUG(("DirectSound host buffer size frames: %d, polling period seconds: %f, @ sr: %f\n",
stream->hostBufferSizeFrames, stream->pollingPeriodSeconds, sampleRate ));
/* ------------------ OUTPUT */
if( outputParameters )
{
@ -2348,7 +2423,8 @@ static int TimeSlice( PaWinDsStream *stream )
long bytesProcessed;
HRESULT hresult;
double outputLatency = 0;
PaStreamCallbackTimeInfo timeInfo = {0,0,0}; /** @todo implement inputBufferAdcTime */
double inputLatency = 0;
PaStreamCallbackTimeInfo timeInfo = {0,0,0};
/* Input */
LPBYTE lpInBuf1 = NULL;
@ -2377,11 +2453,12 @@ static int TimeSlice( PaWinDsStream *stream )
filled = readPos - stream->readOffset;
if( filled < 0 ) filled += stream->inputBufferSizeBytes; // unwrap offset
bytesFilled = filled;
inputLatency = ((double)bytesFilled) * stream->secondsPerHostByte;
}
// FIXME: what happens if IDirectSoundCaptureBuffer_GetCurrentPosition fails?
framesToXfer = numInFramesReady = bytesFilled / stream->inputFrameSizeBytes;
outputLatency = ((double)bytesFilled) * stream->secondsPerHostByte; // FIXME: this doesn't look right. we're calculating output latency in input branch. also secondsPerHostByte is only initialized for the output stream
/** @todo Check for overflow */
}
@ -2396,6 +2473,14 @@ static int TimeSlice( PaWinDsStream *stream )
/* Check for underflow */
if( stream->outputUnderflowCount != previousUnderflowCount )
stream->callbackFlags |= paOutputUnderflow;
/* We are about to compute audio into the first byte of empty space in the output buffer.
This audio will reach the DAC after all of the current (non-empty) audio
in the buffer has played. Therefore the output time is the current time
plus the time it takes to play the non-empty bytes in the buffer,
computed here:
*/
outputLatency = ((double)(stream->outputBufferSizeBytes - bytesEmpty)) * stream->secondsPerHostByte;
}
/* if it's a full duplex stream, set framesToXfer to the minimum of input and output frames ready */
@ -2411,8 +2496,6 @@ static int TimeSlice( PaWinDsStream *stream )
/* The outputBufferDacTime parameter should indicate the time at which
the first sample of the output buffer is heard at the DACs. */
timeInfo.currentTime = PaUtil_GetTime();
timeInfo.outputBufferDacTime = timeInfo.currentTime + outputLatency; // FIXME: QueryOutputSpace gets the playback position, we could use that (?)
PaUtil_BeginBufferProcessing( &stream->bufferProcessor, &timeInfo, stream->callbackFlags );
stream->callbackFlags = 0;
@ -2420,6 +2503,8 @@ static int TimeSlice( PaWinDsStream *stream )
/* Input */
if( stream->bufferProcessor.inputChannelCount > 0 )
{
timeInfo.inputBufferAdcTime = timeInfo.currentTime - inputLatency;
bytesToXfer = framesToXfer * stream->inputFrameSizeBytes;
hresult = IDirectSoundCaptureBuffer_Lock ( stream->pDirectSoundInputBuffer,
stream->readOffset, bytesToXfer,
@ -2449,6 +2534,13 @@ static int TimeSlice( PaWinDsStream *stream )
/* Output */
if( stream->bufferProcessor.outputChannelCount > 0 )
{
/*
We don't currently add outputLatency here because it appears to produce worse
results than not adding it. Need to do more testing to verify this.
*/
/* timeInfo.outputBufferDacTime = timeInfo.currentTime + outputLatency; */
timeInfo.outputBufferDacTime = timeInfo.currentTime;
bytesToXfer = framesToXfer * stream->outputFrameSizeBytes;
hresult = IDirectSoundBuffer_Lock ( stream->pDirectSoundOutputBuffer,
stream->outputBufferWriteOffsetBytes, bytesToXfer,

View File

@ -125,13 +125,13 @@
#endif
#ifdef _MSC_VER
#define NOMMIDS
//#define NOMMIDS
#define DYNAMIC_GUID(data) {data}
#define _NTRTL_ /* Turn off default definition of DEFINE_GUIDEX */
#undef DEFINE_GUID
#define DEFINE_GUID(n,data) EXTERN_C const GUID n = {data}
#define DEFINE_GUID_THUNK(n,data) DEFINE_GUID(n,data)
#define DEFINE_GUIDEX(n) DEFINE_GUID_THUNK(n, STATIC_##n)
//#define _NTRTL_ /* Turn off default definition of DEFINE_GUIDEX */
//#undef DEFINE_GUID
//#define DEFINE_GUID(n,data) EXTERN_C const GUID n = {data}
//#define DEFINE_GUID_THUNK(n,data) DEFINE_GUID(n,data)
//#define DEFINE_GUIDEX(n) DEFINE_GUID_THUNK(n, STATIC_##n)
#endif
#include <mmreg.h>

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/sh
flags=""
args="$@"

View File

@ -135,9 +135,11 @@ void SaveStateBase::gifPathFreeze(u32 path) {
pxAssertDev(!gifPath.gsPack.readAmount, "GS Pack readAmount should be 0!");
pxAssertDev(!gifPath.GetPendingGSPackets(), "MTVU GS Pack Queue should be 0!");
if (!gifPath.isMTVU()) { // FixMe: savestate freeze bug (Gust games) with MTVU enabled
if (IsSaving()) { // Move all the buffered data to the start of buffer
gifPath.RealignPacket(); // May add readAmount which we need to clear on load
}
}
u8* bufferPtr = gifPath.buffer; // Backup current buffer ptr
Freeze(gifPath.mtvu.fakePackets);
FreezeMem(&gifPath, sizeof(gifPath) - sizeof(gifPath.mtvu));

View File

@ -176,6 +176,9 @@ SaveStateBase& SaveStateBase::FreezeMainMemory()
SaveStateBase& SaveStateBase::FreezeInternals()
{
vu1Thread.WaitVU(); // Finish VU1 just in-case...
// Print this until the MTVU problem in gifPathFreeze is taken care of (rama)
if (THREAD_VU1) Console.Warning("MTVU speedhack is enabled, saved states may not be stable");
if (IsLoading()) PreLoadPrep();
// Second Block - Various CPU Registers and States

View File

@ -870,6 +870,7 @@ bool AppConfig::IsOkApplyPreset(int n)
//Have some original and default values at hand to be used later.
Pcsx2Config::GSOptions original_GS = EmuOptions.GS;
AppConfig::FramerateOptions original_Framerate = Framerate;
AppConfig default_AppConfig;
Pcsx2Config default_Pcsx2Config;
@ -878,7 +879,7 @@ bool AppConfig::IsOkApplyPreset(int n)
// 1. The panels/entities should prevent manual modifications (by graying out) of settings which the presets control.
// 2. The panels should not apply values which the presets don't control if the value is initiated by a preset.
// Currently controlled by the presets:
// - AppConfig: Framerate, EnableSpeedHacks, EnableGameFixes.
// - AppConfig: Framerate (except turbo/slowmo factors), EnableSpeedHacks, EnableGameFixes.
// - EmuOptions: Cpu, Gamefixes, SpeedHacks, EnablePatches, GS (except for FrameLimitEnable, VsyncEnable and ManagedVsync).
//
// This essentially currently covers all the options on all the panels except for framelimiter which isn't
@ -891,6 +892,9 @@ bool AppConfig::IsOkApplyPreset(int n)
//Force some settings as a (current) base for all presets.
Framerate = default_AppConfig.Framerate;
Framerate.SlomoScalar = original_Framerate.SlomoScalar;
Framerate.TurboScalar = original_Framerate.TurboScalar;
EnableSpeedHacks = false;
EnableGameFixes = false;

View File

@ -114,19 +114,23 @@ void Panels::FramelimiterPanel::ApplyConfigToGui( AppConfig& configToApply, int
const AppConfig::FramerateOptions& appfps( configToApply.Framerate );
const Pcsx2Config::GSOptions& gsconf( configToApply.EmuOptions.GS );
if( ! (flags & AppConfig::APPLY_FLAG_FROM_PRESET) ) //Presets don't control this: only change if config doesn't come from preset.
if( ! (flags & AppConfig::APPLY_FLAG_FROM_PRESET) ){ //Presets don't control these: only change if config doesn't come from preset.
m_check_LimiterDisable->SetValue( !gsconf.FrameLimitEnable );
m_spin_NominalPct ->SetValue( appfps.NominalScalar.Raw );
m_spin_TurboPct ->SetValue( appfps.TurboScalar.Raw );
m_spin_SlomoPct ->SetValue( appfps.SlomoScalar.Raw );
m_spin_TurboPct ->Enable( 1 );
m_spin_SlomoPct ->Enable( 1 );
}
m_text_BaseNtsc ->ChangeValue( gsconf.FramerateNTSC.ToString() );
m_text_BasePal ->ChangeValue( gsconf.FrameratePAL.ToString() );
m_spin_NominalPct ->SetValue( appfps.NominalScalar.Raw );
m_spin_NominalPct ->Enable(!configToApply.EnablePresets);
m_spin_TurboPct ->Enable(!configToApply.EnablePresets);
m_spin_SlomoPct ->Enable(!configToApply.EnablePresets);
// Vsync timing controls only on devel builds / via manual ini editing
#ifdef PCSX2_DEVBUILD
m_text_BaseNtsc ->Enable(!configToApply.EnablePresets);

View File

@ -121,6 +121,9 @@ void RecentIsoManager::Repopulate()
m_Separator = m_Menu->AppendSeparator();
// The following line is important
m_Menu->Remove( m_Menu->Append( -1, wxEmptyString ) );
//Note: the internal recent iso list (m_Items) has the most recent item last (also at the INI file)
// but the menu is composed in reverse order such that the most recent item appears at the top.
for( int i=cnt-1; i>=0; --i )
@ -176,7 +179,7 @@ void RecentIsoManager::InsertIntoMenu( int id )
if (this->m_firstIdForMenuItems_or_wxID_ANY != wxID_ANY)
wxid = this->m_firstIdForMenuItems_or_wxID_ANY + id;
curitem.ItemPtr = m_Menu->Append( wxid, Path::GetFilename(curitem.Filename), curitem.Filename, wxITEM_RADIO );
curitem.ItemPtr = m_Menu->AppendRadioItem( wxid, Path::GetFilename(curitem.Filename), curitem.Filename );
bool exists = wxFileExists( curitem.Filename );
if( m_cursel == id && exists )

View File

@ -35,9 +35,9 @@ GPUDrawScanline::~GPUDrawScanline()
{
}
void GPUDrawScanline::BeginDraw(const void* param)
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{
memcpy(&m_global, param, sizeof(m_global));
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.tme && m_global.sel.twin)
{
@ -83,7 +83,7 @@ void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{
GPUScanlineSelector sel = m_global.sel;
@ -93,7 +93,7 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
{
if(sel.sprite)
{
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
GSVector4i t = (GSVector4i(vertex[index[1]].t) >> 8) - GSVector4i::x00000001();
t = t.ps32(t);
t = t.upl16(t);

View File

@ -29,6 +29,25 @@
class GPUDrawScanline : public IDrawScanline
{
public:
// Per-draw data handed to the rasterizer: the generic GSRasterizerData plus the
// GPU scanline global state. The clut pointer inside `global` is released with
// _aligned_free when this object is destroyed.
class SharedData : public GSRasterizerData
{
public:
	GPUScanlineGlobalData global;

	// Starts without a clut so the destructor has nothing to release by default.
	SharedData()
	{
		global.clut = NULL;
	}

	// Releases the clut, if one was attached.
	virtual ~SharedData()
	{
		if(global.clut)
		{
			_aligned_free(global.clut);
		}
	}
};
protected:
GPUScanlineGlobalData m_global;
GPUScanlineLocalData m_local;
@ -41,12 +60,12 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -69,9 +69,11 @@ GSTexture* GPURendererSW::GetOutput()
void GPURendererSW::Draw()
{
shared_ptr<GSRasterizerData> data(new GPURasterizerData());
GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData();
GPUScanlineGlobalData& gd = *(GPUScanlineGlobalData*)data->param;
shared_ptr<GSRasterizerData> data(sd);
GPUScanlineGlobalData& gd = sd->global;
const GPUDrawingEnvironment& env = m_env;
@ -114,22 +116,26 @@ void GPURendererSW::Draw()
gd.vm = m_mem.GetPixelAddress(0, 0);
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count);
data->count = m_count;
data->frame = m_perfmon.GetFrame();
data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_count;
memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count);
data->frame = m_perfmon.GetFrame();
int prims = 0;
switch(env.PRIM.TYPE)
{
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break;
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break;
default: __assume(0);
}
@ -138,9 +144,9 @@ void GPURendererSW::Draw()
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
GSVertexSW* v = data->vertices;
GSVertexSW* v = data->vertex;
for(int i = 0, j = m_count; i < j; i++)
for(int i = 0, j = data->vertex_count; i < j; i++)
{
GSVector4 p = v[i].p;
@ -163,9 +169,9 @@ void GPURendererSW::Draw()
m_rl->Sync();
// TODO: m_perfmon.Put(GSPerfMon::Draw, 1);
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
// TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
}
void GPURendererSW::VertexKick()

View File

@ -26,28 +26,6 @@
class GPURendererSW : public GPURendererT<GSVertexSW>
{
class GPURasterizerData : public GSRasterizerData
{
public:
GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)_aligned_malloc(sizeof(GPUScanlineGlobalData), 32);
gd->clut = NULL;
param = gd;
}
virtual ~GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
_aligned_free(gd);
}
};
protected:
IRasterizer* m_rl;
GSTexture* m_texture;

View File

@ -27,6 +27,11 @@
using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
, m_local(*(GPUScanlineLocalData*)param)
@ -50,7 +55,12 @@ void GPUSetupPrimCodeGenerator::Generate()
{
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
cvttps2dq(xmm1, ptr[ecx + sizeof(GSVertexSW) * 1 + offsetof(GSVertexSW, t)]);
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
cvttps2dq(xmm1, ptr[ecx + offsetof(GSVertexSW, t)]);
psrld(xmm1, 8);
psrld(xmm0, 31);
psubd(xmm1, xmm0);
@ -86,6 +96,8 @@ void GPUSetupPrimCodeGenerator::Generate()
if(m_sel.tme || m_sel.iip && m_sel.tfx != 3)
{
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 3; i++)
{
movaps(Xmm(5 + i), ptr[&m_shift[i]]);

View File

@ -33,6 +33,7 @@
#include "GSRendererDX11.h"
#include "GSDevice9.h"
#include "GSDevice11.h"
#include "GSRendererCS.h"
#include "GSSettingsDlg.h"
static HRESULT s_hr = E_FAIL;
@ -213,6 +214,28 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
s_gs = NULL;
}
if(renderer == 12)
{
#ifdef _WINDOWS
dev = new GSDevice11();
if(dev == NULL)
{
return -1;
}
if(s_gs == NULL)
{
s_gs = new GSRendererCS();
s_renderer = renderer;
}
#endif
}
else
{
switch(renderer / 3)
{
default:
@ -222,11 +245,9 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
#endif
#ifdef ENABLE_SDL_DEV
case 2: dev = new GSDeviceSDL(); break;
#else
case 2: dev = NULL; break;
#endif
case 3: dev = new GSDeviceNull(); break;
case 4: dev = new GSDeviceOGL(); break;
case 5: dev = new GSDeviceOGL(); break;
}
if(dev == NULL)
@ -257,6 +278,7 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
s_renderer = renderer;
}
}
}
catch(std::exception& ex)
{
// Allowing std exceptions to escape the scope of the plugin callstack could
@ -768,8 +790,6 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
vector<uint8> buff;
if(FILE* fp = fopen(lpszCmdLine, "rb"))
{
Console console("GSdx", true);
@ -802,10 +822,128 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1);
struct Packet {uint8 type, param; uint32 size, addr; vector<uint8> buff;};
list<Packet*> packets;
vector<uint8> buff;
int type;
while((type = fgetc(fp)) != EOF)
{
Packet* p = new Packet();
p->type = (uint8)type;
switch(type)
{
case 0:
p->param = (uint8)fgetc(fp);
fread(&p->size, 4, 1, fp);
switch(p->param)
{
case 0:
p->buff.resize(0x4000);
p->addr = 0x4000 - p->size;
fread(&p->buff[p->addr], p->size, 1, fp);
break;
case 1:
case 2:
case 3:
p->buff.resize(p->size);
fread(&p->buff[0], p->size, 1, fp);
break;
}
break;
case 1:
p->param = (uint8)fgetc(fp);
break;
case 2:
fread(&p->size, 4, 1, fp);
break;
case 3:
p->buff.resize(0x2000);
fread(&p->buff[0], 0x2000, 1, fp);
break;
}
packets.push_back(p);
}
Sleep(100);
while(IsWindowVisible(hWnd))
{
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
Packet* p = *i;
switch(p->type)
{
case 0:
switch(p->param)
{
case 0: GSgifTransfer1(&p->buff[0], p->addr); break;
case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break;
case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break;
case 3: GSgifTransfer(&p->buff[0], p->size / 16); break;
}
break;
case 1:
GSvsync(p->param);
break;
case 2:
if(buff.size() < p->size) buff.resize(p->size);
GSreadFIFO2(&buff[0], p->size / 16);
break;
case 3:
memcpy(regs, &p->buff[0], 0x2000);
break;
}
}
}
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
delete *i;
}
packets.clear();
Sleep(100);
/*
vector<uint8> buff;
bool exit = false;
int round = 0;
while(!exit)
{
uint32 index;
@ -819,6 +957,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
case EOF:
fseek(fp, start, 0);
exit = !IsWindowVisible(hWnd);
//exit = ++round == 60;
break;
case 0:
@ -871,6 +1010,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
break;
}
}
*/
GSclose();
GSshutdown();

View File

@ -28,8 +28,14 @@
#define PLUGIN_VERSION 16
#define MAX_PAGES 512
#define MAX_BLOCKS 16384
#define VM_SIZE 4194304
#define PAGE_SIZE 8192
#define BLOCK_SIZE 256
#define COLUMN_SIZE 64
#define MAX_PAGES (VM_SIZE / PAGE_SIZE)
#define MAX_BLOCKS (VM_SIZE / BLOCK_SIZE)
#define MAX_COLUMNS (VM_SIZE / COLUMN_SIZE)
// If defined, sends detailed info in reply to the API title info query from PCSX2.
// Should be left undefined by default.
@ -638,8 +644,8 @@ REG64_(GIFReg, FINISH)
REG_END
REG64_(GIFReg, FOG)
uint8 _PAD1[4+3];
uint8 F:8;
uint8 _PAD1[7];
uint8 F;
REG_END
REG64_(GIFReg, FOGCOL)
@ -1021,7 +1027,6 @@ REG128_(GIFPacked, XYZF2)
uint16 _PAD1;
uint16 Y;
uint16 _PAD2;
uint32 _PAD3:4;
uint32 Z:24;
uint32 _PAD4:4;
@ -1030,7 +1035,9 @@ REG128_(GIFPacked, XYZF2)
uint32 _PAD6:3;
uint32 ADC:1;
uint32 _PAD7:16;
REG_END
REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, XYZ2)
uint16 X;
@ -1041,7 +1048,9 @@ REG128_(GIFPacked, XYZ2)
uint32 _PAD3:15;
uint32 ADC:1;
uint32 _PAD4:16;
REG_END
REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, FOG)
uint32 _PAD1;
@ -1093,19 +1102,24 @@ __aligned(struct, 32) GIFPath
GSVector4i::store<true>(&tag, v);
reg = 0;
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
nreg = tag.NREG;
nreg = tag.NREG ? tag.NREG : 16;
nloop = tag.NLOOP;
adonly = nreg == 1 && regs.u8[0] == GIF_REG_A_D;
adonly = regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1;
}
__forceinline uint8 GetReg()
{
return regs.u8[reg]; // GET_GIF_REG(tag, reg);
return regs.u8[reg];
}
// Returns the register descriptor stored at slot `index` of the unpacked
// `regs` vector. The index is used as-is (no wrapping or bounds check).
__forceinline uint8 GetReg(uint32 index)
{
return regs.u8[index];
}
__forceinline bool StepReg()
{
if((++reg & 0xf) == nreg)
if(++reg == nreg)
{
reg = 0;

View File

@ -43,16 +43,31 @@ class GSBlock
public:
template<int i, bool aligned, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{
GSVector4i v0, v1, v2, v3;
if(aligned)
{
const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0];
const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1];
GSVector4i v0 = GSVector4i::load<aligned>(&s0[0]);
GSVector4i v1 = GSVector4i::load<aligned>(&s0[1]);
GSVector4i v2 = GSVector4i::load<aligned>(&s1[0]);
GSVector4i v3 = GSVector4i::load<aligned>(&s1[1]);
v0 = GSVector4i::load<aligned>(&s0[0]);
v1 = GSVector4i::load<aligned>(&s0[1]);
v2 = GSVector4i::load<aligned>(&s1[0]);
v3 = GSVector4i::load<aligned>(&s1[1]);
GSVector4i::sw64(v0, v2, v1, v3);
}
else
{
const uint8* s0 = &src[srcpitch * 0];
const uint8* s1 = &src[srcpitch * 1];
v0 = GSVector4i::load(&s0[0], &s1[0]);
v1 = GSVector4i::load(&s0[8], &s1[8]);
v2 = GSVector4i::load(&s0[16], &s1[16]);
v3 = GSVector4i::load(&s0[24], &s1[24]);
}
if(mask == 0xffffffff)
{
@ -263,15 +278,27 @@ public:
}
template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
GSVector4i v0, v1, v2, v3;
if(aligned)
{
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = s[i * 4 + 0];
GSVector4i v1 = s[i * 4 + 1];
GSVector4i v2 = s[i * 4 + 2];
GSVector4i v3 = s[i * 4 + 3];
v0 = s[i * 4 + 0];
v1 = s[i * 4 + 1];
v2 = s[i * 4 + 2];
v3 = s[i * 4 + 3];
GSVector4i::sw64(v0, v1, v2, v3);
}
else
{
v0 = GSVector4i::load(&src[i * 64 + 0], &src[i * 64 + 16]);
v1 = GSVector4i::load(&src[i * 64 + 32], &src[i * 64 + 48]);
v2 = GSVector4i::load(&src[i * 64 + 8], &src[i * 64 + 24]);
v3 = GSVector4i::load(&src[i * 64 + 40], &src[i * 64 + 56]);
}
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];

View File

@ -35,7 +35,8 @@ GSDevice::GSDevice()
, m_1x1(NULL)
, m_frame(0)
{
memset(&m_vertices, 0, sizeof(m_vertices));
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
}
GSDevice::~GSDevice()
@ -135,8 +136,10 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format)
void GSDevice::EndScene()
{
m_vertices.start += m_vertices.count;
m_vertices.count = 0;
m_vertex.start += m_vertex.count;
m_vertex.count = 0;
m_index.start += m_index.count;
m_index.count = 0;
}
void GSDevice::Recycle(GSTexture* t)

View File

@ -72,7 +72,8 @@ protected:
GSTexture* m_fxaa;
GSTexture* m_1x1;
GSTexture* m_current;
struct {size_t stride, start, count, limit;} m_vertices;
struct {size_t stride, start, count, limit;} m_vertex;
struct {size_t start, count, limit;} m_index;
unsigned int m_frame; // for ageing the pool
virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0;
@ -101,6 +102,7 @@ public:
virtual void BeginScene() {}
virtual void DrawPrimitive() {};
virtual void DrawIndexedPrimitive() {}
virtual void EndScene();
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}

View File

@ -144,7 +144,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_convert.ps); i++)
{
hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]);
hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i).c_str(), NULL, &m_convert.ps[i]);
}
memset(&dsd, 0, sizeof(dsd));
@ -172,7 +172,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_merge.ps); i++)
{
hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i), NULL, &m_merge.ps[i]);
hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i).c_str(), NULL, &m_merge.ps[i]);
}
memset(&bsd, 0, sizeof(bsd));
@ -200,7 +200,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_interlace.ps); i++)
{
hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i), NULL, &m_interlace.ps[i]);
hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i).c_str(), NULL, &m_interlace.ps[i]);
}
// fxaa
@ -352,7 +352,17 @@ void GSDevice11::Flip()
void GSDevice11::DrawPrimitive()
{
m_ctx->Draw(m_vertices.count, m_vertices.start);
m_ctx->Draw(m_vertex.count, m_vertex.start);
}
// Issues an indexed draw for the current batch: m_index.count indices starting
// at m_index.start, with m_vertex.start passed as the base vertex location.
void GSDevice11::DrawIndexedPrimitive()
{
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
}
// Forwards a compute dispatch with the given x/y/z arguments to the device context.
void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z)
{
m_ctx->Dispatch(x, y, z);
}
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -709,18 +719,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
}
}
void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
ASSERT(m_vertices.count == 0);
ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride)
if(count * stride > m_vertex.limit * m_vertex.stride)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 11000);
m_vertex.start = 0;
m_vertex.count = 0;
m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
}
if(m_vb == NULL)
@ -730,7 +740,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
memset(&bd, 0, sizeof(bd));
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.ByteWidth = m_vertices.limit * stride;
bd.ByteWidth = m_vertex.limit * stride;
bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
@ -743,9 +753,9 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{
m_vertices.start = 0;
m_vertex.start = 0;
type = D3D11_MAP_WRITE_DISCARD;
}
@ -754,13 +764,13 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
{
GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride);
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
m_ctx->Unmap(m_vb, 0);
}
m_vertices.count = count;
m_vertices.stride = stride;
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
}
@ -779,6 +789,70 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
}
}
// Uploads `count` 32-bit indices from `index` into the dynamic index buffer and
// binds that buffer. The buffer is used as a ring: data is appended at
// m_index.start with NO_OVERWRITE until it no longer fits, at which point the
// write position wraps to 0 and the buffer contents are discarded.
//
// index - source indices (count * sizeof(uint32) bytes are read)
// count - number of indices to upload
//
// On buffer creation failure the function returns without uploading or binding.
void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
{
	ASSERT(m_index.count == 0);

	if(count > m_index.limit)
	{
		// The batch does not fit: drop the current buffer (kept referenced in
		// m_ib_old) and size a new one with 50% headroom.
		m_ib_old = m_ib;
		m_ib = NULL;

		// Restart at the beginning of the new buffer, as IASetVertexBuffer does
		// for the vertex buffer; a stale offset would only waste its space.
		m_index.start = 0;
		m_index.count = 0;
		m_index.limit = std::max<int>(count * 3 / 2, 11000);
	}

	if(m_ib == NULL)
	{
		D3D11_BUFFER_DESC bd;

		memset(&bd, 0, sizeof(bd));

		bd.Usage = D3D11_USAGE_DYNAMIC;
		bd.ByteWidth = m_index.limit * sizeof(uint32);
		bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
		bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;

		HRESULT hr;

		hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);

		if(FAILED(hr)) return;
	}

	D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;

	if(m_index.start + count > m_index.limit)
	{
		// Not enough room after the current position: wrap around and discard.
		m_index.start = 0;

		type = D3D11_MAP_WRITE_DISCARD;
	}

	D3D11_MAPPED_SUBRESOURCE m;

	if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
	{
		memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));

		m_ctx->Unmap(m_ib, 0);
	}

	m_index.count = count;

	IASetIndexBuffer(m_ib);
}
// Binds `ib` as the 32-bit index buffer unless it is already the cached one.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
{
	if(m_state.ib == ib)
	{
		return; // already bound, skip the redundant API call
	}

	m_state.ib = ib;

	m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
if(m_state.layout != layout)
@ -890,6 +964,38 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
}
}
// Binds a single shader resource view to compute-shader slot `i`.
// There is no redundant-state filtering yet (see the TODOs): the view is set
// on the context on every call.
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
{
// TODO: if(m_state.cs_srv[i] != srv)
{
// TODO: m_state.cs_srv[i] = srv;
m_ctx->CSSetShaderResources(i, 1, &srv);
}
}
// Binds a single unordered access view to compute-shader slot `i`.
// There is no redundant-state filtering yet (see the TODOs): the view is set
// on the context on every call. NULL is passed for the initial-counts array.
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
{
// TODO: if(m_state.cs_uav[i] != uav)
{
// TODO: m_state.cs_uav[i] = uav;
// uint32 count[] = {-1};
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, NULL);
}
}
// Makes `cs` the active compute shader unless it is already the cached one.
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
{
	if(m_state.cs == cs)
	{
		return; // already active, skip the redundant API call
	}

	m_state.cs = cs;

	m_ctx->CSSetShader(cs, NULL, 0);
}
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
{
if(m_state.dss != dss || m_state.sref != sref)
@ -958,7 +1064,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
}
}
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
{
HRESULT hr;
@ -968,7 +1074,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{
@ -997,7 +1103,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr;
}
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs)
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs)
{
HRESULT hr;
@ -1007,7 +1113,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{
@ -1029,7 +1135,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr;
}
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
{
HRESULT hr;
@ -1039,7 +1145,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{
@ -1061,3 +1167,67 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr;
}
// Compiles the compute shader entry point `entry` from the embedded resource
// `id` and creates the shader object.
//
// id    - resource identifier of the shader source
// entry - name of the entry point function
// macro - additional preprocessor macros, merged in by PrepareShaderMacro
// cs    - receives the created compute shader
//
// Returns the HRESULT of the first failing step, or of shader creation.
// Compiler messages, if any, are printed to stdout.
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
{
	HRESULT hr;

	vector<D3D11_SHADER_MACRO> m;

	PrepareShaderMacro(m, macro);

	CComPtr<ID3D11Blob> shader, error;

	// Compile against the compute shader profile (m_shader.cs). The pixel
	// shader profile used previously cannot target a compute entry point.
	hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);

	if(error)
	{
		printf("%s\n", (const char*)error->GetBufferPointer());
	}

	if(FAILED(hr))
	{
		return hr;
	}

	hr = m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(),NULL, cs);

	if(FAILED(hr))
	{
		return hr;
	}

	return hr;
}
// Compiles the compute shader entry point `entry` from the source file `fn`
// with the compute shader profile and creates the shader object.
//
// fn    - path of the shader source file
// entry - name of the entry point function
// macro - additional preprocessor macros, merged in by PrepareShaderMacro
// cs    - receives the created compute shader
//
// Returns the compile HRESULT on failure, otherwise the result of shader
// creation. Compiler messages, if any, are printed to stdout.
HRESULT GSDevice11::CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
{
	vector<D3D11_SHADER_MACRO> macros;

	PrepareShaderMacro(macros, macro);

	CComPtr<ID3D11Blob> blob, messages;

	const HRESULT compiled = D3DX11CompileFromFile(fn, &macros[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &blob, &messages, NULL);

	if(messages)
	{
		printf("%s\n", (const char*)messages->GetBufferPointer());
	}

	if(FAILED(compiled))
	{
		return compiled;
	}

	// Success or failure, the creation result is what the caller gets.
	return m_dev->CreateComputeShader((void*)blob->GetBufferPointer(), blob->GetBufferSize(),NULL, cs);
}

View File

@ -45,6 +45,8 @@ class GSDevice11 : public GSDeviceDX
CComPtr<IDXGISwapChain> m_swapchain;
CComPtr<ID3D11Buffer> m_vb;
CComPtr<ID3D11Buffer> m_vb_old;
CComPtr<ID3D11Buffer> m_ib;
CComPtr<ID3D11Buffer> m_ib_old;
bool m_srv_changed, m_ss_changed;
@ -52,6 +54,7 @@ class GSDevice11 : public GSDeviceDX
{
ID3D11Buffer* vb;
size_t vb_stride;
ID3D11Buffer* ib;
ID3D11InputLayout* layout;
D3D11_PRIMITIVE_TOPOLOGY topology;
ID3D11VertexShader* vs;
@ -61,6 +64,7 @@ class GSDevice11 : public GSDeviceDX
ID3D11PixelShader* ps;
ID3D11Buffer* ps_cb;
ID3D11SamplerState* ps_ss[3];
ID3D11ComputeShader* cs;
GSVector2i viewport;
GSVector4i scissor;
ID3D11DepthStencilState* dss;
@ -141,6 +145,8 @@ public:
void SetExclusive(bool isExcl);
void DrawPrimitive();
void DrawIndexedPrimitive();
void Dispatch(uint32 x, uint32 y, uint32 z);
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
void ClearRenderTarget(GSTexture* t, uint32 c);
@ -162,8 +168,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(ID3D11Buffer* ib);
void IASetInputLayout(ID3D11InputLayout* layout);
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
@ -172,11 +180,14 @@ public:
void PSSetShaderResource(int i, GSTexture* sr);
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
void CSSetShader(ID3D11ComputeShader* cs);
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
void OMSetBlendState(ID3D11BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
void SetupIA(const void* vertices, int count, int prim);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
@ -189,8 +200,10 @@ public:
operator ID3D11Device*() {return m_dev;}
operator ID3D11DeviceContext*() {return m_ctx;}
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
HRESULT CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
};

View File

@ -352,8 +352,10 @@ bool GSDevice9::Reset(int w, int h)
m_vb = NULL;
m_vb_old = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertex.start = 0;
m_vertex.count = 0;
m_index.start = 0;
m_index.count = 0;
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
@ -510,25 +512,52 @@ void GSDevice9::DrawPrimitive()
switch(m_state.topology)
{
case D3DPT_TRIANGLELIST:
prims = m_vertices.count / 3;
case D3DPT_POINTLIST:
prims = m_vertex.count;
break;
case D3DPT_LINELIST:
prims = m_vertices.count / 2;
prims = m_vertex.count / 2;
break;
case D3DPT_POINTLIST:
prims = m_vertices.count;
case D3DPT_LINESTRIP:
prims = m_vertex.count - 1;
break;
case D3DPT_TRIANGLELIST:
prims = m_vertex.count / 3;
break;
case D3DPT_TRIANGLESTRIP:
case D3DPT_TRIANGLEFAN:
prims = m_vertices.count - 2;
break;
case D3DPT_LINESTRIP:
prims = m_vertices.count - 1;
prims = m_vertex.count - 2;
break;
default:
__assume(0);
}
m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims);
m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims);
}
// Issues an indexed draw for the current batch. Direct3D 9 expects a primitive
// count rather than an index count, so it is derived from the number of
// indices and the active topology.
void GSDevice9::DrawIndexedPrimitive()
{
	int prims = 0;

	switch(m_state.topology)
	{
	case D3DPT_POINTLIST:
		prims = m_index.count;
		break;
	case D3DPT_LINELIST:
		prims = m_index.count / 2;
		break;
	case D3DPT_LINESTRIP:
		// n indices of a strip form n - 1 connected segments
		prims = m_index.count - 1;
		break;
	case D3DPT_TRIANGLELIST:
		prims = m_index.count / 3;
		break;
	case D3DPT_TRIANGLESTRIP:
	case D3DPT_TRIANGLEFAN:
		// n indices of a strip/fan form n - 2 triangles
		prims = m_index.count - 2;
		break;
	default:
		__assume(0);
	}

	// NumVertices describes the range of vertices addressed by the indices
	// (starting at BaseVertexIndex + MinVertexIndex), so it is the size of the
	// vertex batch, not the number of indices.
	m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_vertex.count, m_index.start, prims);
}
void GSDevice9::EndScene()
@ -881,49 +910,49 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
}
}
void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
ASSERT(m_vertices.count == 0);
ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride)
if(count * stride > m_vertex.limit * m_vertex.stride)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
m_vertex.start = 0;
m_vertex.count = 0;
m_vertex.limit = std::max<int>(count * 3 / 2, 10000);
}
if(m_vb == NULL)
{
HRESULT hr;
hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
if(FAILED(hr)) return;
}
uint32 flags = D3DLOCK_NOOVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{
m_vertices.start = 0;
m_vertex.start = 0;
flags = D3DLOCK_DISCARD;
}
void* v = NULL;
void* ptr = NULL;
if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags)))
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
{
GSVector4i::storent(v, vertices, count * stride);
GSVector4i::storent(ptr, vertex, count * stride);
m_vb->Unlock();
}
m_vertices.count = count;
m_vertices.stride = stride;
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
}
@ -939,6 +968,61 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
}
}
// Uploads `count` 32-bit indices from `index` into the dynamic index buffer and
// binds that buffer. The buffer is used as a ring: data is appended at
// m_index.start with NOOVERWRITE until it no longer fits, at which point the
// write position wraps to 0 and the buffer contents are discarded.
//
// index - source indices (count * sizeof(uint32) bytes are read)
// count - number of indices to upload
//
// On buffer creation failure the function returns without uploading or binding.
void GSDevice9::IASetIndexBuffer(const void* index, size_t count)
{
	ASSERT(m_index.count == 0);

	if(count > m_index.limit)
	{
		// The batch does not fit: drop the current buffer (kept referenced in
		// m_ib_old) and size a new one with 50% headroom.
		m_ib_old = m_ib;
		m_ib = NULL;

		// Restart at the beginning of the new buffer, as IASetVertexBuffer does
		// for the vertex buffer; a stale offset would only waste its space.
		m_index.start = 0;
		m_index.count = 0;
		m_index.limit = std::max<int>(count * 3 / 2, 11000);
	}

	if(m_ib == NULL)
	{
		HRESULT hr;

		hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL);

		if(FAILED(hr)) return;
	}

	uint32 flags = D3DLOCK_NOOVERWRITE;

	if(m_index.start + count > m_index.limit)
	{
		// Not enough room after the current position: wrap around and discard.
		m_index.start = 0;

		flags = D3DLOCK_DISCARD;
	}

	void* ptr = NULL;

	if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags)))
	{
		memcpy(ptr, index, count * sizeof(uint32));

		m_ib->Unlock();
	}

	m_index.count = count;

	IASetIndexBuffer(m_ib);
}
// Selects `ib` as the device's index buffer unless it is already the cached one.
void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib)
{
	if(m_state.ib == ib)
	{
		return; // already selected, skip the redundant API call
	}

	m_state.ib = ib;

	m_dev->SetIndices(ib);
}
void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout)
{
if(m_state.layout != layout)

View File

@ -82,6 +82,8 @@ class GSDevice9 : public GSDeviceDX
CComPtr<IDirect3DSwapChain9> m_swapchain;
CComPtr<IDirect3DVertexBuffer9> m_vb;
CComPtr<IDirect3DVertexBuffer9> m_vb_old;
CComPtr<IDirect3DIndexBuffer9> m_ib;
CComPtr<IDirect3DIndexBuffer9> m_ib_old;
bool m_lost;
D3DFORMAT m_depth_format;
@ -89,6 +91,7 @@ class GSDevice9 : public GSDeviceDX
{
IDirect3DVertexBuffer9* vb;
size_t vb_stride;
IDirect3DIndexBuffer9* ib;
IDirect3DVertexDeclaration9* layout;
D3DPRIMITIVETYPE topology;
IDirect3DVertexShader9* vs;
@ -169,6 +172,7 @@ public:
void BeginScene();
void DrawPrimitive();
void DrawIndexedPrimitive();
void EndScene();
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
@ -191,8 +195,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
void IASetInputLayout(IDirect3DVertexDeclaration9* layout);
void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology);
void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len);
@ -210,7 +216,7 @@ public:
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
void SetupIA(const void* vertices, int count, int prim);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel) {}
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -67,18 +67,21 @@ bool GSDeviceDX::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
m_shader.vs = "vs_4_0";
m_shader.gs = "gs_4_0";
m_shader.ps = "ps_4_0";
m_shader.cs = "cs_4_0";
break;
case D3D_FEATURE_LEVEL_10_1:
m_shader.model = "0x401";
m_shader.vs = "vs_4_1";
m_shader.gs = "gs_4_1";
m_shader.ps = "ps_4_1";
m_shader.cs = "cs_4_1";
break;
case D3D_FEATURE_LEVEL_11_0:
m_shader.model = "0x500";
m_shader.vs = "vs_5_0";
m_shader.gs = "gs_5_0";
m_shader.ps = "ps_5_0";
m_shader.cs = "cs_5_0";
break;
default:
ASSERT(0);

View File

@ -266,7 +266,7 @@ public:
#pragma pack(pop)
protected:
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader;
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps, cs;} m_shader;
uint32 m_msaa;
DXGI_SAMPLE_DESC m_msaa_desc;
@ -277,8 +277,9 @@ public:
virtual ~GSDeviceDX();
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
virtual void SetupIA(const void* vertices, int count, int prim) = 0;
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
virtual void SetupGS(GSSelector sel) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;

View File

@ -36,9 +36,9 @@ GSDrawScanline::~GSDrawScanline()
{
}
void GSDrawScanline::BeginDraw(const void* param)
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
{
memcpy(&m_global, param, sizeof(m_global));
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.mmin && m_global.sel.lcm)
{
@ -87,7 +87,7 @@ void GSDrawScanline::BeginDraw(const void* param)
sel.tcc = m_global.sel.tcc;
sel.fst = m_global.sel.fst;
sel.fge = m_global.sel.fge;
sel.sprite = m_global.sel.sprite;
sel.prim = m_global.sel.prim;
sel.fb = m_global.sel.fb;
sel.zb = m_global.sel.zb;
sel.zoverflow = m_global.sel.zoverflow;
@ -102,7 +102,9 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
// FIXME: something's not right with the sky in burnout 3
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{
GSScanlineSelector sel = m_global.sel;
@ -115,7 +117,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
if(has_z || has_f)
{
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(has_f)
{
@ -145,12 +147,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
{
if(has_f)
{
m_local.p.f = GSVector4i(vertices[0].p).zzzzh().zzzz();
m_local.p.f = GSVector4i(vertex[index[1]].p).zzzzh().zzzz();
}
if(has_z)
{
m_local.p.z = vertices[0].t.u32[3]; // uint32 z is bypassed in t.w
m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
}
}
}
@ -234,7 +236,17 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
}
else
{
GSVector4i c = GSVector4i(vertices[0].c);
int last = 0;
switch(sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
GSVector4i c = GSVector4i(vertex[index[last]].c);
c = c.upl16(c.zwxy());
@ -271,7 +283,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(sel.fwrite && sel.fge)
{
@ -300,7 +312,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s);
GSVector4i v = vt.yyyy();
if(!sel.sprite || sel.mmin)
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{
v += GSVector4i::cast(m_local.d[skip].t);
}
@ -354,7 +366,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{
za = fza_base->y + fza_offset->y;
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
GSVector4 z = scan.p.zzzz() + zo;
@ -754,7 +766,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{
uf = u.xxzzlh().srl16(1);
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
vf = v.xxzzlh().srl16(1);
}
@ -936,7 +948,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.fwrite && sel.fge)
{
GSVector4i fog = !sel.sprite ? f : m_local.p.f;
GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f;
rb = m_global.frb.lerp16<0>(rb, fog);
ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga);
@ -1211,7 +1223,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
fza_offset++;
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(sel.zb)
{
@ -1234,7 +1246,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx());
if(!sel.sprite || sel.mmin)
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{
t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy());
}

View File

@ -29,6 +29,14 @@
class GSDrawScanline : public IDrawScanline
{
public:
// Rasterizer job data extended with the scanline global state.
// GSDrawScanline::BeginDraw() receives a GSRasterizerData pointer, casts it
// to SharedData and copies 'global' into its own m_global member.
class SharedData : public GSRasterizerData
{
public:
// Global scanline state for the draw; read by BeginDraw().
GSScanlineGlobalData global;
};
protected:
GSScanlineGlobalData m_global;
GSScanlineLocalData m_local;
@ -50,14 +58,14 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
// edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]);
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
}
@ -455,7 +455,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// z += m_local.d4.z;
@ -501,7 +501,7 @@ void GSDrawScanlineCodeGenerator::Step()
vpaddd(xmm2, ptr[&m_local.temp.s]);
vmovdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
vpaddd(xmm3, ptr[&m_local.temp.t]);
@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi;
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.zoverflow)
{
@ -733,7 +733,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4i vf = v.xxzzlh().srl16(1);
@ -2227,7 +2227,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
vmovdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
vmovdqa(xmm1, xmm6);
vmovdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2350,7 +2350,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2;
vmovdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast)
{

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
// edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]);
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
}
@ -458,7 +458,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// z += m_local.d4.z;
@ -504,7 +504,7 @@ void GSDrawScanlineCodeGenerator::Step()
paddd(xmm2, ptr[&m_local.temp.s]);
movdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
paddd(xmm3, ptr[&m_local.temp.t]);
@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi;
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.zoverflow)
{
@ -738,7 +738,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4i vf = v.xxzzlh().srl16(1);
@ -2341,7 +2341,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
movdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
movdqa(xmm1, xmm6);
movdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2464,7 +2464,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2;
movdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast)
{

View File

@ -41,12 +41,11 @@ public:
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
__aligned(struct, 32)
struct
{
GSVector4i dx10;
GSVector4 dx9;
GSVector4 in;
GSVector4 ex;
GSVector4 ofex;
uint32 ofxy;
} scissor;
struct
@ -83,25 +82,22 @@ public:
void UpdateScissor()
{
scissor.dx10 = GSVector4i(
scissor.ofex = GSVector4(
(int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY),
(int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY));
scissor.dx9 = GSVector4(scissor.dx10);
scissor.in = GSVector4(
(int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1 + 1,
(int)SCISSOR.SCAY1 + 1);
scissor.ex = GSVector4(
(int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1,
(int)SCISSOR.SCAY1);
uint16 ofx = (uint16)XYOFFSET.OFX - 15;
uint16 ofy = (uint16)XYOFFSET.OFY - 15;
scissor.ofxy = ((ofy << 16) | ofx); // ceil(xy) => (xy - offset + 15) >> 4 => (xy - [offset - 15]) >> 4
}
bool DepthRead() const

View File

@ -24,10 +24,7 @@
GSDump::GSDump()
: m_gs(NULL)
, m_obj(NULL)
, m_frames(0)
, m_objects(0)
, m_vertices(0)
{
}
@ -39,11 +36,8 @@ GSDump::~GSDump()
void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs)
{
m_gs = fopen((fn + ".gs").c_str(), "wb");
m_obj = fopen((fn + ".obj").c_str(), "wt");
m_frames = 0;
m_objects = 0;
m_vertices = 0;
if(m_gs)
{
@ -57,7 +51,6 @@ void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GS
void GSDump::Close()
{
if(m_gs) {fclose(m_gs); m_gs = NULL;}
if(m_obj) {fclose(m_obj); m_obj = NULL;}
}
void GSDump::Transfer(int index, const uint8* mem, size_t size)
@ -96,67 +89,3 @@ void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs)
}
}
}
void GSDump::Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass)
{
if(m_obj)
{
switch(primclass)
{
case GS_POINT_CLASS:
// TODO
break;
case GS_LINE_CLASS:
// TODO
break;
case GS_TRIANGLE_CLASS:
for(int i = 0; i < count; i++)
{
float x = vertices[i].p.x;
float y = vertices[i].p.y;
float z = vertices[i].p.z;
fprintf(m_obj, "v %f %f %f\n", x, y, z);
}
for(int i = 0; i < count; i++)
{
fprintf(m_obj, "vt %f %f %f\n", vertices[i].t.x, vertices[i].t.y, vertices[i].t.z);
}
for(int i = 0; i < count; i++)
{
fprintf(m_obj, "vn %f %f %f\n", 0.0f, 0.0f, 0.0f);
}
fprintf(m_obj, "g f%d_o%d_p%d_v%d\n", m_frames, m_objects, primclass, count);
for(int i = 0; i < count; i += 3)
{
int a = m_vertices + i + 1;
int b = m_vertices + i + 2;
int c = m_vertices + i + 3;
fprintf(m_obj, "f %d/%d/%d %d/%d/%d %d/%d/%d\n", a, a, a, b, b, b, c, c, c);
}
m_vertices += count;
m_objects++;
break;
case GS_SPRITE_CLASS:
// TODO
break;
}
}
}

View File

@ -46,10 +46,7 @@ Regs data (id == 3)
class GSDump
{
FILE* m_gs;
FILE* m_obj;
int m_frames;
int m_objects;
int m_vertices;
public:
GSDump();
@ -60,6 +57,5 @@ public:
void ReadFIFO(uint32 size);
void Transfer(int index, const uint8* mem, size_t size);
void VSync(int field, bool last, const GSPrivRegSet* regs);
void Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass);
operator bool() {return m_gs != NULL;}
};

View File

@ -76,8 +76,8 @@ GtkWidget* CreateRenderComboBox()
case 8 : renderer_box_position = 1; break;
case 10: renderer_box_position = 2; break;
case 11: renderer_box_position = 3; break;
case 12: renderer_box_position = 4; break;
case 13: renderer_box_position = 5; break;
case 15: renderer_box_position = 4; break;
case 16: renderer_box_position = 5; break;
}
gtk_combo_box_set_active(GTK_COMBO_BOX(render_combo_box), renderer_box_position);
return render_combo_box;
@ -375,8 +375,8 @@ bool RunLinuxDialog()
case 1: theApp.SetConfig("renderer", 8); break;
case 2: theApp.SetConfig("renderer", 10); break;
case 3: theApp.SetConfig("renderer", 11); break;
case 4: theApp.SetConfig("renderer", 12); break;
case 5: theApp.SetConfig("renderer", 13); break;
case 4: theApp.SetConfig("renderer", 15); break;
case 5: theApp.SetConfig("renderer", 16); break;
}
}

View File

@ -449,7 +449,7 @@ GSLocalMemory::~GSLocalMemory()
for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
for(hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)
for(hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)
{
delete [] i->second;
}
@ -500,6 +500,11 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
o->hash = hash;
o->fbp = fbp;
o->zbp = zbp;
o->fpsm = fpsm;
o->zpsm = zpsm;
o->bw = bw;
pixelAddress fpa = m_psm[fpsm].pa;
pixelAddress zpa = m_psm[zpsm].pa;
@ -526,11 +531,11 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;}
list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
{
uint32 hash = TEX0.TBP0 | (TEX0.TBW << 14) | (TEX0.PSM << 20) | (TEX0.TW << 26);
uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH
hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.find(hash);
hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.find(hash);
if(i != m_p2tmap.end())
{
@ -540,13 +545,13 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
GSVector2i bs = m_psm[TEX0.PSM].bs;
int tw = std::max<int>(1 << TEX0.TW, bs.x);
// int th = std::max<int>(1 << TEX0.TH, bs.y);
int th = std::max<int>(1 << TEX0.TH, bs.y);
const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
hash_map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for(int y = 0; y < 1024; y += bs.y) // the hash is a little short on bits for TEX0.TH, hard-coding it to 1024 lines
for(int y = 0; y < th; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
@ -563,7 +568,7 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array
list<GSVector2i>* p2t = new list<GSVector2i>[MAX_PAGES];
vector<GSVector2i>* p2t = new vector<GSVector2i>[MAX_PAGES];
for(hash_map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++)
{
@ -594,16 +599,12 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
// sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y)
vector<GSVector2i> tmp;
for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++)
{
tmp.push_back(GSVector2i(j->first, ~j->second));
p2t[page].push_back(GSVector2i(j->first, ~j->second));
}
std::sort(tmp.begin(), tmp.end(), cmp_vec2x);
p2t[page].insert(p2t[page].end(), tmp.begin(), tmp.end());
std::sort(p2t[page].begin(), p2t[page].end(), cmp_vec2x);
}
m_p2tmap[hash] = p2t;
@ -1305,13 +1306,13 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
{
if(len <= 0) return;
uint8* pb = (uint8*)dst;
uint16* pw = (uint16*)dst;
uint32* pd = (uint32*)dst;
uint8* RESTRICT pb = (uint8*)dst;
uint16* RESTRICT pw = (uint16*)dst;
uint32* RESTRICT pd = (uint32*)dst;
uint32 bp = BITBLTBUF.SBP;
uint32 bw = BITBLTBUF.SBW;
psm_t* psm = &m_psm[BITBLTBUF.SPSM];
psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM];
int x = tx;
int y = ty;
@ -1323,16 +1324,26 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMCT32:
case PSM_PSMZ32:
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
len /= 4;
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pd += 4)
{
pd[0] = ps[offset[x + 0]];
pd[1] = ps[offset[x + 1]];
pd[2] = ps[offset[x + 2]];
pd[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pd++)
{
*pd = ReadPixel32(addr + offset[x]);
*pd = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1347,16 +1358,16 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x++, pb += 3)
{
uint32 c = ReadPixel32(addr + offset[x]);
uint32 c = ps[offset[x]];
pb[0] = ((uint8*)&c)[0];
pb[1] = ((uint8*)&c)[1];
pb[2] = ((uint8*)&c)[2];
pb[0] = (uint8)(c);
pb[1] = (uint8)(c >> 8);
pb[2] = (uint8)(c >> 16);
}
if(x == ex) {x = sx; y++;}
@ -1373,12 +1384,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
{
pw[0] = ps[offset[x + 0]];
pw[1] = ps[offset[x + 1]];
pw[2] = ps[offset[x + 2]];
pw[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pw++)
{
*pw = ReadPixel16(addr + offset[x]);
*pw = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1390,12 +1409,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = ps[offset[x + 0]];
pb[1] = ps[offset[x + 1]];
pb[2] = ps[offset[x + 2]];
pb[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pb++)
{
*pb = ReadPixel8(addr + offset[x]);
*pb = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1408,7 +1435,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
@ -1424,12 +1451,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = (uint8)(ps[offset[x + 0]] >> 24);
pb[1] = (uint8)(ps[offset[x + 1]] >> 24);
pb[2] = (uint8)(ps[offset[x + 2]] >> 24);
pb[3] = (uint8)(ps[offset[x + 3]] >> 24);
}
for(; len > 0 && x < ex; len--, x++, pb++)
{
*pb = ReadPixel8H(addr + offset[x]);
*pb = (uint8)(ps[offset[x]] >> 24);
}
if(x == ex) {x = sx; y++;}
@ -1441,12 +1476,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
*pb = ReadPixel4HL(addr + offset[x + 0]) | (ReadPixel4HL(addr + offset[x + 1]) << 4);
uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0;
*pb = (uint8)(c0 | c1);
}
if(x == ex) {x = sx; y++;}
@ -1458,12 +1496,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
*pb = ReadPixel4HH(addr + offset[x + 0]) | (ReadPixel4HH(addr + offset[x + 1]) << 4);
uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0;
*pb = (uint8)(c0 | c1);
}
if(x == ex) {x = sx; y++;}
@ -1994,13 +2035,9 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
GSOffset::~GSOffset()
{
for(hash_map<uint64, list<uint32>*>::iterator i = m_cache.begin(); i != m_cache.end(); i++)
{
delete i->second;
}
}
list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
{
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
@ -2008,55 +2045,61 @@ list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
if(bbox != NULL) *bbox = r;
uint64 r_hash;
// worst case:
// bp page-aligned: (w * h) / (64 * 32)
// bp block-aligned: (w * h) / (8 * 8)
GSVector4i::storel(&r_hash, r.sra32(3).ps32()); // max 19-bit coordinates, should not be a problem (can shift right by 3 because it is mod8, smallest block size)
int size = r.width() * r.height();
hash_map<uint64, list<uint32>*>::iterator i = m_cache.find(r_hash);
int limit = MAX_PAGES + 1;
if(i != m_cache.end())
if(pages == NULL)
{
return i->second;
limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
pages = new uint32[limit];
}
uint32 tmp[16];
__aligned(uint32, 16) tmp[16];
memset(tmp, 0, sizeof(tmp));
((GSVector4i*)tmp)[0] = GSVector4i::zero();
((GSVector4i*)tmp)[1] = GSVector4i::zero();
((GSVector4i*)tmp)[2] = GSVector4i::zero();
((GSVector4i*)tmp)[3] = GSVector4i::zero();
r = r.sra32(3);
bs.x >>= 3;
bs.y >>= 3;
uint32* RESTRICT p = pages;
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = block.row[y >> 3];
uint32 base = block.row[y];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 n = (base + block.col[x >> 3]) >> 5;
uint32 n = (base + block.col[x]) >> 5;
if(n < MAX_PAGES)
{
tmp[n >> 5] |= 1 << (n & 31);
}
}
}
uint32& row = tmp[n >> 5];
uint32 col = 1 << (n & 31);
list<uint32>* l = new list<uint32>();
for(int i = 0; i < countof(tmp); i++)
if((row & col) == 0)
{
uint32 p = tmp[i];
row |= col;
if(p == 0) continue;
unsigned long j;
while(_BitScanForward(&j, p))
{
p ^= 1 << j;
l->push_back((i << 5) + j);
*p++ = n;
}
}
}
}
m_cache[r_hash] = l;
*p++ = EOP;
return l;
ASSERT(p - pages <= limit);
return pages;
}

View File

@ -30,8 +30,6 @@
class GSOffset : public GSAlignedClass<32>
{
hash_map<uint64, list<uint32>*> m_cache;
public:
__aligned(struct, 32) Block
{
@ -53,7 +51,9 @@ public:
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
list<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
enum {EOP = 0xffffffff};
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
};
struct GSPixelOffset4
@ -63,6 +63,7 @@ struct GSPixelOffset4
GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...)
GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...)
uint32 hash;
uint32 fbp, zbp, fpsm, zpsm, bw;
};
class GSLocalMemory : public GSBlock
@ -158,7 +159,7 @@ protected:
hash_map<uint32, GSOffset*> m_omap;
hash_map<uint32, GSPixelOffset4*> m_po4map;
hash_map<uint32, list<GSVector2i>*> m_p2tmap;
hash_map<uint64, vector<GSVector2i>*> m_p2tmap;
public:
GSLocalMemory();
@ -166,7 +167,7 @@ public:
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
list<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
// address

View File

@ -28,9 +28,8 @@ public:
{
Main,
Sync,
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
WorkerSync0, WorkerSync1, WorkerSync2, WorkerSync3, WorkerSync4, WorkerSync5, WorkerSync6, WorkerSync7, WorkerSync8, WorkerSync9, WorkerSync10, WorkerSync11, WorkerSync12, WorkerSync13, WorkerSync14, WorkerSync15,
WorkerSleep0, WorkerSleep1, WorkerSleep2, WorkerSleep3, WorkerSleep4, WorkerSleep5, WorkerSleep6, WorkerSleep7, WorkerSleep8, WorkerSleep9, WorkerSleep10, WorkerSleep11, WorkerSleep12, WorkerSleep13, WorkerSleep14, WorkerSleep15,
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7,
WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
TimerLast,
};

View File

@ -35,6 +35,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
, m_id(id)
, m_threads(threads)
, m_perfmon(perfmon)
, m_pixels(0)
{
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0;
@ -63,11 +64,15 @@ GSRasterizer::~GSRasterizer()
bool GSRasterizer::IsOneOfMyScanlines(int top) const
{
	// Callers must pass a scanline in [0, 2048).
	ASSERT(top >= 0 && top < 2048);

	// Scanlines are grouped into bands of (1 << THREAD_HEIGHT) lines;
	// m_myscanline holds one flag per band.
	const int band = top >> THREAD_HEIGHT;

	// True when the flag for the band containing 'top' is non-zero.
	return m_myscanline[band] != 0;
}
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
{
ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048);
top = top >> THREAD_HEIGHT;
bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT;
@ -98,26 +103,42 @@ int GSRasterizer::FindMyNextScanline(int top) const
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
{
Draw(data);
Draw(data.get());
}
void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels;
if(reset)
{
m_pixels = 0;
}
return pixels;
}
void GSRasterizer::Draw(GSRasterizerData* data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->count == 0) return;
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
m_ds->BeginDraw(data->param);
m_ds->BeginDraw(data);
const GSVertexSW* vertices = data->vertices;
const GSVertexSW* vertices_end = data->vertices + data->count;
const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
const uint32* index = data->index;
const uint32* index_end = data->index + data->index_count;
uint32 tmp_index[] = {0, 1, 2};
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
m_scissor = data->scissor;
m_fscissor = GSVector4(data->scissor);
m_pixels = 0;
m_fscissor_x = GSVector4(data->scissor).xzxz();
m_fscissor_y = GSVector4(data->scissor).ywyw();
uint64 start = __rdtsc();
@ -127,33 +148,57 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
if(scissor_test)
{
DrawPoint<true>(vertices, data->count);
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
}
else
{
DrawPoint<false>(vertices, data->count);
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
}
break;
case GS_LINE_CLASS:
do {DrawLine(vertices); vertices += 2;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawLine(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawLine(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
break;
case GS_TRIANGLE_CLASS:
do {DrawTriangle(vertices); vertices += 3;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawTriangle(vertex, index); index += 3;}
while(index < index_end);
}
else
{
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
while(vertex < vertex_end);
}
break;
case GS_SPRITE_CLASS:
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawSprite(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawSprite(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
break;
@ -163,18 +208,19 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
uint64 ticks = __rdtsc() - start;
_InterlockedExchangeAdd(&data->ticks, ticks);
_InterlockedExchangeAdd(&data->pixels, m_pixels);
m_ds->EndDraw(data->frame, ticks, m_pixels);
}
template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
{
for(; count > 0; count--, v++)
if(index != NULL)
{
GSVector4i p(v->p);
for(int i = 0; i < index_count; i++, index++)
{
const GSVertexSW& v = vertex[*index];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
@ -182,17 +228,44 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{
m_pixels++;
m_ds->SetupPrim(v, *v);
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, *v);
m_ds->DrawScanline(1, p.x, p.y, v);
}
}
}
}
void GSRasterizer::DrawLine(const GSVertexSW* v)
else
{
GSVertexSW dv = v[1] - v[0];
uint32 tmp_index[1] = {0};
for(int i = 0; i < vertex_count; i++, vertex++)
{
const GSVertexSW& v = vertex[0];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v);
}
}
}
}
}
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
{
const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVertexSW dv = v1 - v0;
GSVector4 dp = dv.p.abs();
@ -200,10 +273,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(m_ds->HasEdge())
{
DrawEdge(v[0], v[1], dv, i, 0);
DrawEdge(v[0], v[1], dv, i, 1);
DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1);
Flush(v, GSVertexSW::zero(), true);
Flush(vertex, index, GSVertexSW::zero(), true);
return;
}
@ -216,23 +289,21 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
{
// shortcut for horizontal lines
GSVector4 mask = (v[0].p > v[1].p).xxxx();
GSVector4 mask = (v0.p > v1.p).xxxx();
GSVertexSW scan;
scan.p = v[0].p.blend32(v[1].p, mask);
scan.t = v[0].t.blend32(v[1].t, mask);
scan.c = v[0].c.blend32(v[1].c, mask);
scan.p = v0.p.blend32(v1.p, mask);
scan.t = v0.t.blend32(v1.t, mask);
scan.c = v0.c.blend32(v1.c, mask);
GSVector4i p(scan.p);
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
{
GSVector4 scissor = m_fscissor.xzxz();
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
GSVector4 l = lrf.max(m_fscissor_x);
GSVector4 r = lrf.min(m_fscissor_x);
GSVector4i lr = GSVector4i(l.xxyy(r));
int left = lr.extract32<0>();
@ -248,7 +319,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(v, dscan);
m_ds->SetupPrim(vertex, index, dscan);
m_ds->DrawScanline(pixels, left, p.y, scan);
}
@ -262,7 +333,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(steps > 0)
{
GSVertexSW edge = v[0];
GSVertexSW edge = v0;
GSVertexSW dedge = dv / GSVector4(dp.v[i]);
GSVertexSW* RESTRICT e = m_edge.buff;
@ -288,7 +359,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
m_edge.count = e - m_edge.buff;
Flush(v, GSVertexSW::zero());
Flush(vertex, index, GSVertexSW::zero());
}
}
@ -304,42 +375,47 @@ static const uint8 s_ysort[8][4] =
{2, 1, 0, 0}, // y2 < y1 < y0
};
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
{
GSVertexSW v[3];
GSVertexSW dv[3];
GSVertexSW edge;
GSVertexSW dedge;
GSVertexSW dscan;
GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p);
GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx();
GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
int mask = (y0011 > y1221).mask() & 7;
int m1 = (y0011 > y1221).mask() & 7;
v[0] = vertices[s_ysort[mask][0]];
v[1] = vertices[s_ysort[mask][1]];
v[2] = vertices[s_ysort[mask][2]];
int i[3];
y0011 = v[0].p.yyyy(v[1].p);
y1221 = v[1].p.yyyy(v[2].p).xzzx();
i[0] = index[s_ysort[m1][0]];
i[1] = index[s_ysort[m1][1]];
i[2] = index[s_ysort[m1][2]];
int i = (y0011 == y1221).mask() & 7;
const GSVertexSW& v0 = vertex[i[0]];
const GSVertexSW& v1 = vertex[i[1]];
const GSVertexSW& v2 = vertex[i[2]];
y0011 = v0.p.yyyy(v1.p);
y1221 = v1.p.yyyy(v2.p).xzzx();
m1 = (y0011 == y1221).mask() & 7;
// if(i == 0) => y0 < y1 < y2
// if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2
if(i == 7) return; // y0 == y1 == y2
if(m1 == 7) return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor.ywyw());
GSVector4 tbmin = tbf.min(m_fscissor.ywyw());
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
GSVector4 tbmax = tbf.max(m_fscissor_y);
GSVector4 tbmin = tbf.min(m_fscissor_y);
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b)
dv[0] = v[1] - v[0];
dv[1] = v[2] - v[0];
dv[2] = v[2] - v[1];
dv[0] = v1 - v0;
dv[1] = v2 - v0;
dv[2] = v2 - v1;
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
@ -347,11 +423,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
int j = cross.upl(cross == GSVector4::zero()).mask();
int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(j & 2) return;
if(m2 & 2) return;
j &= 1;
m2 &= 1;
cross = cross.rcpnr();
@ -391,42 +467,42 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
if(i & 1)
if(m1 & 1)
{
if(tb.y < tb.w)
{
edge = v[1 - j];
edge = vertex[i[1 - m2]];
edge.p = edge.p.insert<0, 1>(v[j].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge.p = edge.p.insert<0, 1>(vertex[i[m2]].p);
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
{
if(tb.x < tb.z)
{
edge = v[0];
edge = v0;
edge.p = edge.p.xxzw();
dedge.p = ddx[j].xyzw(dedge.p);
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if(tb.y < tb.w)
{
edge = v[1];
edge = v1;
edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(v, dscan);
Flush(vertex, index, dscan);
if(m_ds->HasEdge())
{
@ -434,14 +510,14 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
int i = a.mask();
int j = ((a | b) ^ c).mask() ^ 2; // evil
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
DrawEdge(v0, v1, dv[0], orientation & 1, side & 1);
DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
Flush(v, GSVertexSW::zero(), true);
Flush(vertex, index, GSVertexSW::zero(), true);
}
}
@ -452,7 +528,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 scissor = m_fscissor.xzxz();
GSVector4 scissor = m_fscissor_x;
top = FindMyNextScanline(top);
@ -493,18 +569,21 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
m_edge.count += e - &m_edge.buff[m_edge.count];
}
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
{
const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero());
GSVertexSW v[2];
GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero());
v[0].p = v1.p.blend32(v0.p, mask);
v[0].t = v1.t.blend32(v0.t, mask);
v[0].c = v1.c;
v[0].p = vertices[1].p.blend32(vertices[0].p, mask);
v[0].t = vertices[1].t.blend32(vertices[0].t, mask);
v[0].c = vertices[1].c;
v[1].p = vertices[0].p.blend32(vertices[1].p, mask);
v[1].t = vertices[0].t.blend32(vertices[1].t, mask);
v[1].p = v0.p.blend32(v1.p, mask);
v[1].t = v0.t.blend32(v1.t, mask);
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
@ -514,14 +593,31 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
GSVertexSW scan = v[0];
if(solidrect)
if(m_ds->IsSolidRect())
{
if(m_id == 0)
if(m_threads == 1)
{
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
}
else
{
int top = FindMyNextScanline(r.top);
int bottom = r.bottom;
while(top < bottom)
{
r.top = top;
r.bottom = std::min<int>((top + (1 << THREAD_HEIGHT)) & ~((1 << THREAD_HEIGHT) - 1), bottom);
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
}
}
return;
}
@ -543,7 +639,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v, dscan);
m_ds->SetupPrim(vertex, index, dscan);
while(1)
{
@ -575,13 +671,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
if(orientation)
{
GSVector4 tbmax = lrtb.max(m_fscissor.yyyy());
GSVector4 tbmin = lrtb.min(m_fscissor.wwww());
GSVector4i tb = GSVector4i(tbmax.zwzw(tbmin));
GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b
GSVector4 tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb)
GSVector4 tbmin = tbf.min(m_fscissor_y); // min(t, st) min(t, sb) min(b, st) min(b, sb)
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(t, st) max(b, sb) min(t, st) min(b, sb)
int top, bottom;
@ -589,27 +684,27 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if((dv.p >= GSVector4::zero()).mask() & 2)
{
top = tb.extract32<0>();
bottom = tb.extract32<3>();
top = tb.extract32<0>(); // max(t, st)
bottom = tb.extract32<3>(); // min(b, sb)
if(top >= bottom) return;
edge = v0;
dedge = dv / dv.p.yyyy();
edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
edge += dedge * (tbmax.xxxx() - edge.p.yyyy());
}
else
{
top = tb.extract32<1>();
bottom = tb.extract32<2>();
top = tb.extract32<1>(); // max(b, st)
bottom = tb.extract32<2>(); // min(t, sb)
if(top >= bottom) return;
edge = v1;
dedge = dv / dv.p.yyyy();
edge += dedge * (tbmax.wwww() - edge.p.yyyy());
edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
}
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
@ -664,9 +759,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
GSVector4 lrmax = lrtb.max(m_fscissor.xxxx());
GSVector4 lrmin = lrtb.min(m_fscissor.zzzz());
GSVector4i lr = GSVector4i(lrmax.xyxy(lrmin));
GSVector4 lrf = v0.p.xxxx(v1.p).ceil(); // l l r r
GSVector4 lrmax = lrf.max(m_fscissor_x); // max(l, sl) max(l, sr) max(r, sl) max(r, sr)
GSVector4 lrmin = lrf.min(m_fscissor_x); // min(l, sl) min(l, sr) min(r, sl) min(r, sr)
GSVector4i lr = GSVector4i(lrmax.xzyw(lrmin)); // max(l, sl) max(r, sl) min(l, sr) min(r, sr)
int left, right;
@ -674,8 +770,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if((dv.p >= GSVector4::zero()).mask() & 1)
{
left = lr.extract32<0>();
right = lr.extract32<3>();
left = lr.extract32<0>(); // max(l, sl)
right = lr.extract32<3>(); // min(r, sr)
if(left >= right) return;
@ -686,15 +782,15 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
left = lr.extract32<1>();
right = lr.extract32<2>();
left = lr.extract32<1>(); // max(r, sl)
right = lr.extract32<2>(); // min(l, sr)
if(left >= right) return;
edge = v1;
dedge = dv / dv.p.xxxx();
edge += dedge * (lrmax.yyyy() - edge.p.xxxx());
edge += dedge * (lrmax.zzzz() - edge.p.xxxx());
}
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
@ -760,7 +856,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
e->p.i16[2] = (int16)top;
}
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge)
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -768,7 +864,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
if(count > 0)
{
m_ds->SetupPrim(vertices, dscan);
m_ds->SetupPrim(vertex, index, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
@ -811,6 +907,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
// Creates an empty rasterizer list: default-constructs the GSJobQueue base and
// starts both counters at zero (m_sync_count is bumped in Sync(),
// m_syncpoint_count in Process() when an item carries the syncpoint flag).
GSRasterizerList::GSRasterizerList()
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_sync_count(0)
, m_syncpoint_count(0)
{
}
@ -847,26 +944,28 @@ void GSRasterizerList::Sync()
m_sync_count++;
}
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
int GSRasterizerList::GetPixels(bool reset)
{
int pixels = 0;
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
pixels += m_workers[i]->GetPixels(reset);
}
m_workers.front()->Process(item);
return;
return pixels;
}
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(item->syncpoint)
{
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
m_syncpoint_count++;
}
for(size_t i = 0; i < m_workers.size(); i++)
@ -890,6 +989,11 @@ GSRasterizerList::GSWorker::~GSWorker()
delete m_r;
}
// Returns the pixel count reported by the rasterizer this worker wraps (m_r).
// 'reset' is forwarded unchanged to GSRasterizer::GetPixels.
int GSRasterizerList::GSWorker::GetPixels(bool reset)
{
return m_r->GetPixels(reset);
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{
GSVector4i r = item->bbox.rintersect(item->scissor);
@ -902,5 +1006,5 @@ void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item);
m_r->Draw(item.get());
}

View File

@ -34,45 +34,38 @@ public:
GSVector4i scissor;
GSVector4i bbox;
GS_PRIM_CLASS primclass;
GSVertexSW* vertices;
int count;
bool solidrect;
uint8* buff;
GSVertexSW* vertex;
int vertex_count;
uint32* index;
int index_count;
bool syncpoint;
uint64 frame;
void* param;
// drawing stats
volatile long ticks;
volatile long pixels;
GSRasterizerData()
: scissor(GSVector4i::zero())
, bbox(GSVector4i::zero())
, primclass(GS_INVALID_CLASS)
, vertices(NULL)
, count(0)
, solidrect(false)
, buff(NULL)
, vertex(NULL)
, vertex_count(0)
, index(NULL)
, index_count(0)
, syncpoint(false)
, frame(0)
, param(NULL)
, ticks(0)
, pixels(0)
{
}
virtual ~GSRasterizerData()
{
if(vertices != NULL) _aligned_free(vertices);
// derived class should free param and its members
if(buff != NULL) _aligned_free(buff);
}
};
class IDrawScanline : public GSAlignedClass<32>
{
public:
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
@ -86,19 +79,19 @@ public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
virtual ~IDrawScanline() {}
virtual void BeginDraw(const void* param) = 0;
virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
#ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
#else
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
virtual void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
@ -106,6 +99,7 @@ public:
#endif
__forceinline bool HasEdge() const {return m_de != NULL;}
__forceinline bool IsSolidRect() const {return m_dr != NULL;}
};
class IRasterizer : public GSAlignedClass<32>
@ -115,6 +109,7 @@ public:
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
virtual void Sync() = 0;
virtual int GetPixels(bool reset = true) = 0;
};
__aligned(class, 32) GSRasterizer : public IRasterizer
@ -126,24 +121,25 @@ protected:
int m_threads;
uint8* m_myscanline;
GSVector4i m_scissor;
GSVector4 m_fscissor;
GSVector4 m_fscissor_x;
GSVector4 m_fscissor_y;
struct {GSVertexSW* buff; int count;} m_edge;
int m_pixels;
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test>
void DrawPoint(const GSVertexSW* v, int count);
void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v, bool solidrect);
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const uint32* index);
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
void DrawSprite(const GSVertexSW* vertex, const uint32* index);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
@ -153,12 +149,13 @@ public:
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const;
void Draw(shared_ptr<GSRasterizerData> data);
void Draw(GSRasterizerData* data);
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync() {}
int GetPixels(bool reset);
};
class GSRasterizerList
@ -174,6 +171,8 @@ protected:
GSWorker(GSRasterizer* r);
virtual ~GSWorker();
int GetPixels(bool reset);
// GSJobQueue
void Push(const shared_ptr<GSRasterizerData>& item);
@ -213,9 +212,11 @@ public:
}
int m_sync_count;
int m_syncpoint_count;
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
int GetPixels(bool reset);
};

View File

@ -22,9 +22,8 @@
#include "stdafx.h"
#include "GSRenderer.h"
GSRenderer::GSRenderer()
: GSState()
, m_vt(this)
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
: GSState(vt, vertex_stride)
, m_dev(NULL)
, m_shader(0)
, m_shift_key(false)
@ -80,8 +79,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
void GSRenderer::ResetDevice()
{
ResetPrim();
if(m_dev) m_dev->Reset(1, 1);
}
@ -350,8 +347,16 @@ void GSRenderer::VSync(int field)
if(fillrate > 0)
{
s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024));
int sum = 0;
for(int i = 0; i < 16; i++)
{
sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
}
s += format(" | %d%% CPU", sum);
}
}
else
{
@ -528,7 +533,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
return;
case VK_F7:
m_shader = (m_shader + 3 + step) % 3;
printf("GSdx: Set shader %d (%s).\n", (int)m_shader);
printf("GSdx: Set shader %d.\n", (int)m_shader);
return;
case VK_DELETE:
m_aa1 = !m_aa1;
@ -602,308 +607,3 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
}
#endif
}
// Computes into r the texel rectangle of the texture described by TEX0 that the
// current batch may access, based on the wrap modes in CLAMP and on the texture
// coordinate range recorded in m_vt (m_min.t / m_max.t).
// When 'linear' is true the coordinate range is widened by 0x8000 on both
// sides before it is converted to integers.
// The result is always clipped to the full texture rectangle.
void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{
// TW/TH are used as log2 sizes: the texture is (1 << tw) x (1 << th) texels
int tw = TEX0.TW;
int th = TEX0.TH;
int w = 1 << tw;
int h = 1 << th;
// tr = whole texture, used as the outer bound for every intersection below
GSVector4i tr(0, 0, w, h);
int wms = CLAMP.WMS;
int wmt = CLAMP.WMT;
int minu = (int)CLAMP.MINU;
int minv = (int)CLAMP.MINV;
int maxu = (int)CLAMP.MAXU;
int maxv = (int)CLAMP.MAXV;
// vr = working rectangle, narrowed step by step
GSVector4i vr = tr;
// Step 1 (horizontal): limits that follow from the wrap mode alone
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
// region clamp: keep only [minu, maxu + 1)
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// region repeat: the span starts at maxu and is minu + 1 texels wide
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// Step 1 (vertical): same rules applied to the t axis
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// Step 2: tighten with the traced coordinate range. Skipped when wms + wmt
// reaches 6 -- presumably only when both axes are CLAMP_REGION_REPEAT
// (TODO confirm against the CLAMP_* enum values).
if(wms + wmt < 6)
{
// x/y of the traced minimum and maximum texture coordinates combined into one vector
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if(linear)
{
// widen the range: -0x8000 on the minimum pair, +0x8000 on the maximum pair
st += GSVector4(-0x8000, 0x8000).xxyy();
}
// convert to integers and drop the low 16 bits
// (coordinates appear to carry 16 fractional bits -- TODO confirm)
GSVector4i uv = GSVector4i(st).sra32(16);
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
// u/v: coordinates reduced to their low tw/th bits (position inside one repeat of the texture)
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
// uu/vv: remaining high bits, i.e. which repeat of the texture the coordinate falls in
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// compares the minimum-side values with the maximum-side values; presumably a set
// group of mask bits means min and max lie in the same repeat (relies on
// upl32/uph32/mask semantics defined elsewhere -- TODO confirm)
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
// clip the integer coordinate range to the texture
uv = uv.rintersect(tr);
switch(wms)
{
case CLAMP_REPEAT:
// the wrapped range is only used when the mask test above succeeded for this axis
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
}
// final clip against the texture bounds
r = vr.rintersect(tr);
}
void GSRenderer::GetAlphaMinMax()
{
if(m_vt.m_alpha.valid)
{
return;
}
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if(PRIM->TME && context->TEX0.TCC)
{
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
case 0:
a.y = 0;
a.w = 0xff;
break;
case 1:
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
a.w = env.TEXA.TA0;
break;
case 2:
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
__assume(0);
}
switch(context->TEX0.TFX)
{
case TFX_MODULATE:
a.x = (a.x * a.y) >> 7;
a.z = (a.z * a.w) >> 7;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_DECAL:
a.x = a.y;
a.z = a.w;
break;
case TFX_HIGHLIGHT:
a.x = a.x + a.y;
a.z = a.z + a.w;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_HIGHLIGHT2:
a.x = a.y;
a.z = a.w;
break;
default:
__assume(0);
}
}
m_vt.m_alpha.min = a.x;
m_vt.m_alpha.max = a.z;
m_vt.m_alpha.valid = true;
}
// Tries to resolve the alpha test for the whole batch up front.
// Returns false when the batch's alpha range straddles the reference value, so
// the outcome differs per pixel; fm and zm are left untouched in that case.
// Returns true when every pixel has the same outcome. If that outcome is
// "fail", fm (frame buffer mask) and zm (z buffer mask) are updated according
// to TEST.AFAIL; if it is "pass" they are not modified.
bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool pass = true;

	if(ctx->TEST.ATST == ATST_NEVER)
	{
		pass = false;
	}
	else if(ctx->TEST.ATST != ATST_ALWAYS)
	{
		GetAlphaMinMax();

		const int lo = m_vt.m_alpha.min;
		const int hi = m_vt.m_alpha.max;
		const int ref = ctx->TEST.AREF;

		const bool all_equal = lo == ref && hi == ref; // whole range sits on ref
		const bool none_equal = lo > ref || hi < ref; // ref lies outside the range

		// 1 = every pixel passes, 0 = every pixel fails, -1 = mixed
		int verdict = -1;

		switch(ctx->TEST.ATST)
		{
		case ATST_NEVER: verdict = 0; break;
		case ATST_ALWAYS: verdict = 1; break;
		case ATST_LESS: verdict = hi < ref ? 1 : lo >= ref ? 0 : -1; break;
		case ATST_LEQUAL: verdict = hi <= ref ? 1 : lo > ref ? 0 : -1; break;
		case ATST_EQUAL: verdict = all_equal ? 1 : none_equal ? 0 : -1; break;
		case ATST_GEQUAL: verdict = lo >= ref ? 1 : hi < ref ? 0 : -1; break;
		case ATST_GREATER: verdict = lo > ref ? 1 : hi <= ref ? 0 : -1; break;
		case ATST_NOTEQUAL: verdict = all_equal ? 0 : none_equal ? 1 : -1; break;
		default: __assume(0);
		}

		if(verdict < 0)
		{
			return false;
		}

		pass = verdict != 0;
	}

	if(pass)
	{
		return true;
	}

	// the test fails for every pixel: mask whatever AFAIL says must not be written
	switch(ctx->TEST.AFAIL)
	{
	case AFAIL_KEEP:
		fm = zm = 0xffffffff;
		break;
	case AFAIL_FB_ONLY:
		zm = 0xffffffff;
		break;
	case AFAIL_ZB_ONLY:
		fm = 0xffffffff;
		break;
	case AFAIL_RGB_ONLY:
		fm |= 0xff000000;
		zm = 0xffffffff;
		break;
	default:
		__assume(0);
	}

	return true;
}
bool GSRenderer::IsOpaque()
{
if(PRIM->AA1)
{
return false;
}
if(!PRIM->ABE)
{
return true;
}
const GSDrawingContext* context = m_context;
int amin = 0, amax = 0xff;
if(context->ALPHA.A != context->ALPHA.B)
{
if(context->ALPHA.C == 0)
{
GetAlphaMinMax();
amin = m_vt.m_alpha.min;
amax = m_vt.m_alpha.max;
}
else if(context->ALPHA.C == 1)
{
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
{
amin = amax = 0x80;
}
}
else if(context->ALPHA.C == 2)
{
amin = amax = context->ALPHA.FIX;
}
}
return context->ALPHA.IsOpaque(amin, amax);
}

View File

@ -24,8 +24,6 @@
#include "GSdx.h"
#include "GSWnd.h"
#include "GSState.h"
#include "GSVertexTrace.h"
#include "GSVertexList.h"
#include "GSCapture.h"
class GSRenderer : public GSState
@ -53,15 +51,6 @@ protected:
virtual GSTexture* GetOutput(int i) = 0;
GSVertexTrace m_vt;
// following functions need m_vt to be initialized
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public:
GSWnd m_wnd;
GSDevice* m_dev;
@ -71,10 +60,9 @@ public:
bool s_save;
bool s_savez;
int s_saven;
GSCritSec s_lock;
public:
GSRenderer();
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSRenderer();
virtual bool CreateWnd(const string& title, int w, int h);
@ -98,156 +86,3 @@ public:
char m_GStitleInfoBuffer[128];
};
// Renderer base parameterized on the vertex type. It collects incoming vertices
// in m_vl, assembles them into primitives inside a growable output buffer
// (m_vertices) and hands the buffer to Draw() when FlushPrim() is called.
template<class Vertex> class GSRendererT : public GSRenderer
{
protected:
	Vertex* m_vertices; // 16-byte aligned output buffer, owned by this object
	int m_count; // index at which DrawingKick() writes the next primitive; reset by Reset()/FlushPrim()
	int m_maxcount; // growth threshold; the allocation is always 100 elements larger than this
	GSVertexList<Vertex> m_vl; // vertices received but not yet consumed by a primitive

	// Drops all buffered state and resets the base renderer.
	void Reset()
	{
		m_count = 0;
		m_vl.RemoveAll();

		GSRenderer::Reset();
	}

	// Discards the vertices of the primitive currently being assembled.
	void ResetPrim()
	{
		m_vl.RemoveAll();
	}

	// Draws everything accumulated in m_vertices and empties the buffer.
	// Nothing is drawn when the frame or z buffer format class is 3 or above,
	// or when the device is lost; the buffer is emptied either way.
	void FlushPrim()
	{
		if(m_count == 0) return;

		if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
		{
			// FIXME: berserk fpsm = 27 (8H)

			if(!m_dev->IsLost())
			{
				m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM));

				Draw();
			}

			m_perfmon.Put(GSPerfMon::Draw, 1);
		}

		m_count = 0;
	}

	// Enlarges m_vertices to 1.5x its previous threshold (at least 10000
	// elements) and keeps its contents.
	void GrowVertexBuffer()
	{
		// Every allocation is 'slack' elements larger than m_maxcount, so a
		// primitive that starts just below the threshold still fits.
		const int slack = 100;

		int maxcount = std::max<int>(m_maxcount * 3 / 2, 10000);
		Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 16);

		if(m_vertices != NULL)
		{
			// Copy the whole old allocation, slack included: DrawingKick() writes
			// at m_vertices[m_count] whenever m_count < m_maxcount, so valid
			// vertices may sit beyond index m_maxcount. Copying only m_maxcount
			// elements (as before) dropped them. The new buffer is always at
			// least as large as the old allocation.
			memcpy(vertices, m_vertices, sizeof(Vertex) * (m_maxcount + slack));
			_aligned_free(m_vertices);
		}

		m_vertices = vertices;
		m_maxcount = maxcount - slack;
	}

	// Assembles one primitive of type 'prim' from the queued vertices.
	// 'count' receives the number of vertices the primitive type needs.
	// Returns a pointer to the drawing vertex. Can return NULL!
	// NULL is returned when too few vertices are queued, when prim is
	// GS_INVALID, or when 'skip' is set (the vertices are still written to the
	// buffer and consumed from the queue in that last case).
	template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
	{
		switch(prim)
		{
		case GS_POINTLIST: count = 1; break;
		case GS_LINELIST: count = 2; break;
		case GS_LINESTRIP: count = 2; break;
		case GS_TRIANGLELIST: count = 3; break;
		case GS_TRIANGLESTRIP: count = 3; break;
		case GS_TRIANGLEFAN: count = 3; break;
		case GS_SPRITE: count = 2; break;
		case GS_INVALID: count = 1; break;
		default: __assume(0);
		}

		if(m_vl.GetCount() < count)
		{
			return NULL;
		}

		if(m_count >= m_maxcount)
		{
			GrowVertexBuffer();
		}

		Vertex* v = &m_vertices[m_count];

		// Copy the primitive's vertices out of the queue; lists consume all of
		// them, strips and fans keep the vertices shared with the next primitive.
		switch(prim)
		{
		case GS_POINTLIST:
			m_vl.GetAt(0, v[0]);
			m_vl.RemoveAll();
			break;
		case GS_LINELIST:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAll();
			break;
		case GS_LINESTRIP:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAt(0, 1);
			break;
		case GS_TRIANGLELIST:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAll();
			break;
		case GS_TRIANGLESTRIP:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAt(0, 2);
			break;
		case GS_TRIANGLEFAN:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAt(1, 1);
			break;
		case GS_SPRITE:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAll();
			break;
		case GS_INVALID:
			ASSERT(0);
			m_vl.RemoveAll();
			return NULL;
		default:
			__assume(0);
		}

		return !skip ? v : NULL;
	}

	// Renders the vertices currently held in m_vertices; implemented by the
	// concrete renderer.
	virtual void Draw() = 0;

public:
	GSRendererT()
		: GSRenderer()
		, m_vertices(NULL)
		, m_count(0)
		, m_maxcount(0)
	{
	}

	virtual ~GSRendererT()
	{
		if(m_vertices) _aligned_free(m_vertices);
	}
};

View File

@ -0,0 +1,426 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSRendererCS.h"
// Constructs the compute-shader renderer. The base GSRenderer receives a newly
// allocated GSVertexTraceCS bound to this object and sizeof(GSVertex) as the
// vertex stride.
GSRendererCS::GSRendererCS()
: GSRenderer(new GSVertexTraceCS(this), sizeof(GSVertex))
{
m_nativeres = true;
// InitConvertVertex is defined elsewhere; presumably it registers the
// ConvertVertex<> instantiations of this class -- TODO confirm
InitConvertVertex(GSRendererCS);
// start with every entry of m_vm_valid cleared
memset(m_vm_valid, 0, sizeof(m_vm_valid));
}
// Empty: this body performs no explicit cleanup.
GSRendererCS::~GSRendererCS()
{
}
// Creates the device through the base class, then allocates the D3D11 buffers
// this renderer works with:
//   m_vm / m_vm_uav - 4MB of video memory and a raw unordered access view on it
//   m_vb            - dynamic structured buffer holding up to 10000 GSVertex
//   m_ib            - dynamic buffer holding up to 30000 32-bit indices
//   m_pb            - one PAGE_SIZE staging buffer for cpu<->gpu copies
// Returns false when the base class fails, when the device's feature level is
// below D3D_FEATURE_LEVEL_10_0, or when any resource cannot be created.
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
{
	if(!__super::CreateDevice(dev_unk))
		return false;

	D3D_FEATURE_LEVEL level;

	((GSDeviceDX*)dev_unk)->GetFeatureLevel(level);

	if(level < D3D_FEATURE_LEVEL_10_0)
		return false;

	HRESULT hr;

	GSDevice11* dev = (GSDevice11*)dev_unk;

	D3D11_BUFFER_DESC bd;
	D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;

	// video memory (4MB)

	// size and element width are named once so that the buffer and its view
	// cannot drift apart
	const UINT vm_size = 4 * 1024 * 1024;
	const UINT vm_stride = 4;

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = vm_size;
	bd.StructureByteStride = vm_stride;
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vm);

	if(FAILED(hr)) return false;

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_R32_TYPELESS;
	uavd.Buffer.FirstElement = 0;
	uavd.Buffer.NumElements = vm_size / vm_stride; // the view spans the whole buffer
	uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

	hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);

	if(FAILED(hr)) return false;

	// vertex buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(GSVertex) * 10000;
	bd.StructureByteStride = sizeof(GSVertex);
	bd.Usage = D3D11_USAGE_DYNAMIC;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vb);

	if(FAILED(hr)) return false;

	// index buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(uint32) * 10000 * 3;
	bd.Usage = D3D11_USAGE_DYNAMIC;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_ib);

	if(FAILED(hr)) return false;

	// one page, for copying between cpu<->gpu

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = PAGE_SIZE;
	bd.Usage = D3D11_USAGE_STAGING;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb);

	if(FAILED(hr)) return false;

	return true;
}
// Placeholder: always returns NULL and ignores 'i'. No output texture is
// produced until the unswizzling shader mentioned below exists.
GSTexture* GSRendererCS::GetOutput(int i)
{
// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture
return NULL;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
{
// TODO: vertex format more fitting as the input for the compute shader
if(src_index != dst_index)
{
GSVertex v = ((GSVertex*)m_vertex.buff)[src_index];
((GSVertex*)m_vertex.buff)[dst_index] = v;
}
}
// Submits the queued primitives to the compute-shader rasterizer:
//  1. uploads m_vertex / m_index into the dynamic buffers m_vb / m_ib,
//  2. creates shader resource views over the uploaded ranges,
//  3. creates immutable buffers holding the frame/z buffer row and column offset tables,
//  4. fetches (or compiles and caches) the compute shader,
//  5. binds everything, uploads the touched video memory pages and dispatches.
// Any failing D3D call aborts the draw before the dispatch.
void GSRendererCS::Draw()
{
	HRESULT hr;

	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BUFFER_DESC bd;
	D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
	D3D11_MAPPED_SUBRESOURCE map;

	CComPtr<ID3D11ShaderResourceView> vb_srv;
	CComPtr<ID3D11ShaderResourceView> ib_srv;

	// TODO: cache these in hash_maps

	CComPtr<ID3D11Buffer> fbr, fbc, zbr, zbc;
	CComPtr<ID3D11ShaderResourceView> fbr_srv, fbc_srv, zbr_srv, zbc_srv;

	// TODO: grow m_vb, m_ib if needed
	// The limits below match the sizes m_vb (10000 vertices) and m_ib (10000 * 3 indices) are created with.

	if(m_vertex.next > 10000) return;
	if(m_index.tail > 30000) return;

	// TODO: fill/advance/discardwhenfull, as in GSDevice11::IASetVertexBuffer/IASetIndexBuffer

	hr = ctx->Map(m_vb, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around

	if(FAILED(hr)) return;

	memcpy(map.pData, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);

	ctx->Unmap(m_vb, 0);

	//

	hr = ctx->Map(m_ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around

	if(FAILED(hr)) return;

	memcpy(map.pData, m_index.buff, sizeof(uint32) * m_index.tail);

	ctx->Unmap(m_ib, 0);

	// TODO: UpdateResource might be faster, based on my experience with the real vertex buffer, write-no-overwrite/discarded dynamic buffer + map is better

	//

	memset(&srvd, 0, sizeof(srvd));

	srvd.Format = DXGI_FORMAT_UNKNOWN;
	srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
	srvd.Buffer.FirstElement = 0;
	srvd.Buffer.NumElements = m_vertex.next;

	hr = (*dev)->CreateShaderResourceView(m_vb, &srvd, &vb_srv); // TODO: have to create this dynamically in Draw() or pass the start/count in a const reg

	// Without this check a failed view creation would bind NULL and still dispatch.
	if(FAILED(hr)) return;

	memset(&srvd, 0, sizeof(srvd));

	srvd.Format = DXGI_FORMAT_R32_UINT;
	srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
	srvd.Buffer.FirstElement = 0;
	srvd.Buffer.NumElements = m_index.tail;

	hr = (*dev)->CreateShaderResourceView(m_ib, &srvd, &ib_srv); // TODO: have to create this dynamically in Draw() or pass the start/count in a const reg

	if(FAILED(hr)) return;

	// fzb offsets

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(int) * 4096;
	bd.StructureByteStride = sizeof(int);
	bd.Usage = D3D11_USAGE_IMMUTABLE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;

	D3D11_SUBRESOURCE_DATA data;

	memset(&data, 0, sizeof(data));

	data.pSysMem = m_context->offset.fb->pixel.row;

	hr = (*dev)->CreateBuffer(&bd, &data, &fbr);

	if(FAILED(hr)) return;

	data.pSysMem = m_context->offset.fb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats

	hr = (*dev)->CreateBuffer(&bd, &data, &fbc);

	if(FAILED(hr)) return;

	data.pSysMem = m_context->offset.zb->pixel.row;

	hr = (*dev)->CreateBuffer(&bd, &data, &zbr);

	if(FAILED(hr)) return;

	data.pSysMem = m_context->offset.zb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats

	hr = (*dev)->CreateBuffer(&bd, &data, &zbc);

	if(FAILED(hr)) return;

	// NOTE(review): fbr_srv/fbc_srv/zbr_srv/zbc_srv are never created from the buffers above,
	// so slots 2..5 below are bound with NULL views. The view format the shader expects is
	// not visible here - TODO create the views once it is settled.

	// TODO: D3D10_SHADER_MACRO (primclass, less frequently changing drawing attribs, etc.)

	uint32 sel = 0; // TODO

	hash_map<uint32, CComPtr<ID3D11ComputeShader> >::iterator i = m_cs.find(sel);

	CComPtr<ID3D11ComputeShader> cs;

	if(i == m_cs.end())
	{
		// NOTE(review): absolute developer path; the resource-based call is kept commented out below.
		// hr = dev->CompileShader(IDR_CS_FX, "cs_main", NULL, &cs);
		hr = dev->CompileShader("E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\cs.fx", "cs_main", NULL, &cs);

		if(FAILED(hr)) return;

		m_cs[sel] = cs;
	}
	else
	{
		cs = i->second;
	}

	//

	dev->CSSetShaderUAV(0, m_vm_uav);

	dev->CSSetShaderSRV(0, vb_srv);
	dev->CSSetShaderSRV(1, ib_srv);
	dev->CSSetShaderSRV(2, fbr_srv);
	dev->CSSetShaderSRV(3, fbc_srv);
	dev->CSSetShaderSRV(4, zbr_srv);
	dev->CSSetShaderSRV(5, zbc_srv);

	dev->CSSetShader(cs);

	GSVector4i bbox = GSVector4i(0, 0, 640, 512); // TODO: vertex trace

	GSVector4i r = bbox.ralign<Align_Outside>(GSVector2i(16, 8));

	bool fb = true; // TODO: frame buffer used
	bool zb = true; // TODO: z-buffer used

	// Upload the pages covered by r that are not yet valid on the GPU side.
	if(fb) Write(m_context->offset.fb, r);
	if(zb) Write(m_context->offset.zb, r);

	// TODO: constant buffer (frequently changing drawing attribs)
	// TODO: texture (implement texture cache)
	// TODO: clut to a palette texture (should be texture1d, not simply buffer, it is random accessed)
	// TODO: CSSetShaderSRV(6 7 8 ..., texture level 0 1 2 ...) or use Texture3D?
	// TODO: invalidate texture cache

	/*
	CComPtr<ID3D11Query> q;

	D3D11_QUERY_DESC qd;
	memset(&qd, 0, sizeof(qd));
	qd.Query = D3D11_QUERY_EVENT;

	hr = (*dev)->CreateQuery(&qd, &q);

	ctx->Begin(q);
	*/

	// The counters are bounded by the checks at the top of the function; the explicit casts
	// keep the variadic arguments in step with the %d specifiers whatever their declared width.
	printf("[%lld] dispatch %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
		__rdtsc(),
		m_context->FRAME.Block(), m_context->FRAME.PSM,
		m_context->ZBUF.Block(), m_context->ZBUF.PSM,
		PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
		PRIM->PRIM, (int)m_vertex.next, (int)m_index.tail);

	GSVector4i rsize = r.rsize();

	// r is aligned to 16x8 above, so the shifts yield whole thread-group counts.
	dev->Dispatch(rsize.z >> 4, rsize.w >> 3, 1); // TODO: pass upper-left corner offset (r.xy) in a const buffer

	/*
	ctx->End(q);

	uint64 t0 = __rdtsc();

	BOOL b;

	while(S_OK != ctx->GetData(q, &b, sizeof(BOOL), 0)) {}

	printf("%lld\n", __rdtsc() - t0);
	*/
}
// Called for a transfer targeting the destination buffer described by BITBLTBUF
// (DBP/DBW/DPSM): reads the GPU-resident pages covered by r back into local
// memory and marks them invalid.
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
	// TODO: fully overwritten pages are not needed to be read, only invalidated
	Read(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, true);

	// TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target
	// TODO: invalidate texture cache
}
void GSRendererCS::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
Read(o, r, false);
}
// Uploads every page covered by rectangle r (in the layout described by o) from the
// CPU copy of video memory (m_mem.m_vm8) into the GPU buffer m_vm, skipping pages
// whose bit is already set in m_vm_valid. Uploaded pages are marked valid.
void GSRendererCS::Write(GSOffset* o, const GSVector4i& r)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	// A box with top >= bottom or front >= back is empty and makes UpdateSubresource a no-op;
	// a buffer region is described as [left, right) x [0, 1) x [0, 1).
	box.bottom = 1;
	box.back = 1;

	// Page list terminated by GSOffset::EOP; released with delete [] below.
	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		uint32 page = pages[i];

		// One bit per page, 32 pages per m_vm_valid word.
		uint32 row = page >> 5;
		uint32 col = 1u << (page & 31); // unsigned literal: bit 31 must not shift into a signed int's sign bit

		if((m_vm_valid[row] & col) == 0)
		{
			m_vm_valid[row] |= col;

			// For buffers the box coordinates are byte offsets.
			box.left = page * PAGE_SIZE;
			box.right = box.left + PAGE_SIZE;

			ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + box.left, 0, 0);

			printf("[%lld] write %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
		}
	}

	delete [] pages;
}
// Copies every GPU-valid page covered by rectangle r (in the layout described by o)
// from the GPU buffer m_vm back into the CPU copy of video memory (m_mem.m_vm8),
// going through the one-page staging buffer m_pb. When invalidate is true the page's
// valid bit is cleared as well, so a later Write() uploads it again.
void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	// A box with top >= bottom or front >= back is empty and makes CopySubresourceRegion a no-op;
	// a buffer region is described as [left, right) x [0, 1) x [0, 1).
	box.bottom = 1;
	box.back = 1;

	// Page list terminated by GSOffset::EOP; released with delete [] below.
	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		uint32 page = pages[i];

		// One bit per page, 32 pages per m_vm_valid word.
		uint32 row = page >> 5;
		uint32 col = 1u << (page & 31); // unsigned literal: bit 31 must not shift into a signed int's sign bit

		if(m_vm_valid[row] & col)
		{
			// The bit is known to be set here, so the xor clears it.
			if(invalidate) m_vm_valid[row] ^= col;

			// For buffers the box coordinates are byte offsets.
			box.left = page * PAGE_SIZE;
			box.right = box.left + PAGE_SIZE;

			ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);

			D3D11_MAPPED_SUBRESOURCE map;

			if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ_WRITE, 0, &map)))
			{
				memcpy(m_mem.m_vm8 + box.left, map.pData, PAGE_SIZE);

				ctx->Unmap(m_pb, 0);

				printf("[%lld] read %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
			}
		}
	}

	delete [] pages;
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSRenderer.h"
#include "GSDevice11.h"
// Experimental renderer that rasterizes with a Direct3D 11 compute shader,
// keeping a copy of the video memory in a GPU buffer.
class GSRendererCS : public GSRenderer
{
// Vertex trace type for this renderer; adds nothing on top of GSVertexTrace.
class GSVertexTraceCS : public GSVertexTrace
{
public:
GSVertexTraceCS(const GSState* state) : GSVertexTrace(state) {}
};
// GPU copy of the video memory (4 MB buffer bound for unordered access, raw views allowed).
CComPtr<ID3D11Buffer> m_vm;
// Raw unordered access view over m_vm; bound to UAV slot 0 in Draw().
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
// Dynamic structured buffer sized for 10000 GSVertex entries.
CComPtr<ID3D11Buffer> m_vb;
// Dynamic buffer sized for 10000 * 3 uint32 indices.
CComPtr<ID3D11Buffer> m_ib;
// Staging buffer of one page, used for copying between cpu<->gpu.
CComPtr<ID3D11Buffer> m_pb;
// Compiled compute shaders, keyed by selector value.
hash_map<uint32, CComPtr<ID3D11ComputeShader> > m_cs;
// One bit per page (word = page >> 5, bit = page & 31); set by Write() when a page
// is uploaded to m_vm, cleared by Read() when called with invalidate.
uint32 m_vm_valid[16];
// Uploads the not-yet-valid pages covered by r from local memory to m_vm.
void Write(GSOffset* o, const GSVector4i& r);
// Copies the valid pages covered by r from m_vm back to local memory, optionally invalidating them.
void Read(GSOffset* o, const GSVector4i& r, bool invalidate);
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
bool CreateDevice(GSDevice* dev);
GSTexture* GetOutput(int i);
void Draw();
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut);
public:
GSRendererCS();
virtual ~GSRendererCS();
};

View File

@ -21,3 +21,411 @@
#include "stdafx.h"
#include "GSRendererDX.h"
#include "GSDeviceDX.h"
// Forwards the vertex trace, vertex stride and texture cache to GSRendererHW,
// stores the pixel center offset, leaves the topology unset (-1) and reads the
// renderer options from the application configuration.
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
	: GSRendererHW(vt, vertex_stride, tc)
	, m_pixelcenter(pixelcenter)
	, m_topology(-1)
{
	// Options are stored as integers; any non-zero value enables the feature.
	m_logz = theApp.GetConfig("logz", 0) != 0;
	m_fba = theApp.GetConfig("fba", 1) != 0;

	//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);

	UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0) != 0;
}
// Nothing to release here; the body is intentionally empty.
GSRendererDX::~GSRendererDX()
{
}
// Draws the queued vertices (m_vertex / m_index) into render target rt with depth
// buffer ds, sampling from tex when it is not NULL.
// The function builds the output-merger, vertex, geometry and pixel shader selectors
// and constant buffers from the current drawing context and environment, binds them
// on the device, then issues one or two passes depending on the alpha test setup.
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
// DATE is ignored when the frame buffer format is PSMCT24.
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture* rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
// Stencil path: hand the device a quad built from the traced vertex bounds.
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
// No stencil: take a full copy of the render target; it is bound to pixel shader
// resource slot 2 below and recycled at the end of the function.
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
// Per-channel write mask derived from FBMSK: a channel is write-disabled only when
// its whole mask byte equals 0xff.
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt->m_max.p.z > 0xffffff)
{
ASSERT(m_vt->m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt->m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt->m_max.p.z > 0xffff)
{
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt->m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
// The << 4 on the target size suggests the vertex coordinates carry 4 fractional bits - TODO confirm.
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt->m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
// Matches the rtcopy fallback above: the test is selected in the pixel shader instead.
ps_sel.date = 1 + context->TEST.DATM;
}
}
// The same condition is repeated after each draw call below to issue the extra
// "negative" colclip pass.
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
// The alpha reference shares a constant with the fog color (its .a component);
// it is shifted by one for the strict comparisons.
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
// xy = size from the TEX0 register (1 << TW, 1 << TH), zw = size of the actual texture object.
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
ps_sel.tfx = 4;
}
// rs
// Scissor rectangle scaled to the render target and clipped against its size.
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawIndexedPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
// Extra colclip pass: same geometry with the negative blend selector and colclip = 2.
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
// Remaps the alpha test function for the second pass.
// NOTE(review): presumably each entry is the complement of its index, so this pass
// covers the pixels that failed the first one - confirm against the ATST enum.
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// Start from the current write enables, then drop whatever AFAIL does not allow
// (the trailing comment on each case names what stays writable).
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
// Skip the pass entirely when nothing would be written.
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawIndexedPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
}
dev->EndScene();
// rtcopy is NULL unless the no-stencil DATE path created it above.
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}

View File

@ -23,8 +23,7 @@
#include "GSRendererHW.h"
template<class Vertex>
class GSRendererDX : public GSRendererHW<Vertex>
class GSRendererDX : public GSRendererHW
{
GSVector2 m_pixelcenter;
bool m_logz;
@ -35,413 +34,11 @@ class GSRendererDX : public GSRendererHW<Vertex>
protected:
int m_topology;
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
virtual void UpdateFBA(GSTexture* rt) {}
public:
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0))
: GSRendererHW<Vertex>(tc)
, m_pixelcenter(pixelcenter)
, m_topology(-1)
{
m_logz = !!theApp.GetConfig("logz", 0);
m_fba = !!theApp.GetConfig("fba", 1);
//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0);
}
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
virtual ~GSRendererDX();
virtual ~GSRendererDX()
{
}
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture *rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt.m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt.m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt.m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
ps_sel.tfx = 4;
}
// rs
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertices, m_count, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
}
dev->EndScene();
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}
};

View File

@ -25,9 +25,9 @@
#include "resource.h"
GSRendererDX11::GSRendererDX11()
: GSRendererDX<GSVertexHW11>(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
{
InitVertexKick(GSRendererDX11);
InitConvertVertex(GSRendererDX11);
}
bool GSRendererDX11::CreateDevice(GSDevice* dev)
@ -39,201 +39,42 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::VertexKick(bool skip)
void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertexHW11& dst = m_vl.AddTail();
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
dst = *(GSVertexHW11*)&m_v;
#ifdef ENABLE_UPSCALE_HACKS
GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
if(tme && fst)
{
//GSVector4::storel(&dst.ST, m_v.GetUV());
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
int Udiff = 0;
int Vdiff = 0;
int Uadjust = 0;
int Vadjust = 0;
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
}
int multiplier = GetUpscaleMultiplier();
((GSVector4i*)d)[0] = v0;
((GSVector4i*)d)[1] = v1;
}
if(multiplier > 1)
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.ST.S = (float)m_v.UV.U - Uadjust;
dst.ST.T = (float)m_v.UV.V - Vadjust;
}
else if(tme)
{
// Wip :p
//dst.XYZ.X += 5;
//dst.XYZ.Y += 5;
}
#else
if(tme && fst)
{
GSVector4::storel(&dst.ST, m_v.GetUV());
}
#endif
int count = 0;
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
{
GSVector4i scissor = m_context->scissor.dx10;
GSVector4i pmin, pmax;
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2;
switch(prim)
{
case GS_POINTLIST:
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
pmin = v0;
pmax = v0;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
pmin = v0.min_u16(v1).upl16();
pmax = v0.max_u16(v1).upl16();
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
v2 = GSVector4i::load((int)v[2].p.xy);
pmin = v0.min_u16(v1).min_u16(v2).upl16();
pmax = v0.max_u16(v1).max_u16(v2).upl16();
break;
}
#else
switch(prim)
{
case GS_POINTLIST:
pmin.x = v[0].p.x;
pmin.y = v[0].p.y;
pmax.x = v[0].p.x;
pmax.y = v[0].p.y;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
break;
}
#endif
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 0xff)
{
return;
}
m_count += count;
}
}
// NOTE(review): this span comes from a rendered diff without +/- markers; the two
// consecutive switch headers and the two consecutive __super:: calls look like the
// removed/added line pairs of the change (m_vt. -> m_vt->, Draw -> DrawPrims) - confirm.
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
// Select the D3D11 topology for the traced primitive class and report the
// primitive count (vertex count divided by vertices per primitive) to the perf monitor.
switch(m_vt.m_primclass)
switch(m_vt->m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break;
// Lines and sprites share the line-list topology here: two vertices per primitive.
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
break;
case GS_TRIANGLE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
break;
default:
// No other primitive class is expected to reach this point.
__assume(0);
}
// Hand over to the shared DX renderer implementation.
__super::Draw(rt, ds, tex);
__super::DrawPrims(rt, ds, tex);
}

View File

@ -25,16 +25,21 @@
#include "GSVertexHW.h"
#include "GSTextureCache11.h"
// Direct3D 11 hardware renderer.
// NOTE(review): this span comes from a rendered diff without +/- markers; the two class
// headers below look like the removed (templated base) and added (plain base) variants,
// and Draw/VertexKick vs. ConvertVertex/DrawPrims look like old vs. new members - confirm.
class GSRendererDX11 : public GSRendererDX<GSVertexHW11>
class GSRendererDX11 : public GSRendererDX
{
protected:
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
// Type-erased vertex accessors: the opaque vertex pointer is reinterpreted as GSVertexHW11.
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
public:
GSRendererDX11();
virtual ~GSRendererDX11() {}
bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
};

View File

@ -25,9 +25,9 @@
#include "resource.h"
// Constructs the Direct3D 9 renderer.
// NOTE(review): this span comes from a rendered diff without +/- markers; the two base
// initializers and the two Init* macro calls look like removed/added pairs - confirm.
GSRendererDX9::GSRendererDX9()
: GSRendererDX<GSVertexHW9>(new GSTextureCache9(this))
// This variant passes a DX9 vertex trace, the GSVertexHW9 size and a DX9 texture cache to the base.
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
{
InitVertexKick(GSRendererDX9);
InitConvertVertex(GSRendererDX9);
}
bool GSRendererDX9::CreateDevice(GSDevice* dev)
@ -58,210 +58,150 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::VertexKick(bool skip)
void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(tme && !fst)
{
p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q));
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
}
else
{
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z));
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
}
GSVertexHW9& dst = m_vl.AddTail();
dst.p = p;
int Uadjust = 0;
int Vadjust = 0;
GSVector4 t = GSVector4::zero();
if(tme)
{
if(fst)
{
dst.t = m_v.GetUV();
#ifdef ENABLE_UPSCALE_HACKS
int Udiff = 0;
int Vdiff = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.t.x -= (float) Uadjust;
dst.t.y -= (float) Vadjust;
#endif
t = GSVector4(GSVector4i::load(s->UV).upl16());
}
else
{
dst.t = GSVector4::loadl(&m_v.ST);
t = GSVector4::loadl(&s->ST);
}
}
dst._c0() = m_v.RGBAQ.u32[0];
dst._c1() = m_v.FOG.u32[1];
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
//
d->p = p;
d->t = t;
}
// BaseDrawingKick can never return NULL here because the DrawingKick function
// tables route to DrawingKickNull for GS_INVALID prim types (and that's the only
// condition where this function would return NULL).
int count = 0;
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSVector4 scissor = m_context->scissor.dx9;
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 3)
{
return;
}
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();}
break;
case GS_SPRITE:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
v[0].p.z = v[1].p.z;
v[0].p.w = v[1].p.w;
v[0]._c1() = v[1]._c1();
v[2] = v[1];
v[3] = v[1];
v[1].p.y = v[0].p.y;
v[1].t.y = v[0].t.y;
v[2].p.x = v[0].p.x;
v[2].t.x = v[0].t.x;
v[4] = v[1];
v[5] = v[2];
count += 4;
break;
}
m_count += count;
}
}
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt.m_primclass)
switch(m_vt->m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3DPT_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break;
case GS_LINE_CLASS:
m_topology = D3DPT_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 2)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 1];
m_index.buff[i + 1] = tmp;
}
}
break;
case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 3)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 2];
m_index.buff[i + 2] = tmp;
}
}
break;
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
// each sprite converted to quad needs twice the space
while(m_vertex.tail * 2 > m_vertex.maxcount)
{
GrowVertexBuffer();
}
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.next >= 2)
{
size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertexHW9 v0 = s[0];
GSVertexHW9 v1 = s[1];
v0.p = v0.p.xyzw(v1.p); // z, q
v0.t = v0.t.xyzw(v1.t); // c, f
q[0] = v0;
q[3] = v1;
// swap x, s
GSVector4 p = v0.p.insert<0, 0>(v1.p);
GSVector4 t = v0.t.insert<0, 0>(v1.t);
v1.p = v1.p.insert<0, 0>(v0.p);
v1.t = v1.t.insert<0, 0>(v0.t);
v0.p = p;
v0.t = t;
q[1] = v0;
q[2] = v1;
index[0] = i + 0;
index[1] = i + 1;
index[2] = i + 2;
index[3] = i + 1;
index[4] = i + 2;
index[5] = i + 3;
}
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
m_index.tail = count * 3;
}
break;
default:
__assume(0);
}
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
__super::Draw(rt, ds, tex);
__super::DrawPrims(rt, ds, tex);
}
void GSRendererDX9::UpdateFBA(GSTexture* rt)
@ -280,7 +220,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =

View File

@ -25,7 +25,7 @@
#include "GSVertexHW.h"
#include "GSTextureCache9.h"
class GSRendererDX9 : public GSRendererDX<GSVertexHW9>
class GSRendererDX9 : public GSRendererDX
{
protected:
struct
@ -34,14 +34,20 @@ protected:
Direct3DBlendState9 bs;
} m_fba;
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
public:
GSRendererDX9();
virtual ~GSRendererDX9() {}
bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
};

View File

@ -21,3 +21,912 @@
#include "stdafx.h"
#include "GSRendererHW.h"
// Builds the hardware renderer base: takes ownership of the texture cache `tc`
// (released in the destructor) and derives the internal render target size from
// the configuration.
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
	: GSRenderer(vt, vertex_stride)
	, m_tc(tc)
	, m_width(1024)
	, m_height(1024)
	, m_skip(0)
	, m_reset(false)
	, m_upscale_multiplier(1)
{
	// Settings read regardless of the resolution mode.
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
	m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);

	// NOTE(review): m_nativeres is not assigned in this constructor; presumably it is
	// set up before this body runs (e.g. by the base class) - confirm.
	if(m_nativeres)
	{
		// Native resolution never scales.
		m_upscale_multiplier = 1;

		return;
	}

	// Custom resolution: start from the configured size ...
	m_width = theApp.GetConfig("resx", m_width);
	m_height = theApp.GetConfig("resy", m_height);
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);

	if(m_upscale_multiplier > 6)
	{
		// Out-of-range multiplier: use the normal upscale math.
		m_upscale_multiplier = 1;
	}
	else if(m_upscale_multiplier > 1)
	{
		// ... or override it with a multiple of a fixed 640x512 base.
		m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
		m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
	}
}
// Releases the texture cache that was handed to the constructor.
GSRendererHW::~GSRendererHW()
{
	delete m_tc;
}
// Forwards the CRC to the base renderer, then selects the per-game hack callbacks
// and applies title specific render target tweaks.
void GSRendererHW::SetGameCRC(uint32 crc, int options)
{
	GSRenderer::SetGameCRC(crc, options);

	m_hacks.SetGameCRC(m_game);

	const bool jackie_chan = (m_game.title == CRC::JackieChanAdv);

	if(jackie_chan)
	{
		// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem
		m_width = 1280;
	}
}
// Tells whether the current draw may be upscaled.
// A registered per-game "can upscale" hack can veto; otherwise upscaling needs
// non-native resolution and an enabled output (PMODE.EN).
bool GSRendererHW::CanUpscale()
{
	if(m_hacks.m_cu)
	{
		if(!(this->*m_hacks.m_cu)())
		{
			return false;
		}
	}

	if(m_nativeres)
	{
		return false;
	}

	// upscale ratio depends on the display size, with no output it may not be set
	// correctly (ps2 logo to game transition)
	return m_regs->PMODE.EN != 0;
}
// Returns the upscale multiplier chosen in the constructor (1 means no scaling).
int GSRendererHW::GetUpscaleMultiplier()
{
	return m_upscale_multiplier;
}
// Requests a texture cache flush and resets the base renderer.
// The cache is not cleared here; the flag is consumed by VSync().
void GSRendererHW::Reset()
{
	// TODO: GSreset can come from the main thread too => crash
	// m_tc->RemoveAll();

	m_reset = true;

	GSRenderer::Reset();
}
// Per-frame housekeeping: runs the base VSync, ages the texture cache and the
// device pool, clears the skip counter and performs a pending cache flush.
void GSRendererHW::VSync(int field)
{
	GSRenderer::VSync(field);

	m_tc->IncAge();
	m_dev->AgePool();

	m_skip = 0;

	if(!m_reset)
	{
		return;
	}

	// Deferred flush requested by Reset().
	m_tc->RemoveAll();
	m_reset = false;
}
// Empties the texture cache before the base class resets the device.
void GSRendererHW::ResetDevice()
{
	m_tc->RemoveAll();

	GSRenderer::ResetDevice();
}
// Returns the texture of the cached target that backs display circuit `i`,
// or NULL when the texture cache has no matching target.
GSTexture* GSRendererHW::GetOutput(int i)
{
	const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

	// Describe the display frame buffer as a texture so it can be looked up in the cache.
	GIFRegTEX0 TEX0;

	TEX0.TBP0 = DISPFB.Block();
	TEX0.TBW = DISPFB.FBW;
	TEX0.PSM = DISPFB.PSM;

	// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

	GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height);

	if(target == NULL)
	{
		return NULL;
	}

	GSTexture* output = target->m_texture;

	if(s_dump)
	{
		// Debug dump of the output; the dump counter advances even when nothing is saved.
		if(s_save && s_n >= s_saven)
		{
			output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
		}

		s_n++;
	}

	return output;
}
// Tells the texture cache that rectangle `r` of the transfer destination buffer
// (BITBLTBUF.DBP/DBW/DPSM) has been written.
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
	// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);

	m_tc->InvalidateVideoMem(
		m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM),
		r);
}
// Tells the texture cache that rectangle `r` of the transfer source buffer
// (BITBLTBUF.SBP/SBW/SPSM) is about to be read. Requests flagged as clut reads are ignored.
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
	// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);

	if(clut)
	{
		return; // FIXME
	}

	m_tc->InvalidateLocalMem(
		m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM),
		r);
}
// Renders the currently queued primitives: resolves the render target, depth target and
// (when texturing is on) the source texture through the texture cache, gives the per-game
// hacks a chance to intervene, calls DrawPrims() and finally marks the touched area of the
// targets as valid.
void GSRendererHW::Draw()
{
// Nothing can be drawn on a lost device.
if(m_dev->IsLost()) return;
#ifndef DISABLE_CRC_HACKS
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// Look up the colour target described by FRAME.
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
// Look up the depth target described by ZBUF; note that the buffer width is taken from FRAME.
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
if(!rt || !ds)
{
ASSERT(0);
return;
}
// Source texture, only needed when texture mapping is enabled.
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
if(!tex) return;
}
// Optional debug dump of the inputs (texture, palette, targets before drawing).
// The dump counter s_n is advanced twice in this section.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
// Per-game "on input" hack: returning false cancels the draw.
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
// TEST, FRAME and ZBUF are copied here because they are modified for the draw
// and restored right after DrawPrims().
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
// fm: frame buffer write mask; zm: all ones when depth writes are masked or the depth test is disabled.
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
if(GSRenderer::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
DrawPrims(rt->m_texture, ds->m_texture, tex);
//
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
// Area covered by the draw: vertex trace bounds clipped against the scissor rectangle.
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
// Only targets that were not fully write-masked are marked as updated.
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
// Per-game "on output" hack.
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
// Optional debug dump of the targets after drawing.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
m_tc->Read(rt, r);
#endif
}
// hacks
// Registers the per-game hack callbacks.
// Three kinds exist: OI (called from Draw() before drawing, may cancel the draw),
// OO (called from Draw() after drawing) and CU (consulted by CanUpscale()).
// Each entry pairs a game title and a region with a member function; most entries
// use CRC::RegionCount as the region (presumably meaning "any region" - confirm).
GSRendererHW::Hacks::Hacks()
// The maps are constructed from the (still empty) lists; the entries are pushed in the body below.
: m_oi_map(m_oi_list)
, m_oo_map(m_oo_list)
, m_cu_map(m_cu_list)
, m_oi(NULL)
, m_oo(NULL)
, m_cu(NULL)
{
// "on input" hacks
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
// Both Ty titles share the same hack.
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));
// "on output" hacks
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));
// "can upscale" hacks
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
// Selects the active OI/OO/CU callbacks for `game`.
// The lookup key packs the region into the top byte and the title into the rest.
// Games flagged PointListPalette get the generic point-list palette hack as their OI callback.
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
{
	const uint32 key = (uint32)((game.region << 24) | game.title);

	m_oi = m_oi_map[key];
	m_oo = m_oo_map[key];
	m_cu = m_cu_map[key];

	const bool point_list_palette = (game.flags & CRC::PointListPalette) != 0;

	if(point_list_palette)
	{
		// A title specific OI hack is not expected together with this flag.
		ASSERT(m_oi == NULL);

		m_oi = &GSRendererHW::OI_PointListPalette;
	}
}
// FFXII video playback hack.
//
// The function is a small state machine kept in function-local statics:
//  1. A line batch of 448*2 or 512*2 vertices arms the hack and records the line count.
//  2. While armed, large point batches (>= 16*512 vertices) are captured into a
//     448-pixel-pitch CPU buffer instead of being drawn (returns false).
//  3. While armed, a line batch with the recorded vertex count is replaced by two
//     triangles textured with the captured buffer.
//  Any other batch size while armed disarms the hack.
//
// rt, ds: current render/depth targets (unused here).
// t:      source texture of the draw; its texture is replaced in step 3.
// Returns false when the draw must be skipped, true when it should proceed.
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	static uint32* video = NULL;
	static size_t lines = 0;

	if(lines == 0)
	{
		if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
		{
			lines = m_vertex.next / 2;
		}
	}
	else
	{
		if(m_vt->m_primclass == GS_POINT_CLASS)
		{
			if(m_vertex.next >= 16 * 512)
			{
				// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

				if(!video) video = new uint32[512 * 512];

				int ox = m_context->XYOFFSET.OFX;
				int oy = m_context->XYOFFSET.OFY;

				const uint8* RESTRICT v = m_vertex.buff;

				// Visit exactly m_vertex.next vertices. The previous bound (i >= 0) ran one
				// extra iteration, reading a vertex past the end of the batch and storing
				// a stray pixel.
				// NOTE(review): x and y are not range checked against the video buffer.
				for(int i = (int)m_vertex.next; i > 0; i--, v += m_vertex.stride)
				{
					int x = (GetPosX(v) - ox) >> 4;
					int y = (GetPosY(v) - oy) >> 4;

					// y * 448 + x
					video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
				}

				return false;
			}
			else
			{
				lines = 0;
			}
		}
		else if(m_vt->m_primclass == GS_LINE_CLASS)
		{
			// video is still NULL if no point batch was captured since the hack was armed;
			// in that case there is nothing to upload, so fall back to a normal draw.
			if(m_vertex.next == lines * 2 && video != NULL)
			{
				// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
				// but we use the stored video data to create a new texture, and replace the lines with two triangles

				m_dev->Recycle(t->m_texture);

				t->m_texture = m_dev->CreateTexture(512, 512);

				t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

				// Keep the first two vertices and move the last two right behind them,
				// giving the four corners of the quad.
				size_t stride = m_vertex.stride;

				memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
				memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);

				m_index.buff[0] = 0;
				m_index.buff[1] = 1;
				m_index.buff[2] = 2;
				m_index.buff[3] = 1;
				m_index.buff[4] = 2;
				m_index.buff[5] = 3;

				m_vertex.head = m_vertex.tail = m_vertex.next = 4;
				m_index.tail = 6;

				// The batch changed from lines to triangles, so the vertex trace must be refreshed.
				m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
			}
			else
			{
				lines = 0;
			}
		}
	}

	return true;
}
// FFX: clears the depth buffer when the random battle transition draw is detected.
// Always lets the draw proceed.
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 ZBP = m_context->ZBUF.Block();
	const uint32 TBP = m_context->TEX0.TBP0;

	const bool frame_match = (FBP == 0x00d00 || FBP == 0x00000);

	if(frame_match && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S)
	{
		// random battle transition (z buffer written directly, clear it now)
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Metal Slug 6: missing red channel fix (looks alright in pcsx2 r5000+).
// For every vertex whose red is zero while green and blue are not, red is replaced
// by the rounded average of green and blue; the vertex trace is then refreshed.
// Always lets the draw proceed.
bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	uint8* RESTRICT v = m_vertex.buff;

	// Visit exactly m_vertex.next vertices. The previous bound (i >= 0) ran one extra
	// iteration and could modify the vertex slot just past the end of the batch.
	for(int i = (int)m_vertex.next; i > 0; i--, v += m_vertex.stride)
	{
		uint32 c = GetColor(v);

		// Colour layout used here: red in bits 0-7, green in 8-15, blue in 16-23.
		uint32 r = (c >> 0) & 0xff;
		uint32 g = (c >> 8) & 0xff;
		uint32 b = (c >> 16) & 0xff;

		if(r == 0 && g != 0 && b != 0)
		{
			SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
		}
	}

	// Colours changed, so the cached min/max of the batch must be recomputed.
	m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);

	return true;
}
// God of War 2: the game clears the z buffer by drawing to it as a frame buffer.
// The draw is recognised by its frame address/format, replaced by a real depth
// clear of the matching depth target and then skipped.
bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FBW = m_context->FRAME.FBW;
	const uint32 FPSM = m_context->FRAME.PSM;

	// ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
	const bool known_address = (FBP == 0x00f00 || FBP == 0x00100 || FBP == 0x01280);

	if(!known_address || FPSM != PSM_PSMZ24)
	{
		return true;
	}

	// z buffer clear

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = FBP;
	TEX0.TBW = FBW;
	TEX0.PSM = FPSM;

	GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

	if(depth)
	{
		m_dev->ClearDepth(depth->m_texture, 0);
	}

	return false;
}
// The Simpsons Game: replaces the game's partial z buffer clear with a full depth
// clear and skips the original draw.
bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	// 0x1800 pal, 0x1500 ntsc
	const bool clear_draw = (FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24;

	if(!clear_draw)
	{
		return true;
	}

	// instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
	// it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
	// how? by using a render target that overlaps with the lower half of the z buffer...

	// TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully

	m_dev->ClearDepth(ds, 0);

	return false;
}
// Rozen Maiden Gebet Garden: the game clears the frame buffer through the z buffer
// and vice versa. Both untextured draws are recognised by their buffer addresses,
// replaced by a direct clear of the matching cached target and skipped.
bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(PRIM->TME)
	{
		return true;
	}

	const uint32 FBP = m_context->FRAME.Block();
	const uint32 ZBP = m_context->ZBUF.Block();

	if(FBP == 0x008c0 && ZBP == 0x01a40)
	{
		// frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = ZBP;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->FRAME.PSM;

		GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

		if(color)
		{
			m_dev->ClearRenderTarget(color->m_texture, 0);
		}

		return false;
	}

	if(FBP == 0x00000 && ZBP == 0x01180)
	{
		// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = FBP;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->ZBUF.PSM;

		GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

		if(depth)
		{
			m_dev->ClearDepth(depth->m_texture, 0);
		}

		return false;
	}

	return true;
}
// Spider-Man Web of Shadows: clears the whole depth buffer when the game's
// half-screen clear is detected. The draw itself still proceeds.
bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	// 0x2800 pal, 0x25a0 ntsc
	const bool known_address = (FBP == 0x025a0 || FBP == 0x02800);

	if(known_address && FPSM == PSM_PSMCT32)
	{
		// only top half of the screen clears
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Ty the Tasmanian Tiger (1 and 2): replaces the game's half height buffer clear
// with a full depth clear and skips the original draw.
bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	// 0x2800 pal, 0x2bc0 ntsc
	const bool clear_draw = (FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24;

	if(!clear_draw)
	{
		return true;
	}

	// half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
// Digimon Rumble Arena 2: clears the whole depth buffer when the untextured
// half height clear is detected. The draw itself still proceeds.
bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		return true;
	}

	const bool known_address = (FBP == 0x02300 || FBP == 0x03fc0);

	if(known_address && FPSM == PSM_PSMCT32)
	{
		// half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Black Hawk Down: replaces the game's half height buffer clear with a full depth
// clear and skips the original draw.
bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	const bool clear_draw = (FBP == 0x02000 && FPSM == PSM_PSMZ24);

	if(!clear_draw)
	{
		return true;
	}

	// half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
// Star Wars: The Force Unleashed depth clear hack.
// Untextured draw to the 24 bit buffer at 0x2bc0: clear depth and skip the draw.
// Textured draw to 0x0/0x1180 (32 bit) whose vertices all have z == 0: clear depth, draw anyway.
bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	if(!PRIM->TME)
	{
		if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0)
		{
			m_dev->ClearDepth(ds, 0);

			return false;
		}

		return true;
	}

	const bool known_address = (FBP == 0x0 || FBP == 0x01180);
	const bool flat_zero_z = (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0);

	if(known_address && FPSM == PSM_PSMCT32 && flat_zero_z)
	{
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// X-Men Origins: Wolverine: clears the whole depth buffer when the game's
// half height clear is detected. The draw itself still proceeds.
bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	const bool clear_draw = (FBP == 0x0 && FPSM == PSM_PSMCT16);

	if(clear_draw)
	{
		// half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Call of Duty: Final Fronts: replaces the game's half height buffer clear with a
// full depth clear and skips the original draw.
bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	const bool clear_draw = (FBP == 0x02300 && FPSM == PSM_PSMZ24);

	if(!clear_draw)
	{
		return true;
	}

	// half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
// Spyro: A New Beginning depth clear hack.
// Untextured draw to the 24 bit buffer at 0x2800/0x2bc0: clear depth and skip the draw.
// Textured draw to 0x0/0x1180 (32 bit) whose vertices all have z == 0: clear depth, draw anyway.
bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	if(!PRIM->TME)
	{
		// 0x2800 pal, 0x2bc0 ntsc
		if(FPSM == PSM_PSMCT24 && (FBP == 0x02800 || FBP == 0x02bc0))
		{
			// half height buffer clear
			m_dev->ClearDepth(ds, 0);

			return false;
		}

		return true;
	}

	const bool known_address = (FBP == 0x0 || FBP == 0x01180);
	const bool flat_zero_z = (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0);

	if(known_address && FPSM == PSM_PSMCT32 && flat_zero_z)
	{
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Spyro: The Eternal Night depth clear hack.
// Untextured draw to the 24 bit buffer at 0x2bc0: clear depth and skip the draw.
// Textured draw to 0x0/0x1180 (32 bit) whose vertices all have z == 0: clear depth, draw anyway.
bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	if(!PRIM->TME)
	{
		if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0)
		{
			// half height buffer clear
			m_dev->ClearDepth(ds, 0);

			return false;
		}

		return true;
	}

	const bool known_address = (FBP == 0x0 || FBP == 0x01180);
	const bool flat_zero_z = (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0);

	if(known_address && FPSM == PSM_PSMCT32 && flat_zero_z)
	{
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Tales of Legendia: for constant-z draws to the 32 bit buffer at 0x1c00 with the
// alpha test disabled, the depth test is forced to "always". The draw proceeds.
bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FPSM = m_context->FRAME.PSM;

	const bool target_match = (FPSM == PSM_PSMCT32 && FBP == 0x01c00);

	if(target_match && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
	{
		m_context->TEST.ZTST = ZTST_ALWAYS;
		//m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// Generic hack for games that upload a palette by drawing it as a point list.
// An untextured point batch of 16 or 256 vertices aimed at a frame buffer at or above
// 0x3f40 (32-block aligned) is written straight into local memory as an 8x2 or 16x16
// block, the clut cache is invalidated and the draw is skipped.
// Alpha is rescaled on the way: values >= 0x80 become 0xff, lower values are doubled.
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(m_vt->m_primclass != GS_POINT_CLASS || PRIM->TME)
	{
		return true;
	}

	const uint32 FBP = m_context->FRAME.Block();
	const uint32 FBW = m_context->FRAME.FBW;

	if(FBP < 0x03f40 || (FBP & 0x1f) != 0)
	{
		return true;
	}

	const bool small_palette = (m_vertex.next == 16);
	const bool large_palette = (m_vertex.next == 256);

	if(!small_palette && !large_palette)
	{
		// Unexpected batch size for a palette upload.
		ASSERT(0);

		return true;
	}

	// 16 entries are laid out 8 wide, 256 entries 16 wide.
	const int entries = small_palette ? 16 : 256;
	const int shift = small_palette ? 3 : 4;
	const int mask = (1 << shift) - 1;

	uint8* RESTRICT v = m_vertex.buff;

	for(int i = 0; i < entries; i++, v += m_vertex.stride)
	{
		uint32 c = GetColor(v);
		uint32 a = c >> 24;

		c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);

		SetColor(v, c);

		m_mem.WritePixel32(i & mask, i >> shift, c, FBP, FBW);
	}

	m_mem.m_clut.Invalidate();

	return false;
}
void GSRendererHW::OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void GSRendererHW::OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool GSRendererHW::CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool GSRendererHW::CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool GSRendererHW::CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}

View File

@ -26,34 +26,13 @@
#include "GSCrc.h"
#include "GSFunctionMap.h"
template<class Vertex>
class GSRendererHW : public GSRendererT<Vertex>
class GSRendererHW : public GSRenderer
{
protected:
using GSRendererT<Vertex>::m_vt;
using GSRendererT<Vertex>::m_count;
using GSRendererT<Vertex>::m_env;
using GSRendererT<Vertex>::m_context;
using GSRendererT<Vertex>::m_vertices;
using GSRendererT<Vertex>::m_dev;
using GSRendererT<Vertex>::PRIM;
using GSRendererT<Vertex>::m_mem;
using GSRendererT<Vertex>::m_regs;
using GSRendererT<Vertex>::m_perfmon;
using GSRendererT<Vertex>::m_game;
using GSRendererT<Vertex>::s_dump;
using GSRendererT<Vertex>::s_save;
using GSRendererT<Vertex>::s_saven;
using GSRendererT<Vertex>::s_savez;
using GSRendererT<Vertex>::s_n;
private:
int m_width;
int m_height;
int m_skip;
bool m_reset;
bool m_nativeres;
int m_upscale_multiplier;
int m_userhacks_skipdraw;
@ -63,505 +42,29 @@ private:
typedef void (GSRendererHW::*OO_Ptr)();
typedef bool (GSRendererHW::*CU_Ptr)();
// FFXII draw hook (m_oi): captures a video that the game uploads as point lists and
// later replaces the line-list blit of that video with two textured triangles.
//
// State machine kept in function-local statics:
//   lines == 0 : idle; a line batch of 448*2 or 512*2 vertices arms the hack.
//   lines != 0 : point batches of at least 16*512 vertices are copied into the staging
//                buffer and swallowed; a line batch of lines*2 vertices is rewritten
//                into 6 vertices that sample a texture built from the staging buffer.
//
// rt / ds are unused; t is the source texture whose m_texture gets replaced.
// Returns false when the draw has been consumed (Draw() returns right after), true otherwise.
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    // Row pitch (in pixels) and row limit of the staging buffer addressing below.
    const int video_w = 448;
    const int video_h = 512;

    static uint32* video = NULL; // staging buffer, allocated on first use and never released here
    static int lines = 0;

    if(lines == 0)
    {
        if(m_vt.m_primclass == GS_LINE_CLASS && (m_count == 448 * 2 || m_count == 512 * 2))
        {
            lines = m_count / 2;
        }
    }
    else
    {
        if(m_vt.m_primclass == GS_POINT_CLASS)
        {
            if(m_count >= 16 * 512)
            {
                // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

                if(!video) video = new uint32[512 * 512];

                int ox = m_context->XYOFFSET.OFX;
                int oy = m_context->XYOFFSET.OFY;

                for(int i = 0; i < m_count; i++)
                {
                    int x = ((int)m_vertices[i].p.x - ox) >> 4;
                    int y = ((int)m_vertices[i].p.y - oy) >> 4;

                    // Drop points outside the video area instead of writing past the staging buffer.
                    if(x < 0 || x >= video_w || y < 0 || y >= video_h)
                    {
                        continue;
                    }

                    video[y * video_w + x] = m_vertices[i]._c0();
                }

                return false;
            }
            else
            {
                lines = 0;
            }
        }
        else if(m_vt.m_primclass == GS_LINE_CLASS)
        {
            // t is NULL when the draw is not textured and video is NULL until a point batch
            // has been captured; in both cases fall back to disarming the hack.
            if(m_count == lines * 2 && t != NULL && video != NULL)
            {
                // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
                // but we use the stored video data to create a new texture, and replace the lines with two triangles

                m_dev->Recycle(t->m_texture);

                t->m_texture = m_dev->CreateTexture(512, 512);

                t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

                // Vertices 0 and 1 are reused in place. Vertex 2 is overwritten first on purpose:
                // vertex 4 then copies the new vertex 2 (the old second-to-last vertex).
                m_vertices[2] = m_vertices[m_count - 2];
                m_vertices[3] = m_vertices[1];
                m_vertices[4] = m_vertices[2];
                m_vertices[5] = m_vertices[m_count - 1];

                m_count = 6;

                m_vt.Update(m_vertices, m_count, GS_TRIANGLE_CLASS);
            }
            else
            {
                lines = 0;
            }
        }
    }

    return true;
}
// FFX draw hook (m_oi): clears the depth buffer ds when a specific frame / z / texture
// register combination is seen. Always returns true, so the draw itself still proceeds.
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 zBlock = m_context->ZBUF.Block();
    const uint32 texBase = m_context->TEX0.TBP0;

    const bool frameMatches = (frameBlock == 0x00d00 || frameBlock == 0x00000);

    if(frameMatches && zBlock == 0x02100)
    {
        if(PRIM->TME && texBase == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S)
        {
            // random battle transition (z buffer written directly, clear it now)
            m_dev->ClearDepth(ds, 0);
        }
    }

    return true;
}
// MetalSlug6 draw hook (m_oi): patches vertex colors whose red channel is zero while
// green and blue are not, then refreshes the vertex trace. Always returns true.
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    // missing red channel fix
    const int total = m_count;

    int idx = 0;

    while(idx < total)
    {
        const bool redMissing = m_vertices[idx]._r() == 0;

        if(redMissing && m_vertices[idx]._g() != 0 && m_vertices[idx]._b() != 0)
        {
            // red becomes the average of green and blue
            m_vertices[idx]._r() = (m_vertices[idx]._g() + m_vertices[idx]._b()) / 2;
        }

        idx++;
    }

    m_vt.Update(m_vertices, m_count, m_vt.m_primclass);

    return true;
}
// GodOfWar2 draw hook (m_oi): when the frame buffer is one of the known PSMZ24 blocks,
// looks the same address up as a depth-stencil target, clears it and swallows the draw
// (returns false). Any other draw passes through (returns true).
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 frameWidth = m_context->FRAME.FBW;
    const uint32 framePsm = m_context->FRAME.PSM;

    // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
    const bool knownBlock = (frameBlock == 0x00f00 || frameBlock == 0x00100 || frameBlock == 0x01280);

    if(!(knownBlock && framePsm == PSM_PSMZ24))
    {
        return true;
    }

    // z buffer clear
    GIFRegTEX0 TEX0;

    TEX0.TBP0 = frameBlock;
    TEX0.TBW = frameWidth;
    TEX0.PSM = framePsm;

    GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

    if(depth)
    {
        m_dev->ClearDepth(depth->m_texture, 0);
    }

    return false;
}
// SimpsonsGame draw hook (m_oi): when the frame buffer is a known PSMZ24 block, clears
// the depth buffer ds and swallows the draw (returns false); otherwise returns true.
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    uint32 FBP = m_context->FRAME.Block();
    uint32 FPSM = m_context->FRAME.PSM;

    if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc
    {
        // instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
        // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
        // how? by using a render target that overlaps with the lower half of the z buffer...

        m_dev->ClearDepth(ds, 0);

        return false;
    }

    return true;
}
// RozenMaidenGebetGarden draw hook (m_oi), only active for untextured draws.
// Two register combinations are recognised; each clears a target looked up in the
// texture cache and swallows the draw (returns false). Everything else returns true.
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(PRIM->TME)
    {
        return true;
    }

    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 zBlock = m_context->ZBUF.Block();

    if(frameBlock == 0x008c0 && zBlock == 0x01a40)
    {
        // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer
        GIFRegTEX0 TEX0;

        TEX0.TBP0 = zBlock;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->FRAME.PSM;

        GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

        if(color)
        {
            m_dev->ClearRenderTarget(color->m_texture, 0);
        }

        return false;
    }

    if(frameBlock == 0x00000 && zBlock == 0x01180)
    {
        // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)
        GIFRegTEX0 TEX0;

        TEX0.TBP0 = frameBlock;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->ZBUF.PSM;

        GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

        if(depth)
        {
            m_dev->ClearDepth(depth->m_texture, 0);
        }

        return false;
    }

    return true;
}
// SpidermanWoS draw hook (m_oi): clears the depth buffer ds when the frame buffer is a
// known PSMCT32 block. Always returns true, so the draw still proceeds.
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    //0x2800 pal, 0x25a0 ntsc
    const bool knownBlock = (frameBlock == 0x025a0 || frameBlock == 0x02800);

    if(knownBlock && framePsm == PSM_PSMCT32)
    {
        //only top half of the screen clears
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// TyTasmanianTiger draw hook (m_oi; registered for both TyTasmanianTiger CRC entries):
// when the frame buffer is a known PSMCT24 block, clears the depth buffer ds and
// swallows the draw (returns false); otherwise returns true.
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    uint32 FBP = m_context->FRAME.Block();
    uint32 FPSM = m_context->FRAME.PSM;

    if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc
    {
        //half height buffer clear
        m_dev->ClearDepth(ds, 0);

        return false;
    }

    return true;
}
// DigimonRumbleArena2 draw hook (m_oi): for untextured draws into a known PSMCT32 frame
// block, clears the depth buffer ds. Always returns true.
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(PRIM->TME)
    {
        return true;
    }

    const bool knownBlock = (frameBlock == 0x02300 || frameBlock == 0x03fc0);

    if(knownBlock && framePsm == PSM_PSMCT32)
    {
        //half height buffer clear
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// BlackHawkDown draw hook (m_oi): when the frame buffer is block 0x02000 in PSMZ24,
// clears the depth buffer ds and swallows the draw (returns false); otherwise returns true.
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(frameBlock != 0x02000 || framePsm != PSM_PSMZ24)
    {
        return true;
    }

    //half height buffer clear
    m_dev->ClearDepth(ds, 0);

    return false;
}
// StarWarsForceUnleashed draw hook (m_oi).
//  - untextured draw into PSMCT24 block 0x2bc0: clear ds and swallow the draw (false)
//  - textured draw into PSMCT32 block 0x0 / 0x01180 with all z equal to 0: clear ds, keep drawing
// Returns true in every other case.
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(PRIM->TME)
    {
        const bool knownBlock = (frameBlock == 0x0 || frameBlock == 0x01180);
        const bool flatZeroZ = (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_max.p.z == 0);

        if(knownBlock && framePsm == PSM_PSMCT32 && flatZeroZ)
        {
            m_dev->ClearDepth(ds, 0);
        }

        return true;
    }

    if(framePsm == PSM_PSMCT24 && frameBlock == 0x2bc0)
    {
        m_dev->ClearDepth(ds, 0);

        return false;
    }

    return true;
}
// XmenOriginsWolverine draw hook (m_oi): clears the depth buffer ds when the frame
// buffer is block 0x0 in PSMCT16. Always returns true.
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    const bool matches = (frameBlock == 0x0) && (framePsm == PSM_PSMCT16);

    if(matches)
    {
        //half height buffer clear
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// CallofDutyFinalFronts draw hook (m_oi): when the frame buffer is block 0x02300 in
// PSMZ24, clears the depth buffer ds and swallows the draw (returns false); otherwise
// returns true.
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(frameBlock != 0x02300 || framePsm != PSM_PSMZ24)
    {
        return true;
    }

    //half height buffer clear
    m_dev->ClearDepth(ds, 0);

    return false;
}
// SpyroNewBeginning draw hook (m_oi).
//  - untextured draw into PSMCT24 block 0x02800 / 0x02bc0: clear ds and swallow the draw (false)
//  - textured draw into PSMCT32 block 0x0 / 0x01180 with all z equal to 0: clear ds, keep drawing
// Returns true in every other case.
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(PRIM->TME)
    {
        const bool knownBlock = (frameBlock == 0x0 || frameBlock == 0x01180);
        const bool flatZeroZ = (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0);

        if(knownBlock && framePsm == PSM_PSMCT32 && flatZeroZ)
        {
            m_dev->ClearDepth(ds, 0);
        }

        return true;
    }

    //0x2800 pal, 0x2bc0 ntsc
    if(framePsm == PSM_PSMCT24 && (frameBlock == 0x02800 || frameBlock == 0x02bc0))
    {
        //half height buffer clear
        m_dev->ClearDepth(ds, 0);

        return false;
    }

    return true;
}
// SpyroEternalNight draw hook (m_oi).
//  - untextured draw into PSMCT24 block 0x2bc0: clear ds and swallow the draw (false)
//  - textured draw into PSMCT32 block 0x0 / 0x01180 with all z equal to 0: clear ds, keep drawing
// Returns true in every other case.
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(PRIM->TME)
    {
        const bool knownBlock = (frameBlock == 0x0 || frameBlock == 0x01180);
        const bool flatZeroZ = (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0);

        if(knownBlock && framePsm == PSM_PSMCT32 && flatZeroZ)
        {
            m_dev->ClearDepth(ds, 0);
        }

        return true;
    }

    if(framePsm == PSM_PSMCT24 && frameBlock == 0x2bc0)
    {
        //half height buffer clear
        m_dev->ClearDepth(ds, 0);

        return false;
    }

    return true;
}
// TalesOfLegendia draw hook (m_oi): for PSMCT32 draws into block 0x01c00 with alpha test
// disabled and a constant z across the batch, forces the depth test to ZTST_ALWAYS.
// Always returns true.
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 framePsm = m_context->FRAME.PSM;

    if(framePsm != PSM_PSMCT32 || frameBlock != 0x01c00)
    {
        return true;
    }

    if(!m_context->TEST.ATE && m_vt.m_max.p.z == m_vt.m_min.p.z)
    {
        m_context->TEST.ZTST = ZTST_ALWAYS;
        //m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Generic draw hook (m_oi) selected by the CRC::PointListPalette game flag.
// Untextured point lists of exactly 16 or 256 vertices drawn to a frame block at or above
// 0x03f40 (and a multiple of 0x20) are written straight into local memory as an
// 8-wide or 16-wide pixel grid, with alpha rescaled (a >= 0x80 -> 0xff, otherwise a * 2).
// The CLUT is then invalidated and the draw is swallowed (returns false).
// All other draws return true; an unexpected vertex count trips ASSERT(0).
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(m_vt.m_primclass != GS_POINT_CLASS || PRIM->TME)
    {
        return true;
    }

    const uint32 frameBlock = m_context->FRAME.Block();
    const uint32 frameWidth = m_context->FRAME.FBW;

    if(frameBlock < 0x03f40 || (frameBlock & 0x1f) != 0)
    {
        return true;
    }

    // x = i & colMask, y = i >> rowShift
    int total, colMask, rowShift;

    if(m_count == 16)
    {
        total = 16;
        colMask = 7;
        rowShift = 3;
    }
    else if(m_count == 256)
    {
        total = 256;
        colMask = 15;
        rowShift = 4;
    }
    else
    {
        ASSERT(0);

        return true;
    }

    for(int i = 0; i < total; i++)
    {
        uint8 a = m_vertices[i]._a();

        m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2;

        m_mem.WritePixel32(i & colMask, i >> rowShift, m_vertices[i]._c0(), frameBlock, frameWidth);
    }

    m_mem.m_clut.Invalidate();

    return false;
}
void OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
void OO_DBZBT2();
void OO_MajokkoALaMode2();
bool CU_DBZBT2();
bool CU_MajokkoALaMode2();
bool CU_TalesOfAbyss();
class Hacks
{
@ -617,383 +120,37 @@ private:
OO_Ptr m_oo;
CU_Ptr m_cu;
Hacks()
: m_oi_map(m_oi_list)
, m_oo_map(m_oo_list)
, m_cu_map(m_cu_list)
, m_oi(NULL)
, m_oo(NULL)
, m_cu(NULL)
{
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));
Hacks();
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
// Selects the three per-game hooks (m_oi, m_oo, m_cu) for the given game.
// The lookup key packs the region into bits 24 and up and the title into the low bits.
// Games flagged CRC::PointListPalette get OI_PointListPalette as their m_oi hook; the
// ASSERT documents that such a game is not expected to have a dedicated m_oi entry.
void SetGame(const CRC::Game& game)
{
    const uint32 key = (uint32)((game.region << 24) | game.title);

    m_oi = m_oi_map[key];
    m_oo = m_oo_map[key];
    m_cu = m_cu_map[key];

    if(!(game.flags & CRC::PointListPalette))
    {
        return;
    }

    ASSERT(m_oi == NULL);

    m_oi = &GSRendererHW::OI_PointListPalette;
}
void SetGameCRC(const CRC::Game& game);
} m_hacks;
virtual int GetPosX(const void* vertex) const = 0;
virtual int GetPosY(const void* vertex) const = 0;
virtual uint32 GetColor(const void* vertex) const = 0;
virtual void SetColor(void* vertex, uint32 c) const = 0;
#pragma endregion
protected:
GSTextureCache* m_tc;
// Flags the texture cache for a deferred flush (performed in VSync() once m_reset is
// seen) and forwards the reset to the base renderer.
void Reset()
{
    // TODO: GSreset can come from the main thread too => crash
    // m_tc->RemoveAll();

    m_reset = true;

    GSRendererT<Vertex>::Reset();
}
// Per-vsync housekeeping: runs the base-class VSync, ages the texture cache and the
// device pool, resets the skip counter, and performs the cache flush that Reset()
// deferred via m_reset.
void VSync(int field)
{
    GSRendererT<Vertex>::VSync(field);

    m_tc->IncAge();
    m_dev->AgePool();

    m_skip = 0;

    if(!m_reset)
    {
        return;
    }

    m_tc->RemoveAll();
    m_reset = false;
}
// Empties the texture cache first, then lets the base renderer reset the device.
void ResetDevice()
{
    m_tc->RemoveAll();

    GSRendererT<Vertex>::ResetDevice();
}
// Returns the cached target texture that matches display circuit i (address, width and
// format taken from DISP[i].DISPFB), or NULL when the texture cache has no such target.
// With s_dump enabled the texture may also be saved to disk and s_n is advanced.
GSTexture* GetOutput(int i)
{
    const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

    GIFRegTEX0 TEX0;

    TEX0.TBP0 = DISPFB.Block();
    TEX0.TBW = DISPFB.FBW;
    TEX0.PSM = DISPFB.PSM;

    // TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

    GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height);

    if(!target)
    {
        return NULL;
    }

    GSTexture* output = target->m_texture;

    if(s_dump)
    {
        const bool saveNow = s_save && s_n >= s_saven;

        if(saveNow)
        {
            output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
        }

        s_n++;
    }

    return output;
}
// Forwards a write to GS memory (destination fields of BITBLTBUF, rectangle r) to the
// texture cache so it can invalidate whatever it holds for that area.
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
    // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);

    m_tc->InvalidateVideoMem(
        m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM),
        r);
}
// Forwards a pending read of GS memory (source fields of BITBLTBUF, rectangle r) to the
// texture cache. Requests made on behalf of a CLUT load (clut == true) are ignored.
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false)
{
    // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);

    if(!clut) // FIXME
    {
        m_tc->InvalidateLocalMem(
            m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM),
            r);
    }
}
// Draw entry point of the hardware renderer for the current vertex batch.
// Steps, in order:
//   1. optional bad-frame skip (CRC hacks),
//   2. look up the render target and depth target in the texture cache,
//   3. look up the source texture when texturing is enabled,
//   4. optional debug dump of the inputs,
//   5. per-game m_oi hook (may swallow the draw),
//   6. alpha-test simplification with temporary register patching,
//   7. the actual draw through the virtual Draw(rt, ds, tex),
//   8. mark the drawn rectangle valid on the targets and invalidate overlapping cache data,
//   9. per-game m_oo hook and optional debug dump of the outputs.
void Draw()
{
#ifndef DISABLE_CRC_HACKS
// Bail out when IsBadFrame() asks for this draw to be skipped.
if(GSRendererT<Vertex>::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// Render target: address, width and format come from FRAME.
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
// Depth target: address and format come from ZBUF, the width is shared with FRAME.
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
// Both targets are required; nothing can be drawn without them.
if(!rt || !ds)
{
ASSERT(0);
return;
}
// Source texture stays NULL for untextured draws; the hooks and Draw(rt, ds, tex) receive it as is.
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
// A textured draw without a source texture is dropped silently.
if(!tex) return;
}
// Debug dump of the inputs. Note that s_n is advanced twice in this section.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
// Per-game "OI" hook: a false return means the hook handled the draw itself.
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
// Keep copies of the registers patched below so they can be restored after drawing.
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
uint32 fm = context->FRAME.FBMSK;
// zm is all ones when z writes are masked or the z test is disabled, 0 otherwise.
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
// NOTE(review): TryAlphaTest presumably adjusts fm / zm through its arguments - confirm in GSRendererT.
if(GSRendererT<Vertex>::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
Draw(rt->m_texture, ds->m_texture, tex);
//
// Restore the registers saved before the alpha-test patching.
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
// Drawn area: bounding box of the batch clipped to the scissor rectangle.
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
// A mask of all ones means nothing was written to that buffer.
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
// Per-game "OO" hook, run after the draw.
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
// Debug dump of the outputs.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
// With the hardware texture cache disabled the drawn area is read back immediately.
m_tc->Read(rt, r);
#endif
}
virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
// Tells whether upscaling may be used right now.
// A per-game m_cu hook can veto it; otherwise upscaling requires non-native resolution
// and an enabled output circuit (PMODE.EN != 0).
bool CanUpscale()
{
    const bool vetoedByHack = m_hacks.m_cu && !(this->*m_hacks.m_cu)();

    if(vetoedByHack)
    {
        return false;
    }

    // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
    if(m_nativeres)
    {
        return false;
    }

    return m_regs->PMODE.EN != 0;
}
// Accessor for the stored upscale multiplier (m_upscale_multiplier).
int GetUpscaleMultiplier()
{
    const int multiplier = m_upscale_multiplier;

    return multiplier;
}
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
public:
GSRendererHW(GSTextureCache* tc)
: GSRendererT<Vertex>()
, m_tc(tc)
, m_width(1024)
, m_height(1024)
, m_skip(0)
, m_reset(false)
, m_upscale_multiplier(1)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
virtual ~GSRendererHW();
if(!m_nativeres)
{
m_width = theApp.GetConfig("resx", m_width);
m_height = theApp.GetConfig("resy", m_height);
void SetGameCRC(uint32 crc, int options);
bool CanUpscale();
int GetUpscaleMultiplier();
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);
if(m_upscale_multiplier > 6)
{
m_upscale_multiplier = 1; // use the normal upscale math
}
else if(m_upscale_multiplier > 1)
{
m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
}
}
else m_upscale_multiplier = 1;
}
// The renderer deletes the texture cache that was handed to its constructor.
virtual ~GSRendererHW()
{
    delete m_tc;
}
// Runs the base-class SetGameCRC(), then selects the per-game hooks for m_game and
// applies the JackieChanAdv render-target width override.
void SetGameCRC(uint32 crc, int options)
{
    GSRendererT<Vertex>::SetGameCRC(crc, options);

    m_hacks.SetGame(m_game);

    const bool isJackieChanAdv = (m_game.title == CRC::JackieChanAdv);

    if(isJackieChanAdv)
    {
        // TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem
        m_width = 1280;
    }
}
void Reset();
void VSync(int field);
void ResetDevice();
GSTexture* GetOutput(int i);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void Draw();
};

View File

@ -23,9 +23,20 @@
#include "GSRenderer.h"
class GSRendererNull : public GSRendererT<GSVertexNull>
class GSRendererNull : public GSRenderer
{
class GSVertexTraceNull : public GSVertexTrace
{
public:
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {}
};
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index)
{
}
void Draw()
{
}
@ -37,12 +48,8 @@ protected:
public:
GSRendererNull()
: GSRendererT<GSVertexNull>()
{
InitVertexKick(GSRendererNull);
}
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
{
InitConvertVertex(GSRendererNull);
}
};

View File

@ -25,9 +25,12 @@
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL)
: GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
, m_fzb(NULL)
{
InitVertexKick(GSRendererSW);
InitConvertVertex(GSRendererSW);
m_nativeres = true; // ignore ini, sw is always native
m_tc = new GSTextureCacheSW(this);
@ -62,46 +65,32 @@ void GSRendererSW::Reset()
m_reset = true;
GSRendererT<GSVertexSW>::Reset();
GSRenderer::Reset();
}
void GSRendererSW::VSync(int field)
{
Sync(0); // IncAge might delete a cached texture in use
/*
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n",
int draw[8], sum = 0;
for(int i = 0; i < countof(draw); i++)
{
draw[i] = m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
sum += draw[i];
}
printf("CPU %d Sync %d W %d %d %d %d %d %d %d %d (%d)\n",
m_perfmon.CPU(GSPerfMon::Main),
m_perfmon.CPU(GSPerfMon::Sync),
m_perfmon.CPU(GSPerfMon::WorkerSync0),
m_perfmon.CPU(GSPerfMon::WorkerSleep0),
m_perfmon.CPU(GSPerfMon::WorkerDraw0),
m_perfmon.CPU(GSPerfMon::WorkerSync1),
m_perfmon.CPU(GSPerfMon::WorkerSleep1),
m_perfmon.CPU(GSPerfMon::WorkerDraw1),
m_perfmon.CPU(GSPerfMon::WorkerSync2),
m_perfmon.CPU(GSPerfMon::WorkerSleep2),
m_perfmon.CPU(GSPerfMon::WorkerDraw2),
m_perfmon.CPU(GSPerfMon::WorkerSync3),
m_perfmon.CPU(GSPerfMon::WorkerSleep3),
m_perfmon.CPU(GSPerfMon::WorkerDraw3),
m_perfmon.CPU(GSPerfMon::WorkerSync4),
m_perfmon.CPU(GSPerfMon::WorkerSleep4),
m_perfmon.CPU(GSPerfMon::WorkerDraw4),
m_perfmon.CPU(GSPerfMon::WorkerSync5),
m_perfmon.CPU(GSPerfMon::WorkerSleep5),
m_perfmon.CPU(GSPerfMon::WorkerDraw5),
m_perfmon.CPU(GSPerfMon::WorkerSync6),
m_perfmon.CPU(GSPerfMon::WorkerSleep6),
m_perfmon.CPU(GSPerfMon::WorkerDraw6),
m_perfmon.CPU(GSPerfMon::WorkerSync7),
m_perfmon.CPU(GSPerfMon::WorkerSleep7),
m_perfmon.CPU(GSPerfMon::WorkerDraw7));
draw[0], draw[1], draw[2], draw[3], draw[4], draw[5], draw[6], draw[7], sum);
//
printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0;
printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0;
*/
GSRendererT<GSVertexSW>::VSync(field);
GSRenderer::VSync(field);
m_tc->IncAge();
@ -162,91 +151,193 @@ GSTexture* GSRendererSW::GetOutput(int i)
return m_texture[i];
}
void GSRendererSW::Draw()
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
ASSERT(d->_pad.u32[0] != 0x12345678);
scissor.z = std::min<int>(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
uint32 z = s->XYZ.Z;
GSVector4i r = bbox.rintersect(scissor);
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r);
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
GSVector4 p, t, c;
GSRasterizerData2* data2 = new GSRasterizerData2(this, fb_pages, zb_pages);
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
shared_ptr<GSRasterizerData> data(data2);
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
if(!GetScanlineGlobalData(*gd))
if(tme)
{
return;
if(fst)
{
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
}
}
data->scissor = scissor;
data->bbox = bbox;
data->primclass = m_vt.m_primclass;
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later?
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here
data->count = m_count;
data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
d->p = p;
d->c = c;
d->t = t;
#ifdef _DEBUG
d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
#endif
if(prim == GS_SPRITE)
{
d->t.u32[3] = z;
}
}
#define LOG 0
FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
void GSRendererSW::Draw()
{
SharedData* sd = new SharedData(this);
shared_ptr<GSRasterizerData> data(sd);
sd->primclass = m_vt->m_primclass;
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
sd->vertex = (GSVertexSW*)sd->buff;
sd->vertex_count = m_vertex.next;
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
sd->index_count = m_index.tail;
memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
}
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
// TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too
if(!GetScanlineGlobalData(sd)) return;
//
if(gd->sel.fwrite)
const GSDrawingContext* context = m_context;
GSScanlineGlobalData& gd = sd->global;
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
sd->scissor = scissor;
sd->bbox = bbox;
sd->frame = m_perfmon.GetFrame();
//
uint32* fb_pages = NULL;
uint32* zb_pages = NULL;
GSVector4i r = bbox.rintersect(scissor);
if(gd.sel.fwrite)
{
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
fb_pages = context->offset.fb->GetPages(r);
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
}
if(gd->sel.zwrite)
if(gd.sel.zwrite)
{
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
zb_pages = context->offset.zb->GetPages(r);
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
}
// set data->syncpoint
if(m_fzb != m_context->offset.fzb)
if(m_fzb != context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
// hmm, what if "r" gets bigger next time and slips through unchecked, need to trace that too
data->syncpoint = true;
sd->syncpoint = true; // TODO
if(!sd->syncpoint)
{
if(fb_pages == NULL)
{
fb_pages = context->offset.fb->GetPages(r);
}
// - cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
if(CheckTargetPages<0xffffffff>(fb_pages))
{
sd->syncpoint = true;
if(!data->syncpoint)
{
if(gd->sel.fwrite)
{
for(list<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer
{
data->syncpoint = true;
break;
}
}
if(LOG) fprintf(s_fp, "syncpoint 0\n");
}
}
if(!data->syncpoint)
if(!sd->syncpoint)
{
if(gd->sel.zwrite)
if(zb_pages == NULL)
{
for(list<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer
{
data->syncpoint = true;
zb_pages = context->offset.zb->GetPages(r);
}
break;
if(CheckTargetPages<0xffffffff>(zb_pages))
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 1\n");
}
}
if(!sd->syncpoint)
{
if(LOG) fprintf(s_fp, "no syncpoint *\n");
}
m_fzb = context->offset.fzb;
}
else
{
// cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue,
// m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output
// is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
if(!sd->syncpoint)
{
if(gd.sel.fwrite)
{
if(CheckTargetPages<0xffff0000>(fb_pages)) // already used as a z-buffer
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 2\n");
}
}
}
if(!sd->syncpoint)
{
if(gd.sel.zwrite)
{
if(CheckTargetPages<0x0000ffff>(zb_pages)) // already used as a frame buffer
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 3\n");
}
}
}
@ -254,7 +345,7 @@ void GSRendererSW::Draw()
//
data2->UseTargetPages();
sd->UseTargetPages(fb_pages, zb_pages);
//
@ -313,21 +404,15 @@ void GSRendererSW::Draw()
}
else
{
if(LOG) fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
m_context->FRAME.Block(), m_context->FRAME.PSM,
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
PRIM->PRIM, sd->vertex_count, sd->index_count);
m_rl->Queue(data);
}
int prims = 0;
switch(data->primclass)
{
case GS_POINT_CLASS: prims = data->count; break;
case GS_LINE_CLASS: prims = data->count / 2; break;
case GS_TRIANGLE_CLASS: prims = data->count / 3; break;
case GS_SPRITE_CLASS: prims = data->count / 2; break;
}
m_perfmon.Put(GSPerfMon::Prim, prims);
/*
if(0)//stats.ticks > 5000000)
{
@ -346,45 +431,59 @@ void GSRendererSW::Sync(int reason)
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
uint64 t = __rdtsc();
m_rl->Sync();
// NOTE: m_fzb_pages is refcounted, zeroing is done automatically
s_n++;
memset(m_tex_pages, 0, sizeof(m_tex_pages));
t = __rdtsc() - t;
if(LOG) fprintf(s_fp, "sync n=%d r=%d t=%lld p=%d %c\n", s_n, reason, t, m_rl->GetPixels(), t > 10000000 ? '*' : ' ');
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
}
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
list<uint32>* pages = o->GetPages(r);
uint32* RESTRICT p = m_tmp_pages;
m_tc->InvalidatePages(pages, o->psm);
o->GetPages(r, p);
// check if the changing pages either used as a texture or a target
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(; *p != GSOffset::EOP; p++)
{
uint32 page = *i;
uint32 page = *p;
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31))))
//while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause();
if(m_fzb_pages[page] | m_tex_pages[page])
{
Sync(5);
break;
}
}
m_tc->InvalidatePages(m_tmp_pages, o->psm); // if texture update runs on a thread and Sync(5) happens then this must come later
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
list<uint32>* pages = o->GetPages(r);
uint32* RESTRICT p = m_tmp_pages;
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
o->GetPages(r, p);
for(; *p != GSOffset::EOP; p++)
{
if(m_fzb_pages[*i])
//while(m_fzb_pages[*p]) _mm_pause();
if(m_fzb_pages[*p])
{
Sync(6);
@ -393,52 +492,84 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset)
void GSRendererSW::UsePages(const uint32* pages, int type)
{
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
if(type < 2)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
_InterlockedIncrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
//while(m_fzb_pages[*p]) _mm_pause();
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
{
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
}
}
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
{
for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
{
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
if(m_fzb_pages[*p]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
{
Sync(7);
return;
break;
}
}
}
for(size_t i = 0; i < countof(t->m_pages.bm); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used
ASSERT(m_tex_pages[*p] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_tex_pages[*p]); // remember which texture pages are used
}
}
}
void GSRendererSW::ReleasePages(const uint32* pages, int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] > 0);
_InterlockedDecrement16((short*)&m_tex_pages[*p]);
}
}
}
template<uint32 mask> bool GSRendererSW::CheckTargetPages(const uint32* pages)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
if(mask != 0xffffffff ? (m_fzb_pages[*p] & mask) : m_fzb_pages[*p])
{
return true;
}
}
return false;
}
#include "GSTextureSW.h"
bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
GSScanlineGlobalData& gd = data->global;
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
const GS_PRIM_CLASS primclass = m_vt->m_primclass;
gd.vm = m_mem.m_vm8;
@ -456,7 +587,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.atst = ATST_ALWAYS;
gd.sel.tfx = TFX_NONE;
gd.sel.ababcd = 255;
gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
gd.sel.prim = primclass;
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
@ -500,7 +631,12 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
bool zwrite = zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
/*
printf("%05x %d %05x %d %05x %d %dx%d\n",
fwrite || ftest ? m_context->FRAME.Block() : 0xfffff, m_context->FRAME.PSM,
zwrite || ztest ? m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH);
*/
if(!fwrite && !zwrite) return false;
gd.sel.fwrite = fwrite;
@ -510,7 +646,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
{
gd.sel.iip = PRIM->IIP;
}
@ -520,7 +656,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear();
gd.sel.ltf = m_vt->IsLinear();
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
@ -534,7 +670,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
@ -545,7 +681,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t);
data->UseSourcePages(t, 0);
GSVector4i r;
@ -553,7 +689,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(!t->Update(r)) {ASSERT(0); return false;}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
{
uint64 frame = m_perfmon.GetFrame();
@ -570,7 +706,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
{
// TEX1.MMIN
// 000 p
@ -580,13 +716,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// 100 l round
// 101 l tri
if(m_vt.m_lod.x > 0)
if(m_vt->m_lod.x > 0)
{
gd.sel.ltf = context->TEX1.MMIN >> 2;
}
else
{
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
}
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
@ -595,9 +731,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
int k = context->TEX1.K << 12;
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt.m_lod.x << 16; // set lod to max level
k = (int)m_vt->m_lod.x << 16; // set lod to max level
gd.sel.lcm = 1; // lod is constant
gd.sel.mmin = 1; // tri-linear is meaningless
@ -611,7 +747,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(gd.sel.fst)
{
ASSERT(gd.sel.lcm == 1);
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
gd.sel.lcm = 1;
}
@ -640,8 +776,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t;
GSVector4 tmin = m_vt->m_min.t;
GSVector4 tmax = m_vt->m_max.t;
static int s_counter = 0;
@ -691,14 +827,14 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
MIP_CLAMP.MAXU >>= 1;
MIP_CLAMP.MAXV >>= 1;
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
m_vt->m_min.t *= 0.5f;
m_vt->m_max.t *= 0.5f;
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t);
data->UseSourcePages(t, i);
GSVector4i r;
@ -734,8 +870,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
s_counter++;
m_vt.m_min.t = tmin;
m_vt.m_max.t = tmax;
m_vt->m_min.t = tmin;
m_vt->m_max.t = tmax;
}
else
{
@ -743,17 +879,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
// skip per pixel division if q is constant
GSVertexSW* v = m_vertices;
GSVertexSW* RESTRICT v = data->vertex;
if(m_vt.m_eq.q)
if(m_vt->m_eq.q)
{
gd.sel.fst = 1;
if(v[0].t.z != 1.0f)
{
GSVector4 w = v[0].t.zzzz().rcpnr();
const GSVector4& t = v[data->index[0]].t;
for(int i = 0, j = m_count; i < j; i++)
if(t.z != 1.0f)
{
GSVector4 w = t.zzzz().rcpnr();
for(int i = 0, j = data->vertex_count; i < j; i++)
{
GSVector4 t = v[i].t;
@ -765,7 +903,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
gd.sel.fst = 1;
for(int i = 0, j = m_count; i < j; i += 2)
for(int i = 0, j = data->vertex_count; i < j; i += 2)
{
GSVector4 t0 = v[i + 0].t;
GSVector4 t1 = v[i + 1].t;
@ -786,9 +924,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
GSVector4 half(0x8000, 0x8000);
GSVertexSW* v = m_vertices;
GSVertexSW* RESTRICT v = data->vertex;
for(int i = 0, j = m_count; i < j; i++)
for(int i = 0, j = data->vertex_count; i < j; i++)
{
GSVector4 t = v[i].t;
@ -920,7 +1058,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
}
gd.fm = GSVector4i(fm);
@ -950,149 +1088,73 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
return true;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::VertexKick(bool skip)
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
: m_parent(parent)
, m_fb_pages(NULL)
, m_zb_pages(NULL)
, m_using_pages(false)
{
const GSDrawingContext* context = m_context;
m_tex_pages[0] = NULL;
GSVertexSW& dst = m_vl.AddTail();
global.sel.key = 0;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme)
{
GSVector4 t;
if(fst)
{
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
global.clut = NULL;
global.dimx = NULL;
}
dst.t = t;
GSRendererSW::SharedData::~SharedData()
{
if(m_using_pages)
{
if(global.sel.fwrite)
{
m_parent->ReleasePages(m_fb_pages, 0);
}
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
if(prim == GS_SPRITE)
if(global.sel.zwrite)
{
dst.t.u32[3] = m_v.XYZ.Z;
}
int count = 0;
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(prim);
if(!m_dump)
{
GSVector4 pmin, pmax;
switch(primclass)
{
case GS_POINT_CLASS:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLE_CLASS:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}
GSVector4 scissor = context->scissor.ex;
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(primclass)
{
case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
test |= pmin.ceil() == pmax.ceil();
break;
}
switch(primclass)
{
case GS_TRIANGLE_CLASS:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
if(test.mask() & 3)
{
return;
}
}
switch(primclass)
{
case GS_POINT_CLASS:
break;
case GS_LINE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE_CLASS:
break;
}
if(m_count < 30 && m_count >= 3)
{
int tl = 0;
int br = 0;
if(primclass == GS_TRIANGLE_CLASS && GSVertexSW::IsQuad(&m_vertices[m_count - 3], tl, br))
{
m_count -= 3;
if(m_count > 0)
{
tl += m_count;
br += m_count;
Flush();
}
if(tl != 0) m_vertices[0] = m_vertices[tl];
if(br != 1) m_vertices[1] = m_vertices[br];
m_vertices[0].t.u32[3] = m_v.XYZ.Z;
m_vertices[1].t.u32[3] = m_v.XYZ.Z;
m_count = 2;
uint32 tmp = PRIM->PRIM;
PRIM->PRIM = GS_SPRITE;
Flush();
PRIM->PRIM = tmp;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
m_parent->ReleasePages(m_zb_pages, 1);
}
}
m_count += count;
delete m_fb_pages;
delete m_zb_pages;
// Flush();
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
{
m_parent->ReleasePages(m_tex_pages[i], 2);
}
if(global.clut) _aligned_free(global.clut);
if(global.dimx) _aligned_free(global.dimx);
}
// Records the frame/z-buffer page lists for this batch and registers them
// with the parent renderer. Does nothing if pages were already registered.
//
// fb_pages: EOP-terminated frame buffer page list, registered (type 0) only
//           when global.sel.fwrite is set.
// zb_pages: EOP-terminated z-buffer page list, registered (type 1) only when
//           global.sel.zwrite is set.
//
// Both pointers are stored regardless of the write flags.
void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
{
	if(!m_using_pages)
	{
		m_fb_pages = fb_pages;
		m_zb_pages = zb_pages;

		if(global.sel.fwrite) m_parent->UsePages(fb_pages, 0);
		if(global.sel.zwrite) m_parent->UsePages(zb_pages, 1);

		m_using_pages = true;
	}
}
// Remembers the page list of texture t in slot `level` of m_tex_pages and
// registers those pages with the parent renderer as texture pages (type 2).
//
// The slot is expected to be empty (checked by ASSERT); the following slot
// is reset to NULL so the array stays NULL terminated.
void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
{
	ASSERT(m_tex_pages[level] == NULL);

	const uint32* src_pages = t->m_pages.n;

	m_tex_pages[level + 0] = src_pages;
	m_tex_pages[level + 1] = NULL; // keep the list terminated

	m_parent->UsePages(src_pages, 2);
}

View File

@ -25,83 +25,22 @@
#include "GSTextureCacheSW.h"
#include "GSDrawScanline.h"
class GSRendererSW : public GSRendererT<GSVertexSW>
class GSRendererSW : public GSRenderer
{
class GSRasterizerData2 : public GSRasterizerData
class SharedData : public GSDrawScanline::SharedData
{
GSRendererSW* m_parent;
const list<uint32>* m_fb_pages;
const list<uint32>* m_zb_pages;
const uint32* m_fb_pages;
const uint32* m_zb_pages;
const uint32* m_tex_pages[7 + 1]; // NULL terminated
bool m_using_pages;
public:
GSRasterizerData2(GSRendererSW* parent, const list<uint32>* fb_pages, const list<uint32>* zb_pages)
: m_parent(parent)
, m_fb_pages(fb_pages)
, m_zb_pages(zb_pages)
, m_using_pages(false)
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
SharedData(GSRendererSW* parent);
virtual ~SharedData();
gd->sel.key = 0;
gd->clut = NULL;
gd->dimx = NULL;
param = gd;
}
virtual ~GSRasterizerData2()
{
ReleaseTargetPages();
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
if(gd->dimx) _aligned_free(gd->dimx);
_aligned_free(gd);
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
}
void UseTargetPages()
{
if(m_using_pages) {ASSERT(0); return;}
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
{
m_parent->UseTargetPages(m_fb_pages, 0);
}
if(gd->sel.zwrite)
{
m_parent->UseTargetPages(m_zb_pages, 1);
}
m_using_pages = true;
}
void ReleaseTargetPages()
{
if(!m_using_pages) {ASSERT(0); return;}
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
{
m_parent->ReleaseTargetPages(m_fb_pages, 0);
}
if(gd->sel.zwrite)
{
m_parent->ReleaseTargetPages(m_zb_pages, 1);
}
m_using_pages = false;
}
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
};
protected:
@ -112,7 +51,8 @@ protected:
bool m_reset;
GSPixelOffset4* m_fzb;
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint32 m_tex_pages[16];
uint16 m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset();
void VSync(int field);
@ -124,16 +64,16 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UseTargetPages(const list<uint32>* pages, int offset);
void ReleaseTargetPages(const list<uint32>* pages, int offset);
void UseSourcePages(const GSTextureCacheSW::Texture* t);
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
template<uint32 mask> bool CheckTargetPages(const uint32* pages);
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
bool GetScanlineGlobalData(SharedData* data);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
public:
GSRendererSW(int threads);
virtual ~GSRendererSW();
template<uint32 prim, uint32 tme, uint32 fst>
void VertexKick(bool skip);
};

View File

@ -61,12 +61,12 @@ union GSScanlineSelector
uint32 colclamp:1; // 43
uint32 fba:1; // 44
uint32 dthe:1; // 45
uint32 sprite:1; // 46
uint32 edge:1; // 47
uint32 prim:2; // 46
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm:1; // 49
uint32 mmin:2; // 50
uint32 edge:1; // 48
uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm:1; // 50
uint32 mmin:2; // 51
};
struct
@ -92,7 +92,7 @@ union GSScanlineSelector
bool IsSolidRect() const
{
return sprite
return prim == GS_SPRITE_CLASS
&& iip == 0
&& tfx == TFX_NONE
&& abe == 0

View File

@ -317,9 +317,9 @@ void GSSettingsDlg::UpdateControls()
EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw && GSDevice9::GetMaxDepth(m_lastValidMsaa) < 32);
EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw);
//EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let users set software params regardless of renderer used
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_MSAAEDIT), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_MSAA), hw);

View File

@ -27,10 +27,17 @@
using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
void GSSetupPrimCodeGenerator::Generate()
{
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++)
{
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -53,7 +60,7 @@ void GSSetupPrimCodeGenerator::Depth()
return;
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4 p = dscan.p;
@ -107,7 +114,12 @@ void GSSetupPrimCodeGenerator::Depth()
}
else
{
// GSVector4 p = vertices[0].p;
// GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -312,7 +324,25 @@ void GSSetupPrimCodeGenerator::Color()
}
else
{
// GSVector4i c = GSVector4i(vertices[0].c);
// GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -27,10 +27,17 @@
using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
void GSSetupPrimCodeGenerator::Generate()
{
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++)
{
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -53,7 +60,7 @@ void GSSetupPrimCodeGenerator::Depth()
return;
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4 p = dscan.p;
@ -112,7 +119,12 @@ void GSSetupPrimCodeGenerator::Depth()
}
else
{
// GSVector4 p = vertices[0].p;
// GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -327,7 +339,25 @@ void GSSetupPrimCodeGenerator::Color()
}
else
{
// GSVector4i c = GSVector4i(vertices[0].c);
// GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@
#include "GSDrawingContext.h"
#include "GSDrawingEnvironment.h"
#include "GSVertex.h"
#include "GSVertexList.h"
#include "GSVertexTrace.h"
#include "GSUtil.h"
#include "GSPerfMon.h"
#include "GSVector.h"
@ -42,13 +42,14 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r);
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4];
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
@ -56,8 +57,9 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r);
GIFRegHandler m_fpGIFRegHandlers[256];
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
void ApplyTEX0(int i, GIFRegTEX0& TEX0);
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
void ApplyPRIM(const GIFRegPRIM& PRIM);
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
@ -65,13 +67,11 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r);
void GIFRegHandlerNOP(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r);
@ -126,49 +126,68 @@ class GSState : public GSAlignedClass<32>
} m_tr;
void FlushWrite();
protected:
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
typedef void (GSState::*VertexKickPtr)(bool skip);
GSVertex m_v;
float m_q;
GSVector4 m_scissor;
uint32 m_ofxy;
VertexKickPtr m_vk[8][2][2];
VertexKickPtr m_vkf;
#define InitVertexKick3(T, P, N, M) \
m_vk[P][N][M] = (VertexKickPtr)(void (T::*)(bool))&T::VertexKick<P, N, M>;
#define InitVertexKick2(T, P) \
InitVertexKick3(T, P, 0, 0) \
InitVertexKick3(T, P, 0, 1) \
InitVertexKick3(T, P, 1, 0) \
InitVertexKick3(T, P, 1, 1) \
#define InitVertexKick(T) \
InitVertexKick2(T, GS_POINTLIST) \
InitVertexKick2(T, GS_LINELIST) \
InitVertexKick2(T, GS_LINESTRIP) \
InitVertexKick2(T, GS_TRIANGLELIST) \
InitVertexKick2(T, GS_TRIANGLESTRIP) \
InitVertexKick2(T, GS_TRIANGLEFAN) \
InitVertexKick2(T, GS_SPRITE) \
InitVertexKick2(T, GS_INVALID) \
void UpdateVertexKick()
struct
{
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST];
}
uint8* buff;
size_t stride;
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
GSVector4 xy[4];
size_t xy_tail;
uint8* tmp;
} m_vertex;
void VertexKickNull(bool skip)
struct
{
ASSERT(0);
}
uint32* buff;
size_t tail;
} m_index;
void VertexKick(bool skip)
{
(this->*m_vkf)(skip);
}
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitConvertVertex2(T, P) \
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
#define InitConvertVertex(T) \
InitConvertVertex2(T, GS_POINTLIST) \
InitConvertVertex2(T, GS_LINELIST) \
InitConvertVertex2(T, GS_LINESTRIP) \
InitConvertVertex2(T, GS_TRIANGLELIST) \
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
InitConvertVertex2(T, GS_TRIANGLEFAN) \
InitConvertVertex2(T, GS_SPRITE) \
InitConvertVertex2(T, GS_INVALID) \
void UpdateContext();
void UpdateScissor();
virtual void UpdateVertexKick();
void GrowVertexBuffer();
template<uint32 prim>
void VertexKick(uint32 skip);
// following functions need m_vt to be initialized
GSVertexTrace* m_vt;
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public:
GIFPath m_path[4];
@ -177,10 +196,6 @@ public:
GSLocalMemory m_mem;
GSDrawingEnvironment m_env;
GSDrawingContext* m_context;
GSVertex m_v;
float m_q;
uint32 m_vprim;
GSPerfMon m_perfmon;
uint32 m_crc;
int m_options;
@ -188,9 +203,10 @@ public:
bool m_framelimit;
CRC::Game m_game;
GSDump m_dump;
bool m_nativeres;
public:
GSState();
GSState(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSState();
void ResetHandlers();
@ -205,8 +221,9 @@ public:
virtual void Reset();
virtual void Flush();
virtual void FlushPrim() = 0;
virtual void ResetPrim() = 0;
virtual void FlushPrim();
virtual void FlushWrite();
virtual void Draw() = 0;
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}

View File

@ -319,11 +319,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
GSVector4i r;
list<uint32>* pages = o->GetPages(rect, &r);
uint32* pages = (uint32*)m_temp;
o->GetPages(rect, pages, &r);
bool found = false;
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
@ -337,22 +339,24 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
uint32* RESTRICT valid = s->m_valid;
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
if(s->m_repeating)
{
list<GSVector2i>& l = s->m_p2t[page];
vector<GSVector2i>& l = s->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
for(vector<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
s->m_valid[k->x] &= k->y;
valid[k->x] &= k->y;
}
}
else
{
s->m_valid[page] = 0;
valid[page] = 0;
}
s->m_complete = false;

View File

@ -75,7 +75,7 @@ public:
bool m_target;
bool m_complete;
bool m_repeating;
list<GSVector2i>* m_p2t;
vector<GSVector2i>* m_p2t;
public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp);

View File

@ -74,18 +74,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
m_textures.insert(t);
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
{
m_map[*i].push_front(t);
m_map[*p].push_front(t);
}
}
return t;
}
void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
{
for(list<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
@ -95,20 +95,22 @@ void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
{
Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
if(GSUtil::HasSharedBits(psm, t->m_sharedbits))
{
uint32* RESTRICT valid = t->m_valid;
if(t->m_repeating)
{
list<GSVector2i>& l = t->m_p2t[page];
vector<GSVector2i>& l = t->m_p2t[page];
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
for(vector<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[j->x] &= j->y;
valid[j->x] &= j->y;
}
}
else
{
t->m_valid[page] = 0;
valid[page] = 0;
}
t->m_complete = false;
@ -179,16 +181,17 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
memset(m_valid, 0, sizeof(m_valid));
memset(m_pages.bm, 0, sizeof(m_pages.bm));
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++)
{
uint32 page = *i;
uint32 page = *p;
m_pages.bm[page >> 5] |= 1 << (page & 31);
m_pages.n.push_back(page);
}
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
@ -201,6 +204,8 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
GSTextureCacheSW::Texture::~Texture()
{
delete [] m_pages.n;
if(m_buff)
{
_aligned_free(m_buff);
@ -267,22 +272,29 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
uint8* dst = (uint8*)m_buff + pitch * r.top;
int block_pitch = pitch * bs.y;
r = r.srl32(3);
bs.x >>= 3;
bs.y >>= 3;
shift += 3;
if(m_repeating)
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
uint32 base = o->block.row[y];
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS)
{
uint32 addr = i >> 3;
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
if((m_valid[row] & col) == 0)
{
@ -298,13 +310,13 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
}
else
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
uint32 base = o->block.row[y];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS)
{

View File

@ -38,9 +38,10 @@ public:
uint32 m_age;
bool m_complete;
bool m_repeating;
list<GSVector2i>* m_p2t;
vector<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
struct {uint32 bm[16]; list<uint32> n;} m_pages;
struct {uint32 bm[16]; const uint32* n;} m_pages;
const uint32* RESTRICT m_sharedbits;
// m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page
@ -64,7 +65,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidatePages(const list<uint32>* pages, uint32 psm);
void InvalidatePages(const uint32* pages, uint32 psm);
void RemoveAll();
void RemoveAt(Texture* t);

View File

@ -82,9 +82,10 @@ bool GSDevice11::CreateTextureFX()
return true;
}
void GSDevice11::SetupIA(const void* vertices, int count, int prim)
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertices, sizeof(GSVertexHW11), count);
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
}

View File

@ -61,9 +61,10 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
return t;
}
void GSDevice9::SetupIA(const void* vertices, int count, int prim)
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertices, sizeof(GSVertexHW9), count);
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
}

View File

@ -275,10 +275,7 @@ protected:
if(m_exit) {m_cv.lock.Unlock(); return;}
}
{
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
T item = m_queue.front();
T& item = m_queue.front();
m_cv.lock.Unlock();
@ -287,7 +284,6 @@ protected:
m_cv.lock.Lock();
m_queue.pop();
}
if(m_queue.empty())
{
@ -312,10 +308,7 @@ protected:
m_ev.lock.Lock();
}
{
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
T item = m_queue.front();
T& item = m_queue.front();
m_ev.lock.Unlock();
@ -324,11 +317,9 @@ protected:
m_ev.lock.Lock();
m_queue.pop();
}
_InterlockedDecrement(&m_ev.count);
}
}
}

View File

@ -91,6 +91,7 @@ static class GSUtilMaps
{
public:
uint8 PrimClassField[8];
uint8 VertexCountField[8];
uint32 CompatibleBitsField[64][2];
uint32 SharedBitsField[64][2];
@ -105,6 +106,15 @@ public:
PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS;
PrimClassField[GS_INVALID] = GS_INVALID_CLASS;
VertexCountField[GS_POINTLIST] = 1;
VertexCountField[GS_LINELIST] = 2;
VertexCountField[GS_LINESTRIP] = 2;
VertexCountField[GS_TRIANGLELIST] = 3;
VertexCountField[GS_TRIANGLESTRIP] = 3;
VertexCountField[GS_TRIANGLEFAN] = 3;
VertexCountField[GS_SPRITE] = 2;
VertexCountField[GS_INVALID] = 1;
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
for(int i = 0; i < 64; i++)
@ -146,6 +156,21 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
}
// Returns the number of vertices one primitive of type `prim` is made of, as stored in
// s_maps.VertexCountField: 1 for points, 2 for lines, line strips and sprites, 3 for
// triangle lists, strips and fans, and 1 for GS_INVALID.
// The table has 8 entries and `prim` indexes it directly; no range check is done here.
int GSUtil::GetVertexCount(uint32 prim)
{
return s_maps.VertexCountField[prim];
}
// Returns the row of s_maps.SharedBitsField that belongs to pixel format `dpsm`: two uint32
// words holding one bit per source format. The row can be kept by the caller and tested
// later with HasSharedBits(spsm, ptr) instead of indexing the table by `dpsm` every time.
// `dpsm` indexes a 64-row table directly; no range check is done here.
const uint32* GSUtil::HasSharedBitsPtr(uint32 dpsm)
{
return s_maps.SharedBitsField[dpsm];
}
// Tests pixel format `spsm` against a row previously obtained from HasSharedBitsPtr().
// Returns true when the bit for `spsm` in that row is clear, which is the same test the
// (spsm, dpsm) overload performs on s_maps.SharedBitsField[dpsm].
//   spsm - source pixel format; bits 0-4 select the bit, the remaining bits select the word
//   ptr  - row of the shared-bits table, as returned by HasSharedBitsPtr()
bool GSUtil::HasSharedBits(uint32 spsm, const uint32* RESTRICT ptr)
{
	// Unsigned literal: (spsm & 0x1f) can be 31, and shifting a signed 1 into the sign
	// bit is not portable. The resulting mask is the same 32-bit pattern as before.
	return (ptr[spsm >> 5] & (1u << (spsm & 0x1f))) == 0;
}
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
{
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
@ -321,7 +346,7 @@ static bool DXUTDelayLoadDXGI()
bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level)
{
HRESULT hr;
HRESULT hr = S_OK;
level = (D3D_FEATURE_LEVEL)0;

View File

@ -29,7 +29,10 @@ public:
static const char* GetLibName();
static GS_PRIM_CLASS GetPrimClass(uint32 prim);
static int GetVertexCount(uint32 prim);
static const uint32* HasSharedBitsPtr(uint32 dpsm);
static bool HasSharedBits(uint32 spsm, const uint32* ptr);
static bool HasSharedBits(uint32 spsm, uint32 dpsm);
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);
static bool HasCompatibleBits(uint32 spsm, uint32 dpsm);

View File

@ -1900,7 +1900,7 @@ public:
d = f.uph64(d);
}
__forceinline static bool compare16(const void* dst, const void* src, int size)
__forceinline static bool compare16(const void* dst, const void* src, size_t size)
{
ASSERT((size & 15) == 0);
@ -1909,7 +1909,7 @@ public:
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i++)
for(size_t i = 0; i < size; i++)
{
if(!d[i].eq(s[i]))
{
@ -1920,7 +1920,7 @@ public:
return true;
}
__forceinline static bool compare64(const void* dst, const void* src, int size)
__forceinline static bool compare64(const void* dst, const void* src, size_t size)
{
ASSERT((size & 63) == 0);
@ -1929,7 +1929,7 @@ public:
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i += 4)
for(size_t i = 0; i < size; i += 4)
{
GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]);
GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]);
@ -1948,7 +1948,7 @@ public:
return true;
}
__forceinline static bool update(const void* dst, const void* src, int size)
__forceinline static bool update(const void* dst, const void* src, size_t size)
{
ASSERT((size & 15) == 0);
@ -1959,7 +1959,7 @@ public:
GSVector4i v = GSVector4i::xffffffff();
for(int i = 0; i < size; i++)
for(size_t i = 0; i < size; i++)
{
v &= d[i] == s[i];

View File

@ -29,16 +29,21 @@
#pragma pack(push, 1)
__aligned(struct, 32) GSVertex
{
union
{
struct
{
GIFRegST ST;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
GIFRegFOG FOG;
GIFRegUV UV;
uint32 UV, FOG;
};
GSVertex() {memset(this, 0, sizeof(*this));}
__m128i m[2];
};
GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.u32[0]).upl16());}
void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
};
struct GSVertexP
@ -58,9 +63,4 @@ struct GSVertexPT2
GSVector2 t[2];
};
struct GSVertexNull
{
GSVector4 p;
};
#pragma pack(pop)

View File

@ -35,16 +35,6 @@ __aligned(struct, 32) GSVertexHW9
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
float& _q() {return p.w;}
uint8& _r() {return t.u8[8];}
uint8& _g() {return t.u8[9];}
uint8& _b() {return t.u8[10];}
uint8& _a() {return t.u8[11];}
uint32& _c0() {return t.u32[2];}
uint32& _c1() {return t.u32[3];}
};
__aligned(union, 32) GSVertexHW11
@ -86,16 +76,6 @@ __aligned(union, 32) GSVertexHW11
return *this;
}
float& _q() {return q;}
uint8& _r() {return r;}
uint8& _g() {return g;}
uint8& _b() {return b;}
uint8& _a() {return a;}
uint32& _c0() {return c0;}
uint32& _c1() {return c1;}
};
#pragma pack(pop)

View File

@ -23,9 +23,9 @@
#include "GSVector.h"
__aligned(struct, 16) GSVertexSW
__aligned(struct, 32) GSVertexSW
{
GSVector4 p, t, c;
GSVector4 p, t, c, _pad;
__forceinline GSVertexSW() {}
__forceinline GSVertexSW(const GSVertexSW& v) {*this = v;}

View File

@ -28,30 +28,17 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state)
, m_map_sw("VertexTraceSW", NULL)
, m_map_hw9("VertexTraceHW9", NULL)
, m_map_hw11("VertexTraceHW11", NULL)
{
}
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_primclass = primclass;
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
m_alpha.valid = false;
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
if(m_state->PRIM->TME)
{
hash |= 1 << 5;
}
return hash;
}
void GSVertexTrace::UpdateLOD()
{
if(!m_state->PRIM->TME) return;
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
m_filter.mmag = TEX1.IsMagLinear();
@ -93,21 +80,44 @@ void GSVertexTrace::UpdateLOD()
m_filter.linear = m_filter.mmag | m_filter.mmin;
}
}
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass)
{
m_map_sw[Hash(primclass)](count, v, m_min, m_max);
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
}
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass)
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
{
m_map_hw9[Hash(primclass)](count, v, m_min, m_max);
m_primclass = primclass;
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
{
hash |= 1 << 5;
}
return hash;
}
// Software-renderer vertex tracer. Forwards `state` to the GSVertexTrace base and creates
// the generated-function map under the name "VertexTraceSW" (second argument NULL).
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceSW", NULL)
{
}
// Recomputes the vertex min/max for one batch of indexed vertices.
//   vertex    - vertex array, untyped here (the generator for this class addresses it as GSVertexSW)
//   index     - index array selecting the vertices to visit
//   count     - count handed unchanged to the generated routine
//   primclass - primitive class; combined with the current PRIM state by Hash() to pick the routine
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
// Look up the routine for this key in m_map and let it fill m_min / m_max.
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
// Shared post-processing lives in the base class; it is called after m_min / m_max are
// filled (NOTE(review): it appears to derive m_eq from them - confirm before reordering).
GSVertexTrace::Update(vertex, index, count, primclass);
}
// Direct3D 9 vertex tracer. Forwards `state` to the GSVertexTrace base and creates the
// generated-function map under the name "VertexTraceHW9" (second argument NULL).
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW9", NULL)
{
}
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context;
@ -132,16 +142,18 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
GSVertexTrace::Update(vertex, index, count, primclass);
}
void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass)
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW11", NULL)
{
m_map_hw11[Hash(primclass)](count, v, m_min, m_max);
}
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context;
@ -166,10 +178,6 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
GSVertexTrace::Update(vertex, index, count, primclass);
}

View File

@ -29,42 +29,18 @@
class GSState;
__aligned(class, 32) GSVertexTrace
__aligned(class, 32) GSVertexTrace : public GSAlignedClass<32>
{
public:
struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000
struct VertexAlpha {int min, max; bool valid;};
private:
typedef void (*VertexTracePtr)(int count, const void* v, Vertex& min, Vertex& max);
class CGSW : public GSCodeGenerator
{
public:
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW9 : public GSCodeGenerator
{
public:
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW11 : public GSCodeGenerator
{
public:
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
protected:
const GSState* m_state;
uint32 Hash(GS_PRIM_CLASS primclass);
void UpdateLOD();
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
static const GSVector4 s_minmax;
@ -73,10 +49,7 @@ public:
Vertex m_min;
Vertex m_max;
// source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
VertexAlpha m_alpha;
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
union
{
@ -92,12 +65,59 @@ public:
GSVector2 m_lod; // x = min, y = max
public:
GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {}
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {}
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
bool IsLinear() const {return m_filter.linear;}
};
// Vertex tracer for the software renderer: GSVertexTrace specialised with a code generator
// that reads vertices laid out as GSVertexSW.
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
{
// Code generator for the min/max routine; `key` is the value produced by Hash().
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Generated routines, looked up by the uint32 key returned from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceSW(const GSState* state);
// Overrides GSVertexTrace::Update: runs the generated routine, then the base-class Update.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// Vertex tracer for the Direct3D 9 renderer: GSVertexTrace specialised with a code generator
// that reads vertices laid out as GSVertexHW9.
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
{
// Code generator for the min/max routine; `key` is the value produced by Hash().
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Generated routines, looked up by the uint32 key returned from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX9(const GSState* state);
// Overrides GSVertexTrace::Update: runs the generated routine, post-processes m_min / m_max,
// then calls the base-class Update.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// Vertex tracer for the Direct3D 11 renderer: GSVertexTrace specialised with a code generator
// that reads vertices laid out as GSVertexHW11.
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
{
// Code generator for the min/max routine; `key` is the value produced by Hash().
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Generated routines, looked up by the uint32 key returned from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX11(const GSState* state);
// Overrides GSVertexTrace::Update: runs the generated routine, post-processes m_min / m_max,
// then calls the base-class Update.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};

View File

@ -26,13 +26,14 @@
using namespace Xbyak;
static const int _args = 0;
static const int _args = 4;
static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx
static const int _min = _args + 12; // r8
static const int _max = _args + 16; // r9
static const int _vertex = _args + 8; // rdx
static const int _index = _args + 12; // r8
static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -83,7 +86,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -92,18 +96,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0);
@ -112,7 +122,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
@ -122,7 +132,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
@ -140,7 +150,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
}
}
add(edx, n * sizeof(GSVertexSW));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -170,10 +180,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -189,17 +201,17 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
case GS_POINT_CLASS:
n = 1;
break;
case GS_SPRITE_CLASS:
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_SPRITE_CLASS:
n = 6;
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -226,7 +238,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -235,16 +248,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
@ -256,7 +275,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
if(color && (iip || j == n - 1))
@ -287,7 +306,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
}
}
add(edx, n * sizeof(GSVertexHW9));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -330,10 +349,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -358,6 +379,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -384,7 +407,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -393,9 +417,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
vmovaps(xmm0, ptr[edx + eax]);
}
if(color && (iip || j == n - 1))
@ -424,7 +451,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm7, xmm0);
}
vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
vmovdqa(xmm0, ptr[edx + eax + 16]);
vpmovzxwd(xmm1, xmm0);
vpsrld(xmm0, 1);
@ -435,7 +462,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm5, xmm1);
}
add(edx, n * sizeof(GSVertexHW11));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -478,6 +505,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}

View File

@ -26,13 +26,14 @@
using namespace Xbyak;
static const int _args = 0;
static const int _args = 4;
static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx
static const int _min = _args + 12; // r8
static const int _max = _args + 16; // r9
static const int _vertex = _args + 8; // rdx
static const int _index = _args + 12; // r8
static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -86,7 +89,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -95,18 +99,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
@ -115,7 +125,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
@ -125,7 +135,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
@ -144,7 +154,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
}
}
add(edx, n * sizeof(GSVertexSW));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -174,10 +184,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -204,6 +216,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -233,7 +247,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -242,16 +257,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
@ -264,7 +285,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
if(color && (iip || j == n - 1))
@ -295,7 +316,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
}
}
add(edx, n * sizeof(GSVertexHW9));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -351,10 +372,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -379,6 +402,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -408,7 +433,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -417,9 +443,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
movaps(xmm0, ptr[edx + eax]);
}
if(color && (iip || j == n - 1))
@ -448,7 +477,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm7, xmm0);
}
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
movdqa(xmm0, ptr[edx + eax + 16]);
if(m_cpu.has(util::Cpu::tSSE41))
{
@ -469,7 +498,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm5, xmm1);
}
add(edx, n * sizeof(GSVertexHW11));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -525,6 +554,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}

Some files were not shown because too many files have changed in this diff Show More