gsdx-ogl: linux only (merge from trunk 5022:5068)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5069 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-01-15 14:51:49 +00:00
commit 0ad5982364
124 changed files with 42486 additions and 36408 deletions

View File

@ -43,7 +43,7 @@ SUBGROUPING = YES
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# Build related configuration options # Build related configuration options
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
EXTRACT_ALL = YES EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES EXTRACT_LOCAL_CLASSES = YES
@ -60,14 +60,14 @@ INLINE_INFO = YES
SORT_MEMBER_DOCS = YES SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES GENERATE_TODOLIST = NO
GENERATE_TESTLIST = YES GENERATE_TESTLIST = NO
GENERATE_BUGLIST = YES GENERATE_BUGLIST = NO
GENERATE_DEPRECATEDLIST= YES GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS = ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30 MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES SHOW_USED_FILES = YES
SHOW_DIRECTORIES = NO SHOW_DIRECTORIES = YES
FILE_VERSION_FILTER = FILE_VERSION_FILTER =
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# configuration options related to warning and progress messages # configuration options related to warning and progress messages
@ -83,15 +83,14 @@ WARN_LOGFILE =
# configuration options related to the input files # configuration options related to the input files
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
INPUT = doc/src \ INPUT = doc/src \
src \
include \ include \
test examples
FILE_PATTERNS = *.h \ FILE_PATTERNS = *.h \
*.c \ *.c \
*.cpp \ *.cpp \
*.dox *.dox
RECURSIVE = YES RECURSIVE = YES
EXCLUDE = EXCLUDE = src/hostapi/wasapi/mingw-include
EXCLUDE_SYMLINKS = NO EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = EXCLUDE_PATTERNS =
EXAMPLE_PATH = EXAMPLE_PATH =
@ -104,7 +103,7 @@ FILTER_SOURCE_FILES = NO
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# configuration options related to source browsing # configuration options related to source browsing
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
SOURCE_BROWSER = NO SOURCE_BROWSER = YES
INLINE_SOURCES = NO INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES REFERENCED_BY_RELATION = YES

241
3rdparty/portaudio/Doxyfile.developer vendored Normal file
View File

@ -0,0 +1,241 @@
# Doxyfile 1.4.6
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = PortAudio
PROJECT_NUMBER = 2.0
OUTPUT_DIRECTORY = ./doc/
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF = "The $name class" \
"The $name widget" \
"The $name file" \
is \
provides \
specifies \
contains \
represents \
a \
an \
the
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = YES
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS = INTERNAL
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = YES
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = doc/src \
include \
examples \
src \
test \
qa
FILE_PATTERNS = *.h \
*.c \
*.cpp \
*.dox
RECURSIVE = YES
EXCLUDE = src/hostapi/wasapi/mingw-include
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH = doc/src/images
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration options related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = NO
HIDE_UNDOC_RELATIONS = NO
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = YES
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 1000
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration options related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

View File

@ -18,7 +18,7 @@ libdir = @libdir@
includedir = @includedir@ includedir = @includedir@
CC = @CC@ CC = @CC@
CXX = @CXX@ CXX = @CXX@
CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/os/unix @CFLAGS@ @DEFS@ CFLAGS = @CFLAGS@ @DEFS@
LIBS = @LIBS@ LIBS = @LIBS@
AR = @AR@ AR = @AR@
RANLIB = @RANLIB@ RANLIB = @RANLIB@
@ -67,9 +67,23 @@ LOOPBACK_OBJS = \
qa/loopback/src/write_wav.o \ qa/loopback/src/write_wav.o \
qa/loopback/src/paqa.o qa/loopback/src/paqa.o
TESTS = \ EXAMPLES = \
bin/pa_devs \
bin/pa_fuzz \
bin/paex_pink \
bin/paex_read_write_wire \
bin/paex_record \
bin/paex_saw \
bin/paex_sine \
bin/paex_write_sine \
bin/paex_write_sine_nonint
SELFTESTS = \
bin/paqa_devs \ bin/paqa_devs \
bin/paqa_errs \ bin/paqa_errs \
bin/paqa_latency
TESTS = \
bin/patest1 \ bin/patest1 \
bin/patest_buffer \ bin/patest_buffer \
bin/patest_callbackstop \ bin/patest_callbackstop \
@ -85,15 +99,9 @@ TESTS = \
bin/patest_mono \ bin/patest_mono \
bin/patest_multi_sine \ bin/patest_multi_sine \
bin/patest_out_underflow \ bin/patest_out_underflow \
bin/patest_pink \
bin/patest_prime \ bin/patest_prime \
bin/patest_read_record \
bin/patest_read_write_wire \
bin/patest_record \
bin/patest_ringmix \ bin/patest_ringmix \
bin/patest_saw \
bin/patest_sine8 \ bin/patest_sine8 \
bin/patest_sine \
bin/patest_sine_channelmaps \ bin/patest_sine_channelmaps \
bin/patest_sine_formats \ bin/patest_sine_formats \
bin/patest_sine_time \ bin/patest_sine_time \
@ -105,10 +113,6 @@ TESTS = \
bin/patest_two_rates \ bin/patest_two_rates \
bin/patest_underflow \ bin/patest_underflow \
bin/patest_wire \ bin/patest_wire \
bin/patest_write_sine \
bin/patest_write_sine_nonint \
bin/pa_devs \
bin/pa_fuzz \
bin/pa_minlat bin/pa_minlat
# Most of these don't compile yet. Put them in TESTS, above, if # Most of these don't compile yet. Put them in TESTS, above, if
@ -151,10 +155,14 @@ SRC_DIRS = \
SUBDIRS = SUBDIRS =
@ENABLE_CXX_TRUE@SUBDIRS += bindings/cpp @ENABLE_CXX_TRUE@SUBDIRS += bindings/cpp
all: lib/$(PALIB) all-recursive tests all: lib/$(PALIB) all-recursive tests examples selftests
tests: bin-stamp $(TESTS) tests: bin-stamp $(TESTS)
examples: bin-stamp $(EXAMPLES)
selftests: bin-stamp $(SELFTESTS)
loopback: bin-stamp bin/paloopback loopback: bin-stamp bin/paloopback
# With ASIO enabled we must link libportaudio and all test programs with CXX # With ASIO enabled we must link libportaudio and all test programs with CXX
@ -166,6 +174,14 @@ $(ALL_TESTS): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) test/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS) @WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS) @WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/test/$*.c lib/$(PALIB) $(LIBS)
$(EXAMPLES): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) examples/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/examples/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/examples/$*.c lib/$(PALIB) $(LIBS)
$(SELFTESTS): bin/%: lib/$(PALIB) $(MAKEFILE) $(PAINC) qa/%.c
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(top_srcdir)/qa/$*.c lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(top_srcdir)/qa/$*.c lib/$(PALIB) $(LIBS)
bin/paloopback: lib/$(PALIB) $(MAKEFILE) $(PAINC) $(LOOPBACK_OBJS) bin/paloopback: lib/$(PALIB) $(MAKEFILE) $(PAINC) $(LOOPBACK_OBJS)
@WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS) @WITH_ASIO_FALSE@ $(LIBTOOL) --mode=link $(CC) -o $@ $(CFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS)
@WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS) @WITH_ASIO_TRUE@ $(LIBTOOL) --mode=link --tag=CXX $(CXX) -o $@ $(CXXFLAGS) $(LOOPBACK_OBJS) lib/$(PALIB) $(LIBS)

File diff suppressed because it is too large Load Diff

View File

@ -1,78 +0,0 @@
# Project: portaudio-dll
# Makefile created by Dev-C++ 4.9.8.2
CPP = g++.exe
CC = gcc.exe
WINDRES = windres.exe
RES =
OBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LINKOBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LIBS = -L"C:/Dev-CPP/lib" -fmessage-length=0 --no-export-all-symbols --add-stdcall-alias ../../../asiosdk2/asiosdk2.a -lkernel32 -luser32 -lgdi32 -lwinspool -lcomdlg32 -ladvapi32 -lshell32 -lole32 -loleaut32 -luuid -lodbc32 -lodbccp32 -lwinmm -O3 -s
INCS = -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
CXXINCS = -I"C:/Dev-CPP/include/c++" -I"C:/Dev-CPP/include/c++/mingw32" -I"C:/Dev-CPP/include/c++/backward" -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
BIN = portaudio-dll.dll
CXXFLAGS = $(CXXINCS)-O3 -fmessage-length=0 -Wall
CFLAGS = $(INCS)-DBUILDING_DLL=1 -O3 -fmessage-length=0 -Wall
.PHONY: all all-before all-after clean clean-custom
all: all-before portaudio-dll.dll all-after
clean: clean-custom
rm -f $(OBJ) $(BIN)
DLLWRAP=dllwrap.exe
DEFFILE=libportaudio-dll.def
STATICLIB=libportaudio-dll.a
$(BIN): $(LINKOBJ)
$(DLLWRAP) --output-def $(DEFFILE) --driver-name c++ --implib $(STATICLIB) $(LINKOBJ) $(LIBS) -o $(BIN)
./pa_hostapi_skeleton.o: ../../hostapi/skeleton/pa_hostapi_skeleton.c
$(CPP) -c ../../hostapi/skeleton/pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CXXFLAGS)
./pa_stream.o: ../../common/pa_stream.c
$(CPP) -c ../../common/pa_stream.c -o ./pa_stream.o $(CXXFLAGS)
./pa_trace.o: ../../common/pa_trace.c
$(CPP) -c ../../common/pa_trace.c -o ./pa_trace.o $(CXXFLAGS)
./pa_allocation.o: ../../common/pa_allocation.c
$(CPP) -c ../../common/pa_allocation.c -o ./pa_allocation.o $(CXXFLAGS)
./pa_converters.o: ../../common/pa_converters.c
$(CPP) -c ../../common/pa_converters.c -o ./pa_converters.o $(CXXFLAGS)
./pa_cpuload.o: ../../common/pa_cpuload.c
$(CPP) -c ../../common/pa_cpuload.c -o ./pa_cpuload.o $(CXXFLAGS)
./pa_dither.o: ../../common/pa_dither.c
$(CPP) -c ../../common/pa_dither.c -o ./pa_dither.o $(CXXFLAGS)
./pa_front.o: ../../common/pa_front.c
$(CPP) -c ../../common/pa_front.c -o ./pa_front.o $(CXXFLAGS)
./pa_process.o: ../../common/pa_process.c
$(CPP) -c ../../common/pa_process.c -o ./pa_process.o $(CXXFLAGS)
./pa_asio.o: ../../pa_asio/pa_asio.cpp
$(CPP) -c ../../pa_asio/pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
./pa_win_util.o: ../pa_win_util.c
$(CPP) -c ../pa_win_util.c -o ./pa_win_util.o $(CXXFLAGS)
./pa_win_hostapis.o: ../pa_win_hostapis.c
$(CPP) -c ../pa_win_hostapis.c -o ./pa_win_hostapis.o $(CXXFLAGS)
./pa_win_ds.o: ../../pa_win_ds/pa_win_ds.c
$(CPP) -c ../../pa_win_ds/pa_win_ds.c -o ./pa_win_ds.o $(CXXFLAGS)
./dsound_wrapper.o: ../../pa_win_ds/dsound_wrapper.c
$(CPP) -c ../../pa_win_ds/dsound_wrapper.c -o ./dsound_wrapper.o $(CXXFLAGS)
./pa_win_wmme.o: ../../pa_win_wmme/pa_win_wmme.c
$(CPP) -c ../../pa_win_wmme/pa_win_wmme.c -o ./pa_win_wmme.o $(CXXFLAGS)
./iasiothiscallresolver.o: ../../pa_asio/iasiothiscallresolver.cpp
$(CPP) -c ../../pa_asio/iasiothiscallresolver.cpp -o ./iasiothiscallresolver.o $(CXXFLAGS)

View File

@ -1,75 +0,0 @@
# Project: portaudio-static
# Makefile created by Dev-C++ 4.9.8.2
CPP = g++.exe
CC = gcc.exe
WINDRES = windres.exe
RES =
OBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LINKOBJ = ./pa_hostapi_skeleton.o ./pa_stream.o ./pa_trace.o ./pa_allocation.o ./pa_converters.o ./pa_cpuload.o ./pa_dither.o ./pa_front.o ./pa_process.o ./pa_asio.o ./pa_win_util.o ./pa_win_hostapis.o ./pa_win_ds.o ./dsound_wrapper.o ./pa_win_wmme.o ./iasiothiscallresolver.o $(RES)
LIBS = -L"C:/Dev-CPP/lib" -fmessage-length=0 -O3 -s
INCS = -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
CXXINCS = -I"C:/Dev-CPP/include/c++" -I"C:/Dev-CPP/include/c++/mingw32" -I"C:/Dev-CPP/include/c++/backward" -I"C:/Dev-CPP/include" -I"../../../asiosdk2" -I"../../../asiosdk2/common" -I"../../../asiosdk2/host" -I"../../../asiosdk2/host/pc" -I"../../common"
BIN = portaudio-static.a
CXXFLAGS = $(CXXINCS)-O3 -fmessage-length=0 -Wall
CFLAGS = $(INCS)-O3 -fmessage-length=0 -Wall
.PHONY: all all-before all-after clean clean-custom
all: all-before portaudio-static.a all-after
clean: clean-custom
rm -f $(OBJ) $(BIN)
$(BIN): $(LINKOBJ)
ar r $(BIN) $(LINKOBJ)
ranlib $(BIN)
./pa_hostapi_skeleton.o: ../../hostapi/skeleton/pa_hostapi_skeleton.c
$(CPP) -c ../../hostapi/skeleton/pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CXXFLAGS)
./pa_stream.o: ../../common/pa_stream.c
$(CPP) -c ../../common/pa_stream.c -o ./pa_stream.o $(CXXFLAGS)
./pa_trace.o: ../../common/pa_trace.c
$(CPP) -c ../../common/pa_trace.c -o ./pa_trace.o $(CXXFLAGS)
./pa_allocation.o: ../../common/pa_allocation.c
$(CPP) -c ../../common/pa_allocation.c -o ./pa_allocation.o $(CXXFLAGS)
./pa_converters.o: ../../common/pa_converters.c
$(CPP) -c ../../common/pa_converters.c -o ./pa_converters.o $(CXXFLAGS)
./pa_cpuload.o: ../../common/pa_cpuload.c
$(CPP) -c ../../common/pa_cpuload.c -o ./pa_cpuload.o $(CXXFLAGS)
./pa_dither.o: ../../common/pa_dither.c
$(CPP) -c ../../common/pa_dither.c -o ./pa_dither.o $(CXXFLAGS)
./pa_front.o: ../../common/pa_front.c
$(CPP) -c ../../common/pa_front.c -o ./pa_front.o $(CXXFLAGS)
./pa_process.o: ../../common/pa_process.c
$(CPP) -c ../../common/pa_process.c -o ./pa_process.o $(CXXFLAGS)
./pa_asio.o: ../../pa_asio/pa_asio.cpp
$(CPP) -c ../../pa_asio/pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
./pa_win_util.o: ../pa_win_util.c
$(CPP) -c ../pa_win_util.c -o ./pa_win_util.o $(CXXFLAGS)
./pa_win_hostapis.o: ../pa_win_hostapis.c
$(CPP) -c ../pa_win_hostapis.c -o ./pa_win_hostapis.o $(CXXFLAGS)
./pa_win_ds.o: ../../pa_win_ds/pa_win_ds.c
$(CPP) -c ../../pa_win_ds/pa_win_ds.c -o ./pa_win_ds.o $(CXXFLAGS)
./dsound_wrapper.o: ../../pa_win_ds/dsound_wrapper.c
$(CPP) -c ../../pa_win_ds/dsound_wrapper.c -o ./dsound_wrapper.o $(CXXFLAGS)
./pa_win_wmme.o: ../../pa_win_wmme/pa_win_wmme.c
$(CPP) -c ../../pa_win_wmme/pa_win_wmme.c -o ./pa_win_wmme.o $(CXXFLAGS)
./iasiothiscallresolver.o: ../../pa_asio/iasiothiscallresolver.cpp
$(CPP) -c ../../pa_asio/iasiothiscallresolver.cpp -o ./iasiothiscallresolver.o $(CXXFLAGS)

View File

@ -1,209 +0,0 @@
[Project]
FileName=portaudio-dll.dev
Name=portaudio-dll
UnitCount=16
Type=3
Ver=1
ObjFiles=
Includes=..\..\..\asiosdk2;..\..\..\asiosdk2\common;..\..\..\asiosdk2\host;..\..\..\asiosdk2\host\pc;..\..\common
Libs=
PrivateResource=
ResourceIncludes=
MakeIncludes=
Compiler=-DBUILDING_DLL=1_@@_-O3_@@_
CppCompiler=-O3_@@_
Linker=--no-export-all-symbols --add-stdcall-alias_@@_../../../asiosdk2/asiosdk2.a_@@_-lkernel32 -luser32 -lgdi32 -lwinspool -lcomdlg32 -ladvapi32 -lshell32 -lole32 -loleaut32 -luuid -lodbc32 -lodbccp32 -lwinmm_@@_-O3 -s_@@_
IsCpp=1
Icon=
ExeOutput=.
ObjectOutput=.
OverrideOutput=0
OverrideOutputName=portaudio.a
HostApplication=
Folders=
CommandLine=
IncludeVersionInfo=0
SupportXPThemes=0
CompilerSet=0
CompilerSettings=0000000000000000000
UseCustomMakefile=0
CustomMakefile=
[Unit1]
FileName=..\..\hostapi\skeleton\pa_hostapi_skeleton.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CFLAGS)
[Unit2]
FileName=..\..\common\pa_stream.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_stream.c -o ./pa_stream.o $(CFLAGS)
[Unit3]
FileName=..\..\common\pa_trace.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_trace.c -o ./pa_trace.o $(CFLAGS)
[Unit4]
FileName=..\..\common\pa_allocation.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_allocation.c -o ./pa_allocation.o $(CFLAGS)
[Unit5]
FileName=..\..\common\pa_converters.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_converters.c -o ./pa_converters.o $(CFLAGS)
[Unit6]
FileName=..\..\common\pa_cpuload.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_cpuload.c -o ./pa_cpuload.o $(CFLAGS)
[Unit7]
FileName=..\..\common\pa_dither.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_dither.c -o ./pa_dither.o $(CFLAGS)
[Unit8]
FileName=..\..\common\pa_front.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_front.c -o ./pa_front.o $(CFLAGS)
[Unit9]
FileName=..\..\common\pa_process.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_process.c -o ./pa_process.o $(CFLAGS)
[VersionInfo]
Major=0
Minor=1
Release=1
Build=1
LanguageID=1033
CharsetID=1252
CompanyName=
FileVersion=
FileDescription=Developed using the Dev-C++ IDE
InternalName=
LegalCopyright=
LegalTrademarks=
OriginalFilename=
ProductName=
ProductVersion=
AutoIncBuildNr=0
[Unit10]
FileName=..\..\pa_asio\pa_asio.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CPP) -c pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
[Unit11]
FileName=..\pa_win_util.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_util.c -o ./pa_win_util.o $(CFLAGS)
[Unit12]
FileName=..\pa_win_hostapis.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_hostapis.c -o ./pa_win_hostapis.o $(CFLAGS)
[Unit13]
FileName=..\..\pa_win_ds\pa_win_ds.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_ds.c -o ./pa_win_ds.o $(CFLAGS)
[Unit14]
FileName=..\..\pa_win_ds\dsound_wrapper.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c dsound_wrapper.c -o ./dsound_wrapper.o $(CFLAGS)
[Unit15]
FileName=..\..\pa_win_wmme\pa_win_wmme.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_wmme.c -o ./pa_win_wmme.o $(CFLAGS)
[Unit16]
FileName=..\..\pa_asio\iasiothiscallresolver.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=

View File

@ -1,209 +0,0 @@
[Project]
FileName=portaudio-static.dev
Name=portaudio-static
UnitCount=16
Type=2
Ver=1
ObjFiles=
Includes=..\..\..\asiosdk2;..\..\..\asiosdk2\common;..\..\..\asiosdk2\host;..\..\..\asiosdk2\host\pc;..\..\common
Libs=
PrivateResource=
ResourceIncludes=
MakeIncludes=
Compiler=-O3_@@_
CppCompiler=-O3_@@_
Linker=-O3 -s_@@_
IsCpp=1
Icon=
ExeOutput=.
ObjectOutput=.
OverrideOutput=0
OverrideOutputName=portaudio.a
HostApplication=
Folders=
CommandLine=
IncludeVersionInfo=0
SupportXPThemes=0
CompilerSet=0
CompilerSettings=0000000000000000000
UseCustomMakefile=0
CustomMakefile=
[Unit1]
FileName=..\..\hostapi\skeleton\pa_hostapi_skeleton.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_hostapi_skeleton.c -o ./pa_hostapi_skeleton.o $(CFLAGS)
[Unit2]
FileName=..\..\common\pa_stream.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_stream.c -o ./pa_stream.o $(CFLAGS)
[Unit3]
FileName=..\..\common\pa_trace.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_trace.c -o ./pa_trace.o $(CFLAGS)
[Unit4]
FileName=..\..\common\pa_allocation.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_allocation.c -o ./pa_allocation.o $(CFLAGS)
[Unit5]
FileName=..\..\common\pa_converters.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_converters.c -o ./pa_converters.o $(CFLAGS)
[Unit6]
FileName=..\..\common\pa_cpuload.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_cpuload.c -o ./pa_cpuload.o $(CFLAGS)
[Unit7]
FileName=..\..\common\pa_dither.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_dither.c -o ./pa_dither.o $(CFLAGS)
[Unit8]
FileName=..\..\common\pa_front.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_front.c -o ./pa_front.o $(CFLAGS)
[Unit9]
FileName=..\..\common\pa_process.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_process.c -o ./pa_process.o $(CFLAGS)
[VersionInfo]
Major=0
Minor=1
Release=1
Build=1
LanguageID=1033
CharsetID=1252
CompanyName=
FileVersion=
FileDescription=Developed using the Dev-C++ IDE
InternalName=
LegalCopyright=
LegalTrademarks=
OriginalFilename=
ProductName=
ProductVersion=
AutoIncBuildNr=0
[Unit10]
FileName=..\..\pa_asio\pa_asio.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CPP) -c pa_asio.cpp -o ./pa_asio.o $(CXXFLAGS)
[Unit11]
FileName=..\..\pa_win\pa_win_util.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_util.c -o ./pa_win_util.o $(CFLAGS)
[Unit12]
FileName=..\..\pa_win\pa_win_hostapis.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_hostapis.c -o ./pa_win_hostapis.o $(CFLAGS)
[Unit13]
FileName=..\..\pa_win_ds\pa_win_ds.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_ds.c -o ./pa_win_ds.o $(CFLAGS)
[Unit14]
FileName=..\..\pa_win_ds\dsound_wrapper.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c dsound_wrapper.c -o ./dsound_wrapper.o $(CFLAGS)
[Unit15]
FileName=..\..\pa_win_wmme\pa_win_wmme.c
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=$(CC) -c pa_win_wmme.c -o ./pa_win_wmme.o $(CFLAGS)
[Unit16]
FileName=..\..\pa_asio\iasiothiscallresolver.cpp
CompileCpp=1
Folder=portaudio
Compile=1
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=

View File

@ -1,23 +0,0 @@
From: "Peter L Jones"
Sent: Wednesday, September 17, 2003 5:18 AM
Subject: Dev-C++ project files
I attach two project files intended for portaudio/pa_win/dev-cpp (i.e. in
parallel with the msvc directory), if you want them. One is for a static
library build and one for a DLL. I've used the static library (in building
a single monolithic DLL) but I can't guarantee the DLL version will build a
working library (I think it's mostly there, though!).
I also attach the resulting makefiles, which may be of use to other MinGW
users.
They're rooted in the directory given above and drop their object and
library files in the same place. They assume the asiosdk2 files are in the
same directory as portaudio/ in a sub-directory called asiosdk2/. Oh! The
DLL is built against a static asiosdk2.a library... maybe not the best way
to do it... I ought to figure out how to link against a "home made" dll in
Dev-C++, I guess ;-)
Cheers,
-- Peter

View File

@ -1,10 +1,10 @@
#! /bin/sh #! /bin/sh
# Attempt to guess a canonical system name. # Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# Free Software Foundation, Inc. # 2011 Free Software Foundation, Inc.
timestamp='2009-12-30' timestamp='2011-05-11'
# This file is free software; you can redistribute it and/or modify it # This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by # under the terms of the GNU General Public License as published by
@ -57,7 +57,7 @@ GNU config.guess ($timestamp)
Originally written by Per Bothner. Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
Software Foundation, Inc. Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO This is free software; see the source for copying conditions. There is NO
@ -270,7 +270,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel. # A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r. # 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
exit ;; # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
exit $exitcode ;;
Alpha\ *:Windows_NT*:*) Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem? # How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead # Should we change UNAME_MACHINE based on the output of uname instead
@ -552,7 +555,7 @@ EOF
echo rs6000-ibm-aix3.2 echo rs6000-ibm-aix3.2
fi fi
exit ;; exit ;;
*:AIX:*:[456]) *:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000 IBM_ARCH=rs6000
@ -879,7 +882,13 @@ EOF
then then
echo ${UNAME_MACHINE}-unknown-linux-gnu echo ${UNAME_MACHINE}-unknown-linux-gnu
else else
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
echo ${UNAME_MACHINE}-unknown-linux-gnueabi echo ${UNAME_MACHINE}-unknown-linux-gnueabi
else
echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
fi
fi fi
exit ;; exit ;;
avr32*:Linux:*:*) avr32*:Linux:*:*)
@ -968,6 +977,9 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*) sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;; exit ;;
tile*:Linux:*:*)
echo ${UNAME_MACHINE}-tilera-linux-gnu
exit ;;
vax:Linux:*:*) vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;; exit ;;
@ -1231,6 +1243,9 @@ EOF
*:QNX:*:4*) *:QNX:*:4*)
echo i386-pc-qnx echo i386-pc-qnx
exit ;; exit ;;
NEO-?:NONSTOP_KERNEL:*:*)
echo neo-tandem-nsk${UNAME_RELEASE}
exit ;;
NSE-?:NONSTOP_KERNEL:*:*) NSE-?:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE} echo nse-tandem-nsk${UNAME_RELEASE}
exit ;; exit ;;

View File

@ -1,10 +1,10 @@
#! /bin/sh #! /bin/sh
# Configuration validation subroutine script. # Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# Free Software Foundation, Inc. # 2011 Free Software Foundation, Inc.
timestamp='2010-01-22' timestamp='2011-03-23'
# This file is (in principle) common to ALL GNU software. # This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software # The presence of a machine in this file suggests that SOME GNU software
@ -76,7 +76,7 @@ version="\
GNU config.sub ($timestamp) GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
Software Foundation, Inc. Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO This is free software; see the source for copying conditions. There is NO
@ -124,8 +124,9 @@ esac
# Here we must recognize all the valid KERNEL-OS combinations. # Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in case $maybe_os in
nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \ kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*) storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os os=-$maybe_os
@ -282,11 +283,13 @@ case $basic_machine in
| moxie \ | moxie \
| mt \ | mt \
| msp430 \ | msp430 \
| nds32 | nds32le | nds32be \
| nios | nios2 \ | nios | nios2 \
| ns16k | ns32k \ | ns16k | ns32k \
| open8 \
| or32 \ | or32 \
| pdp10 | pdp11 | pj | pjl \ | pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \ | pyramid \
| rx \ | rx \
| score \ | score \
@ -294,15 +297,24 @@ case $basic_machine in
| sh64 | sh64le \ | sh64 | sh64le \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
| spu | strongarm \ | spu \
| tahoe | thumb | tic4x | tic80 | tron \ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \ | ubicom32 \
| v850 | v850e \ | v850 | v850e \
| we32k \ | we32k \
| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ | x86 | xc16x | xstormy16 | xtensa \
| z8k | z80) | z8k | z80)
basic_machine=$basic_machine-unknown basic_machine=$basic_machine-unknown
;; ;;
c54x)
basic_machine=tic54x-unknown
;;
c55x)
basic_machine=tic55x-unknown
;;
c6x)
basic_machine=tic6x-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12 | picochip) m6811 | m68hc11 | m6812 | m68hc12 | picochip)
# Motorola 68HC11/12. # Motorola 68HC11/12.
basic_machine=$basic_machine-unknown basic_machine=$basic_machine-unknown
@ -314,6 +326,18 @@ case $basic_machine in
basic_machine=mt-unknown basic_machine=mt-unknown
;; ;;
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
xscaleeb)
basic_machine=armeb-unknown
;;
xscaleel)
basic_machine=armel-unknown
;;
# We use `pc' rather than `unknown' # We use `pc' rather than `unknown'
# because (1) that's what they normally are, and # because (1) that's what they normally are, and
# (2) the word "unknown" tends to confuse beginning users. # (2) the word "unknown" tends to confuse beginning users.
@ -334,7 +358,7 @@ case $basic_machine in
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \ | avr-* | avr32-* \
| bfin-* | bs2000-* \ | bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \ | clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \ | d10v-* | d30v-* | dlx-* \
| elxsi-* \ | elxsi-* \
@ -368,26 +392,28 @@ case $basic_machine in
| mmix-* \ | mmix-* \
| mt-* \ | mt-* \
| msp430-* \ | msp430-* \
| nds32-* | nds32le-* | nds32be-* \
| nios-* | nios2-* \ | nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \ | none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
| orion-* \ | orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \ | pyramid-* \
| romp-* | rs6000-* | rx-* \ | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \ | sparclite-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \ | tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tile-* | tilegx-* \ | tile-* | tilegx-* \
| tron-* \ | tron-* \
| ubicom32-* \ | ubicom32-* \
| v850-* | v850e-* | vax-* \ | v850-* | v850e-* | vax-* \
| we32k-* \ | we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \ | xstormy16-* | xtensa*-* \
| ymp-* \ | ymp-* \
| z8k-* | z80-*) | z8k-* | z80-*)
@ -482,6 +508,15 @@ case $basic_machine in
basic_machine=powerpc-ibm basic_machine=powerpc-ibm
os=-cnk os=-cnk
;; ;;
c54x-*)
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c55x-*)
basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c6x-*)
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c90) c90)
basic_machine=c90-cray basic_machine=c90-cray
os=-unicos os=-unicos
@ -518,7 +553,7 @@ case $basic_machine in
basic_machine=craynv-cray basic_machine=craynv-cray
os=-unicosmp os=-unicosmp
;; ;;
cr16) cr16 | cr16-*)
basic_machine=cr16-unknown basic_machine=cr16-unknown
os=-elf os=-elf
;; ;;
@ -841,6 +876,12 @@ case $basic_machine in
np1) np1)
basic_machine=np1-gould basic_machine=np1-gould
;; ;;
neo-tandem)
basic_machine=neo-tandem
;;
nse-tandem)
basic_machine=nse-tandem
;;
nsr-tandem) nsr-tandem)
basic_machine=nsr-tandem basic_machine=nsr-tandem
;; ;;
@ -923,9 +964,10 @@ case $basic_machine in
;; ;;
power) basic_machine=power-ibm power) basic_machine=power-ibm
;; ;;
ppc) basic_machine=powerpc-unknown ppc | ppcbe) basic_machine=powerpc-unknown
;; ;;
ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;; ;;
ppcle | powerpclittle | ppc-le | powerpc-little) ppcle | powerpclittle | ppc-le | powerpc-little)
basic_machine=powerpcle-unknown basic_machine=powerpcle-unknown
@ -1019,6 +1061,9 @@ case $basic_machine in
basic_machine=i860-stratus basic_machine=i860-stratus
os=-sysv4 os=-sysv4
;; ;;
strongarm-* | thumb-*)
basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
sun2) sun2)
basic_machine=m68000-sun basic_machine=m68000-sun
;; ;;
@ -1075,18 +1120,6 @@ case $basic_machine in
basic_machine=t90-cray basic_machine=t90-cray
os=-unicos os=-unicos
;; ;;
tic54x | c54x*)
basic_machine=tic54x-unknown
os=-coff
;;
tic55x | c55x*)
basic_machine=tic55x-unknown
os=-coff
;;
tic6x | c6x*)
basic_machine=tic6x-unknown
os=-coff
;;
# This must be matched before tile*. # This must be matched before tile*.
tilegx*) tilegx*)
basic_machine=tilegx-unknown basic_machine=tilegx-unknown
@ -1163,6 +1196,9 @@ case $basic_machine in
xps | xps100) xps | xps100)
basic_machine=xps100-honeywell basic_machine=xps100-honeywell
;; ;;
xscale-* | xscalee[bl]-*)
basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
;;
ymp) ymp)
basic_machine=ymp-cray basic_machine=ymp-cray
os=-unicos os=-unicos
@ -1301,7 +1337,8 @@ case $os in
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \ | -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ | -mingw32* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \ | -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@ -1484,6 +1521,15 @@ case $basic_machine in
c4x-* | tic4x-*) c4x-* | tic4x-*)
os=-coff os=-coff
;; ;;
tic54x-*)
os=-coff
;;
tic55x-*)
os=-coff
;;
tic6x-*)
os=-coff
;;
# This must come before the *-dec entry. # This must come before the *-dec entry.
pdp10-*) pdp10-*)
os=-tops20 os=-tops20

File diff suppressed because it is too large Load Diff

View File

@ -97,6 +97,10 @@ AC_ARG_WITH(host_os, [], host_os=$withval)
dnl Checks for programs. dnl Checks for programs.
AC_PROG_CC AC_PROG_CC
dnl ASIO and CXX bindings need a C++ compiler
if [[ "$with_asio" = "yes" ] || [ "$enable_cxx" = "yes" ]] ; then
AC_PROG_CXX
fi
AC_LIBTOOL_WIN32_DLL AC_LIBTOOL_WIN32_DLL
AC_PROG_LIBTOOL AC_PROG_LIBTOOL
AC_PROG_INSTALL AC_PROG_INSTALL
@ -191,13 +195,16 @@ add_objects()
INCLUDES=portaudio.h INCLUDES=portaudio.h
dnl Include directories needed by all implementations
CFLAGS="$CFLAGS -I\$(top_srcdir)/include -I\$(top_srcdir)/src/common"
case "${host_os}" in case "${host_os}" in
darwin* ) darwin* )
dnl Mac OS X configuration dnl Mac OS X configuration
AC_DEFINE(PA_USE_COREAUDIO) AC_DEFINE(PA_USE_COREAUDIO,1)
CFLAGS="$CFLAGS -Werror" CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix -Werror"
LIBS="-framework CoreAudio -framework AudioToolbox -framework AudioUnit -framework Carbon" LIBS="-framework CoreAudio -framework AudioToolbox -framework AudioUnit -framework Carbon"
if test "x$enable_mac_universal" = "xyes" ; then if test "x$enable_mac_universal" = "xyes" ; then
@ -235,7 +242,7 @@ case "${host_os}" in
PADLL="portaudio.dll" PADLL="portaudio.dll"
THREAD_CFLAGS="-mthreads" THREAD_CFLAGS="-mthreads"
SHARED_FLAGS="-shared" SHARED_FLAGS="-shared"
CFLAGS="$CFLAGS -I\$(top_srcdir)/include -DPA_USE_WMME=0 -DPA_USE_ASIO=0 -DPA_USE_WDMKS=0 -DPA_USE_DS=0 -DPA_USE_WASAPI=0" CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -DPA_USE_WMME=0 -DPA_USE_ASIO=0 -DPA_USE_WDMKS=0 -DPA_USE_DS=0 -DPA_USE_WASAPI=0"
if [[ "x$with_directx" = "xyes" ]]; then if [[ "x$with_directx" = "xyes" ]]; then
DXDIR="$with_dxdir" DXDIR="$with_dxdir"
@ -244,7 +251,7 @@ case "${host_os}" in
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -ldsound -lole32" DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -ldsound -lole32"
#VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\"" #VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\""
#CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO" #CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -I$DXDIR/include -UPA_USE_DS -DPA_USE_DS=1" CFLAGS="$CFLAGS -I$DXDIR/include -UPA_USE_DS -DPA_USE_DS=1"
fi fi
if [[ "x$with_asio" = "xyes" ]]; then if [[ "x$with_asio" = "xyes" ]]; then
@ -252,7 +259,14 @@ case "${host_os}" in
add_objects src/hostapi/asio/pa_asio.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/hostapi/asio/iasiothiscallresolver.o $ASIODIR/common/asio.o $ASIODIR/host/asiodrivers.o $ASIODIR/host/pc/asiolist.o add_objects src/hostapi/asio/pa_asio.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/hostapi/asio/iasiothiscallresolver.o $ASIODIR/common/asio.o $ASIODIR/host/asiodrivers.o $ASIODIR/host/pc/asiolist.o
LIBS="-lwinmm -lm -lole32 -luuid" LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -lole32 -luuid" DLL_LIBS="${DLL_LIBS} -lwinmm -lm -lole32 -luuid"
CFLAGS="$CFLAGS -ffast-math -fomit-frame-pointer -I\$(top_srcdir)/src/common -I\$(top_srcdir)/src/hostapi/asio -I$ASIODIR/host/pc -I$ASIODIR/common -I$ASIODIR/host -UPA_USE_ASIO -DPA_USE_ASIO=1 -DWINDOWS" CFLAGS="$CFLAGS -ffast-math -fomit-frame-pointer -I\$(top_srcdir)/src/hostapi/asio -I$ASIODIR/host/pc -I$ASIODIR/common -I$ASIODIR/host -UPA_USE_ASIO -DPA_USE_ASIO=1 -DWINDOWS"
dnl Setting the windows version flags below resolves a conflict between Interlocked*
dnl definitions in mingw winbase.h and Interlocked* hacks in ASIO SDK combase.h
dnl combase.h is included by asiodrvr.h
dnl PortAudio does not actually require Win XP (winver 501) APIs
CFLAGS="$CFLAGS -D_WIN32_WINNT=0x0501 -DWINVER=0x0501"
CXXFLAGS="$CFLAGS" CXXFLAGS="$CFLAGS"
fi fi
@ -263,21 +277,21 @@ case "${host_os}" in
DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -luuid -lsetupapi -lole32" DLL_LIBS="${DLL_LIBS} -lwinmm -lm -L$DXDIR/lib -luuid -lsetupapi -lole32"
#VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\"" #VC98="\"/c/Program Files/Microsoft Visual Studio/VC98/Include\""
#CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO" #CFLAGS="$CFLAGS -I$VC98 -DPA_NO_WMME -DPA_NO_ASIO"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -I$DXDIR/include -UPA_USE_WDMKS -DPA_USE_WDMKS=1" CFLAGS="$CFLAGS -I$DXDIR/include -UPA_USE_WDMKS -DPA_USE_WDMKS=1"
fi fi
if [[ "x$with_wmme" = "xyes" ]]; then if [[ "x$with_wmme" = "xyes" ]]; then
add_objects src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o add_objects src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o
LIBS="-lwinmm -lm -lole32 -luuid" LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm" DLL_LIBS="${DLL_LIBS} -lwinmm"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -UPA_USE_WMME -DPA_USE_WMME=1" CFLAGS="$CFLAGS -UPA_USE_WMME -DPA_USE_WMME=1"
fi fi
if [[ "x$with_wasapi" = "xyes" ]]; then if [[ "x$with_wasapi" = "xyes" ]]; then
add_objects src/hostapi/wasapi/pa_win_wasapi.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/os/win/pa_win_waveformat.o add_objects src/hostapi/wasapi/pa_win_wasapi.o src/common/pa_ringbuffer.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_coinitialize.o src/os/win/pa_win_waveformat.o
LIBS="-lwinmm -lm -lole32 -luuid" LIBS="-lwinmm -lm -lole32 -luuid"
DLL_LIBS="${DLL_LIBS} -lwinmm -lole32" DLL_LIBS="${DLL_LIBS} -lwinmm -lole32"
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/common -I\$(top_srcdir)/src/hostapi/wasapi/mingw-include -UPA_USE_WASAPI -DPA_USE_WASAPI=1" CFLAGS="$CFLAGS -I\$(top_srcdir)/src/hostapi/wasapi/mingw-include -UPA_USE_WASAPI -DPA_USE_WASAPI=1"
fi fi
;; ;;
@ -285,7 +299,7 @@ case "${host_os}" in
dnl Cygwin configuration dnl Cygwin configuration
OTHER_OBJS="src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o" OTHER_OBJS="src/hostapi/wmme/pa_win_wmme.o src/os/win/pa_win_hostapis.o src/os/win/pa_win_util.o src/os/win/pa_win_waveformat.o"
CFLAGS="$CFLAGS -DPA_USE_DS=0 -DPA_USE_WDMKS=0 -DPA_USE_ASIO=0 -DPA_USE_WASAPI=0 -DPA_USE_WMME=1" CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/win -DPA_USE_DS=0 -DPA_USE_WDMKS=0 -DPA_USE_ASIO=0 -DPA_USE_WASAPI=0 -DPA_USE_WMME=1"
LIBS="-lwinmm -lm" LIBS="-lwinmm -lm"
PADLL="portaudio.dll" PADLL="portaudio.dll"
THREAD_CFLAGS="-mthreads" THREAD_CFLAGS="-mthreads"
@ -304,7 +318,9 @@ case "${host_os}" in
dnl See the '#ifdef PA_USE_SGI' in file pa_unix/pa_unix_hostapis.c dnl See the '#ifdef PA_USE_SGI' in file pa_unix/pa_unix_hostapis.c
dnl which selects the appropriate PaXXX_Initialize() function. dnl which selects the appropriate PaXXX_Initialize() function.
dnl dnl
AC_DEFINE(PA_USE_SGI) AC_DEFINE(PA_USE_SGI,1)
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix"
dnl The _REENTRANT option for pthread safety. Perhaps not necessary but it'll do no harm. dnl The _REENTRANT option for pthread safety. Perhaps not necessary but it'll do no harm.
dnl dnl
@ -322,6 +338,8 @@ case "${host_os}" in
*) *)
dnl Unix configuration dnl Unix configuration
CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix"
AC_CHECK_LIB(pthread, pthread_create,[have_pthread="yes"], AC_CHECK_LIB(pthread, pthread_create,[have_pthread="yes"],
AC_MSG_ERROR([libpthread not found!])) AC_MSG_ERROR([libpthread not found!]))
@ -330,7 +348,7 @@ case "${host_os}" in
LIBS="$LIBS -lasound" LIBS="$LIBS -lasound"
OTHER_OBJS="$OTHER_OBJS src/hostapi/alsa/pa_linux_alsa.o" OTHER_OBJS="$OTHER_OBJS src/hostapi/alsa/pa_linux_alsa.o"
INCLUDES="$INCLUDES pa_linux_alsa.h" INCLUDES="$INCLUDES pa_linux_alsa.h"
AC_DEFINE(PA_USE_ALSA) AC_DEFINE(PA_USE_ALSA,1)
fi fi
if [[ "$have_jack" = "yes" ] && [ "$with_jack" != "no" ]] ; then if [[ "$have_jack" = "yes" ] && [ "$with_jack" != "no" ]] ; then
@ -338,7 +356,7 @@ case "${host_os}" in
CFLAGS="$CFLAGS $JACK_CFLAGS" CFLAGS="$CFLAGS $JACK_CFLAGS"
OTHER_OBJS="$OTHER_OBJS src/hostapi/jack/pa_jack.o src/common/pa_ringbuffer.o" OTHER_OBJS="$OTHER_OBJS src/hostapi/jack/pa_jack.o src/common/pa_ringbuffer.o"
INCLUDES="$INCLUDES pa_jack.h" INCLUDES="$INCLUDES pa_jack.h"
AC_DEFINE(PA_USE_JACK) AC_DEFINE(PA_USE_JACK,1)
fi fi
if [[ "$with_oss" != "no" ]] ; then if [[ "$with_oss" != "no" ]] ; then
@ -347,14 +365,14 @@ case "${host_os}" in
DLL_LIBS="$DLL_LIBS -lossaudio" DLL_LIBS="$DLL_LIBS -lossaudio"
LIBS="$LIBS -lossaudio" LIBS="$LIBS -lossaudio"
fi fi
AC_DEFINE(PA_USE_OSS) AC_DEFINE(PA_USE_OSS,1)
fi fi
if [[ "$have_asihpi" = "yes" ] && [ "$with_asihpi" != "no" ]] ; then if [[ "$have_asihpi" = "yes" ] && [ "$with_asihpi" != "no" ]] ; then
LIBS="$LIBS -lhpi" LIBS="$LIBS -lhpi"
DLL_LIBS="$DLL_LIBS -lhpi" DLL_LIBS="$DLL_LIBS -lhpi"
OTHER_OBJS="$OTHER_OBJS src/hostapi/asihpi/pa_linux_asihpi.o" OTHER_OBJS="$OTHER_OBJS src/hostapi/asihpi/pa_linux_asihpi.o"
AC_DEFINE(PA_USE_ASIHPI) AC_DEFINE(PA_USE_ASIHPI,1)
fi fi
DLL_LIBS="$DLL_LIBS -lm -lpthread" DLL_LIBS="$DLL_LIBS -lm -lpthread"
@ -368,7 +386,7 @@ case "${host_os}" in
THREAD_CFLAGS="-mt" THREAD_CFLAGS="-mt"
;; ;;
*) *)
SHARED_FLAGS="-shared -fPIC" SHARED_FLAGS="-fPIC"
THREAD_CFLAGS="-pthread" THREAD_CFLAGS="-pthread"
;; ;;
esac esac

View File

@ -1,7 +1,7 @@
/** @page License PortAudio License /** @page License PortAudio License
PortAudio Portable Real-Time Audio Library <br> PortAudio Portable Real-Time Audio Library <br>
Copyright (c) 1999-2006 Ross Bencina, Phil Burk Copyright (c) 1999-2011 Ross Bencina, Phil Burk

View File

@ -1,35 +1,60 @@
/* doxygen index page */ /* doxygen index page */
/** @mainpage /** @mainpage
@section overview Overview
PortAudio is a cross-platform, open-source C language library for real-time audio input and output. The library provides functions that allow your software to acquire and output real-time audio streams from your computer's hardware audio interfaces. It is designed to simplify writing cross-platform audio applications, and also to simplify the development of audio software in general by hiding the complexities of dealing directly with each native audio API. PortAudio is used to implement sound recording, editing and mixing applications, software synthesizers, effects processors, music players, internet telephony applications, software defined radios and more. Supported platforms include MS Windows, Mac OS X and Linux. Third-party language bindings make it possible to call PortAudio from other programming languages including C++, C#, Python, PureBasic, FreePascal and Lazarus. PortAudio is a cross-platform, open-source C language library for real-time audio input and output. The library provides functions that allow your software to acquire and output real-time audio streams from your computer's hardware audio interfaces. It is designed to simplify writing cross-platform audio applications, and also to simplify the development of audio software in general by hiding the complexities of dealing directly with each native audio API. PortAudio is used to implement sound recording, editing and mixing applications, software synthesizers, effects processors, music players, internet telephony applications, software defined radios and more. Supported platforms include MS Windows, Mac OS X and Linux. Third-party language bindings make it possible to call PortAudio from other programming languages including C++, C#, Python, PureBasic, FreePascal and Lazarus.
See the PortAudio website for further information http://www.portaudio.com
Read the @ref api_overview for a top-down view of the PortAudio API, its capabilities, functions and data structures. The documentation for PortAudio's main header file portaudio.h details the individual data types and functions that make up the API. @section start_here Start here
To get started writing code check out the tutorials on the PortAudio Wiki: - @ref api_overview<br>
http://www.portaudio.com/trac/wiki/TutorialDir/TutorialStart A top-down view of the PortAudio API, its capabilities, functions and data structures
- <a href="http://www.portaudio.com/trac/wiki/TutorialDir/TutorialStart">PortAudio Tutorials</a><br>
Get started writing code with PortAudio tutorials
- @ref examples_src "Examples"<br>
Simple example programs demonstrating PortAudio usage
- @ref License<br>
PortAudio is licensed under the MIT Expat open source licence. We make a non-binding request for you to contribute your changes back to the project.
@section reference API Reference
- portaudio.h Portable API<br>
Detailed documentation for each portable API function and data type
- @ref public_header "Host API Specific Extensions"<br>
Documentation for non-portable platform-specific host API extensions
@section resources Resources
- <a href="http://www.portaudio.com">The PortAudio website</a>
- <a href="http://music.columbia.edu/mailman/listinfo/portaudio/">Our mailing list for users and developers</a><br>
- <a href="http://www.portaudio.com/trac">The PortAudio wiki</a>
@section developer_resources Developer Resources
@if INTERNAL
- @ref srcguide
@endif
- <a href="http://www.portaudio.com/trac">Our Trac wiki and issue tracking system</a>
- <a href="http://www.portaudio.com/docs/proposals/014-StyleGuide.html">Coding guidelines</a>
If you're interested in helping out with PortAudio development we're more than happy for you to be involved. Just drop by the PortAudio mailing list and ask how you can help. Or <a href="http://www.portaudio.com/trac/report/3">check out the starter tickets in Trac</a>.
@section older_api_versions Older API Versions
This documentation covers the current API version: PortAudio V19, API version 2.0. API 2.0 differs in a number of ways from previous versions (most often encountered in PortAudio V18), please consult the enhancement proposals for details of what was added/changed for V19: This documentation covers the current API version: PortAudio V19, API version 2.0. API 2.0 differs in a number of ways from previous versions (most often encountered in PortAudio V18), please consult the enhancement proposals for details of what was added/changed for V19:
http://www.portaudio.com/docs/proposals/index.html http://www.portaudio.com/docs/proposals/index.html
You might also be interested in:
- @ref srcguide
- The @ref License
- Our mailing list for users and developers:
http://music.columbia.edu/mailman/listinfo/portaudio/
- Our issue tracking system:
http://www.portaudio.com/trac
- Coding guidelines:
http://www.portaudio.com/docs/proposals/014-StyleGuide.html
If you're interested in helping out with PortAudio development we're more than happy for you to be involved. Just drop by the PortAudio mailing list and ask how you can help.
*/ */

View File

@ -7,33 +7,45 @@
*/ */
/** /**
@internal
@defgroup common_src Source code common to all implementations @defgroup common_src Source code common to all implementations
*/ */
/** /**
@internal
@defgroup win_src Source code common to all Windows implementations @defgroup win_src Source code common to all Windows implementations
*/ */
/** /**
@internal
@defgroup unix_src Source code common to all Unix implementations @defgroup unix_src Source code common to all Unix implementations
*/ */
/** /**
@internal
@defgroup macosx_src Source code common to all Macintosh implementations @defgroup macosx_src Source code common to all Macintosh implementations
*/ */
/** /**
@internal
@defgroup hostapi_src Source code for specific Host APIs @defgroup hostapi_src Source code for specific Host APIs
*/ */
/** /**
@defgroup test_src Test and example programs @internal
@defgroup test_src Test programs
*/ */
/** /**
@page srcguide A guide to the PortAudio sources. @defgroup examples_src Example programs demonstrating PortAudio usage
*/
/**
@internal
@page srcguide A guide to the PortAudio sources
- \ref public_header - \ref public_header
- \ref examples_src
- \ref common_src - \ref common_src
- \ref win_src - \ref win_src
- \ref unix_src - \ref unix_src

View File

@ -61,24 +61,21 @@ typedef struct PaWinDirectSoundStreamInfo{
PaHostApiTypeId hostApiType; /**< paDirectSound */ PaHostApiTypeId hostApiType; /**< paDirectSound */
unsigned long version; /**< 2 */ unsigned long version; /**< 2 */
unsigned long flags; unsigned long flags; /**< enable other features of this struct */
/* low-level latency setting support /**
Control the size of host buffers in order to set latency. They will low-level latency setting support
be used instead of the generic parameters to Pa_OpenStream() if Sets the size of the DirectSound host buffer.
flags contains the paWinDirectSoundUseLowLevelLatencyParameters When flags contains the paWinDirectSoundUseLowLevelLatencyParameters
flag. this size will be used instead of interpreting the generic latency
parameters to Pa_OpenStream(). If the flag is not set this value is ignored.
If PaWinDirectSoundStreamInfo structures with paWinDirectSoundUseLowLevelLatencyParameters If the stream is a full duplex stream the implementation requires that
are supplied for both input and output in a full duplex stream, then the the values of framesPerBuffer for input and output match (if both are specified).
input and output framesPerBuffer must be the same, or the larger of the
two must be a multiple of the smaller, otherwise a
paIncompatibleHostApiSpecificStreamInfo error will be returned from
Pa_OpenStream().
*/ */
unsigned long framesPerBuffer; /* NOT IMPLEMENTED see http://www.portaudio.com/trac/ticket/129 */ unsigned long framesPerBuffer;
/* /**
support for WAVEFORMATEXTENSIBLE channel masks. If flags contains support for WAVEFORMATEXTENSIBLE channel masks. If flags contains
paWinDirectSoundUseChannelMask this allows you to specify which speakers paWinDirectSoundUseChannelMask this allows you to specify which speakers
to address in a multichannel stream. Constants for channelMask to address in a multichannel stream. Constants for channelMask

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/* /*
* $Id: pa_linux_alsa.c 1691 2011-05-26 20:19:19Z aknudsen $ * $Id: pa_linux_alsa.c 1798 2011-12-08 19:43:29Z alan_horstmann $
* PortAudio Portable Real-Time Audio Library * PortAudio Portable Real-Time Audio Library
* Latest Version at: http://www.portaudio.com * Latest Version at: http://www.portaudio.com
* ALSA implementation by Joshua Haberman and Arve Knudsen * ALSA implementation by Joshua Haberman and Arve Knudsen
@ -957,10 +957,10 @@ typedef struct
HwDevInfo predefinedNames[] = { HwDevInfo predefinedNames[] = {
{ "center_lfe", NULL, 0, 1, 0 }, { "center_lfe", NULL, 0, 1, 0 },
/* { "default", NULL, 0, 1, 0 }, */ /* { "default", NULL, 0, 1, 1 }, */
/* { "dmix", NULL, 0, 1, 0 }, */ { "dmix", NULL, 0, 1, 0 },
/* { "dpl", NULL, 0, 1, 0 }, */ /* { "dpl", NULL, 0, 1, 0 }, */
/* { "dsnoop", NULL, 0, 1, 0 }, */ /* { "dsnoop", NULL, 0, 0, 1 }, */
{ "front", NULL, 0, 1, 0 }, { "front", NULL, 0, 1, 0 },
{ "iec958", NULL, 0, 1, 0 }, { "iec958", NULL, 0, 1, 0 },
/* { "modem", NULL, 0, 1, 0 }, */ /* { "modem", NULL, 0, 1, 0 }, */

View File

@ -1,12 +1,16 @@
/* /*
* $Id:$
* PortAudio Portable Real-Time Audio Library * PortAudio Portable Real-Time Audio Library
* Latest Version at: http://www.portaudio.com * Latest Version at: http://www.portaudio.com
* AudioScience HPI implementation by Fred Gleason, Ludwig Schwardt and
* Eliot Blennerhassett
* *
* PortAudio v18 version of AudioScience HPI driver by Fred Gleason <fredg@salemradiolabs.com> * Copyright (c) 2003 Fred Gleason <fredg@salemradiolabs.com>
* PortAudio v19 version of AudioScience HPI driver by Ludwig Schwardt <schwardt@sun.ac.za> * Copyright (c) 2005,2006 Ludwig Schwardt <schwardt@sun.ac.za>
* Copyright (c) 2011 Eliot Blennerhassett <eblennerhassett@audioscience.com>
* *
* Copyright (c) 2003 Fred Gleason * Based on the Open Source API proposed by Ross Bencina
* Copyright (c) 2005,2006 Ludwig Schwardt * Copyright (c) 1999-2008 Ross Bencina, Phil Burk
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files * a copy of this software and associated documentation files
@ -61,7 +65,7 @@
Documentation for the HPI API can be found at: Documentation for the HPI API can be found at:
http://www.audioscience.com/internet/download/sdk/spchpi.pdf http://www.audioscience.com/internet/download/sdk/hpi_usermanual_html/html/index.html
The Linux HPI driver itself (a kernel module + library) can be downloaded from: The Linux HPI driver itself (a kernel module + library) can be downloaded from:
@ -134,8 +138,6 @@
Output buffer priming via the user callback (i.e. paPrimeOutputBuffersUsingStreamCallback Output buffer priming via the user callback (i.e. paPrimeOutputBuffersUsingStreamCallback
and friends) is not implemented yet. All output is primed with silence. and friends) is not implemented yet. All output is primed with silence.
Please send bug reports etc. to Ludwig Schwardt <schwardt@sun.ac.za>
*/ */
#include <unistd.h> #include <unistd.h>
@ -193,7 +195,7 @@
/** Check return value of HPI function, and map it to PaError */ /** Check return value of HPI function, and map it to PaError */
#define PA_ASIHPI_UNLESS_(expr, paError) \ #define PA_ASIHPI_UNLESS_(expr, paError) \
do { \ do { \
HW16 hpiError = (expr); \ hpi_err_t hpiError = (expr); \
/* If HPI error occurred */ \ /* If HPI error occurred */ \
if( UNLIKELY( hpiError ) ) \ if( UNLIKELY( hpiError ) ) \
{ \ { \
@ -265,8 +267,6 @@ typedef struct PaAsiHpiHostApiRepresentation
/* implementation specific data goes here */ /* implementation specific data goes here */
PaHostApiIndex hostApiIndex; PaHostApiIndex hostApiIndex;
/** HPI subsystem pointer */
HPI_HSUBSYS *subSys;
} }
PaAsiHpiHostApiRepresentation; PaAsiHpiHostApiRepresentation;
@ -280,20 +280,18 @@ typedef struct PaAsiHpiDeviceInfo
/* implementation specific data goes here */ /* implementation specific data goes here */
/** HPI subsystem (required for most HPI calls) */
HPI_HSUBSYS *subSys;
/** Adapter index */ /** Adapter index */
HW16 adapterIndex; uint16_t adapterIndex;
/** Adapter model number (hex) */ /** Adapter model number (hex) */
HW16 adapterType; uint16_t adapterType;
/** Adapter HW/SW version */ /** Adapter HW/SW version */
HW16 adapterVersion; uint16_t adapterVersion;
/** Adapter serial number */ /** Adapter serial number */
HW32 adapterSerialNumber; uint32_t adapterSerialNumber;
/** Stream number */ /** Stream number */
HW16 streamIndex; uint16_t streamIndex;
/** 0=Input, 1=Output (HPI streams are either input or output but not both) */ /** 0=Input, 1=Output (HPI streams are either input or output but not both) */
HW16 streamIsOutput; uint16_t streamIsOutput;
} }
PaAsiHpiDeviceInfo; PaAsiHpiDeviceInfo;
@ -328,27 +326,25 @@ typedef struct PaAsiHpiStreamComponent
{ {
/** Device information (HPI handles, etc) */ /** Device information (HPI handles, etc) */
PaAsiHpiDeviceInfo *hpiDevice; PaAsiHpiDeviceInfo *hpiDevice;
/** Stream handle, as passed to HPI interface. /** Stream handle, as passed to HPI interface. */
HACK: we assume types HPI_HISTREAM and HPI_HOSTREAM are the same... hpi_handle_t hpiStream;
(both are HW32 up to version 3.00 of ASIHPI, and hopefully they stay that way) */
HPI_HISTREAM hpiStream;
/** Stream format, as passed to HPI interface */ /** Stream format, as passed to HPI interface */
HPI_FORMAT hpiFormat; struct hpi_format hpiFormat;
/** Number of bytes per frame, derived from hpiFormat and saved for convenience */ /** Number of bytes per frame, derived from hpiFormat and saved for convenience */
HW32 bytesPerFrame; uint32_t bytesPerFrame;
/** Size of hardware (on-card) buffer of stream in bytes */ /** Size of hardware (on-card) buffer of stream in bytes */
HW32 hardwareBufferSize; uint32_t hardwareBufferSize;
/** Size of host (BBM) buffer of stream in bytes (if used) */ /** Size of host (BBM) buffer of stream in bytes (if used) */
HW32 hostBufferSize; uint32_t hostBufferSize;
/** Upper limit on the utilization of output stream buffer (both hardware and host). /** Upper limit on the utilization of output stream buffer (both hardware and host).
This prevents large latencies in an output-only stream with a potentially huge buffer This prevents large latencies in an output-only stream with a potentially huge buffer
and a fast data generator, which would otherwise keep the hardware buffer filled to and a fast data generator, which would otherwise keep the hardware buffer filled to
capacity. See also the "Hardware Buffering=off" option in the AudioScience WAV driver. */ capacity. See also the "Hardware Buffering=off" option in the AudioScience WAV driver. */
HW32 outputBufferCap; uint32_t outputBufferCap;
/** Sample buffer (halfway station between HPI and buffer processor) */ /** Sample buffer (halfway station between HPI and buffer processor) */
HW8 *tempBuffer; uint8_t *tempBuffer;
/** Sample buffer size, in bytes */ /** Sample buffer size, in bytes */
HW32 tempBufferSize; uint32_t tempBufferSize;
} }
PaAsiHpiStreamComponent; PaAsiHpiStreamComponent;
@ -369,7 +365,7 @@ typedef struct PaAsiHpiStream
PaAsiHpiStreamComponent *input, *output; PaAsiHpiStreamComponent *input, *output;
/** Polling interval (in milliseconds) */ /** Polling interval (in milliseconds) */
HW32 pollingInterval; uint32_t pollingInterval;
/** Are we running in callback mode? */ /** Are we running in callback mode? */
int callbackMode; int callbackMode;
/** Number of frames to transfer at a time to/from HPI */ /** Number of frames to transfer at a time to/from HPI */
@ -401,23 +397,23 @@ PaAsiHpiStream;
typedef struct PaAsiHpiStreamInfo typedef struct PaAsiHpiStreamInfo
{ {
/** HPI stream state (HPI_STATE_STOPPED, HPI_STATE_PLAYING, etc.) */ /** HPI stream state (HPI_STATE_STOPPED, HPI_STATE_PLAYING, etc.) */
HW16 state; uint16_t state;
/** Size (in bytes) of recording/playback data buffer in HPI driver */ /** Size (in bytes) of recording/playback data buffer in HPI driver */
HW32 bufferSize; uint32_t bufferSize;
/** Amount of data (in bytes) available in the buffer */ /** Amount of data (in bytes) available in the buffer */
HW32 dataSize; uint32_t dataSize;
/** Number of frames played/recorded since last stream reset */ /** Number of frames played/recorded since last stream reset */
HW32 frameCounter; uint32_t frameCounter;
/** Amount of data (in bytes) in hardware (on-card) buffer. /** Amount of data (in bytes) in hardware (on-card) buffer.
This differs from dataSize if bus mastering (BBM) is used, which introduces another This differs from dataSize if bus mastering (BBM) is used, which introduces another
driver-level buffer to which dataSize/bufferSize then refers. */ driver-level buffer to which dataSize/bufferSize then refers. */
HW32 auxDataSize; uint32_t auxDataSize;
/** Total number of data frames currently buffered by HPI driver (host + hw buffers) */ /** Total number of data frames currently buffered by HPI driver (host + hw buffers) */
HW32 totalBufferedData; uint32_t totalBufferedData;
/** Size of immediately available data (for input) or space (for output) in frames. /** Size of immediately available data (for input) or space (for output) in frames.
This only checks the first-level buffer (typically host buffer). This amount can be This only checks the first-level buffer (typically host buffer). This amount can be
transferred immediately. */ transferred immediately. */
HW32 availableFrames; uint32_t availableFrames;
/** Indicates that hardware buffer is getting too full */ /** Indicates that hardware buffer is getting too full */
int overflow; int overflow;
/** Indicates that hardware buffer is getting too empty */ /** Indicates that hardware buffer is getting too empty */
@ -479,21 +475,21 @@ static void *CallbackThreadFunc( void *userData );
/* Functions specific to this API */ /* Functions specific to this API */
static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostApi ); static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostApi );
static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat ); static uint16_t PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat );
static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat ); static PaSampleFormat PaAsiHpi_HpiToPaFormat( uint16_t hpiFormat );
static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi,
const PaStreamParameters *parameters, double sampleRate, const PaStreamParameters *parameters, double sampleRate,
PaAsiHpiDeviceInfo **hpiDevice, HPI_FORMAT *hpiFormat ); PaAsiHpiDeviceInfo **hpiDevice, struct hpi_format *hpiFormat );
static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat, const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
HPI_HISTREAM *hpiStream ); hpi_handle_t *hpiStream );
static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat, const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
HPI_HOSTREAM *hpiStream ); hpi_handle_t *hpiStream );
static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info ); static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info );
static void PaAsiHpi_StreamComponentDump( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStream *stream ); static void PaAsiHpi_StreamComponentDump( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStream *stream );
static void PaAsiHpi_StreamDump( PaAsiHpiStream *stream ); static void PaAsiHpi_StreamDump( PaAsiHpiStream *stream );
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32 pollingInterval, static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, uint32_t pollingInterval,
unsigned long framesPerPaHostBuffer, PaTime suggestedLatency ); unsigned long framesPerPaHostBuffer, PaTime suggestedLatency );
static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream ); static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream );
static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed ); static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed );
@ -529,43 +525,38 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
PaUtilHostApiRepresentation *hostApi = &hpiHostApi->baseHostApiRep; PaUtilHostApiRepresentation *hostApi = &hpiHostApi->baseHostApiRep;
PaHostApiInfo *baseApiInfo = &hostApi->info; PaHostApiInfo *baseApiInfo = &hostApi->info;
PaAsiHpiDeviceInfo *hpiDeviceList; PaAsiHpiDeviceInfo *hpiDeviceList;
HW16 adapterList[ HPI_MAX_ADAPTERS ]; int numAdapters;
HW16 numAdapters; hpi_err_t hpiError = 0;
HW16 hpiError = 0;
int i, j, deviceCount = 0, deviceIndex = 0; int i, j, deviceCount = 0, deviceIndex = 0;
assert( hpiHostApi ); assert( hpiHostApi );
assert( hpiHostApi->subSys );
/* Look for adapters (not strictly necessary, as AdapterOpen can do the same, but this */
/* way we have less errors since we do not try to open adapters we know aren't there) */
/* Errors not considered critical here (subsystem may report 0 devices), but report them */ /* Errors not considered critical here (subsystem may report 0 devices), but report them */
/* in debug mode. */ /* in debug mode. */
PA_ASIHPI_UNLESS_( HPI_SubSysFindAdapters( hpiHostApi->subSys, &numAdapters, PA_ASIHPI_UNLESS_( HPI_SubSysGetNumAdapters( NULL, &numAdapters), paNoError );
adapterList, HPI_MAX_ADAPTERS ), paNoError );
/* First open and count the number of devices (= number of streams), to ease memory allocation */ for( i=0; i < numAdapters; ++i )
for( i=0; i < HPI_MAX_ADAPTERS; ++i )
{ {
HW16 inStreams, outStreams; uint16_t inStreams, outStreams;
HW16 version; uint16_t version;
HW32 serial; uint32_t serial;
HW16 type; uint16_t type;
uint32_t idx;
/* If no adapter found at this index, skip it */ hpiError = HPI_SubSysGetAdapter(NULL, i, &idx, &type);
if( adapterList[i] == 0 ) if (hpiError)
continue; continue;
/* Try to open adapter */ /* Try to open adapter */
hpiError = HPI_AdapterOpen( hpiHostApi->subSys, i ); hpiError = HPI_AdapterOpen( NULL, idx );
/* Report error and skip to next device on failure */ /* Report error and skip to next device on failure */
if( hpiError ) if( hpiError )
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError ); PA_ASIHPI_REPORT_ERROR_( hpiError );
continue; continue;
} }
hpiError = HPI_AdapterGetInfo( hpiHostApi->subSys, i, hpiError = HPI_AdapterGetInfo( NULL, idx, &outStreams, &inStreams,
&outStreams, &inStreams, &version, &serial, &type ); &version, &serial, &type );
/* Skip to next device on failure */ /* Skip to next device on failure */
if( hpiError ) if( hpiError )
{ {
@ -597,19 +588,20 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
paInsufficientMemory ); paInsufficientMemory );
/* Now query devices again for information */ /* Now query devices again for information */
for( i=0; i < HPI_MAX_ADAPTERS; ++i ) for( i=0; i < numAdapters; ++i )
{ {
HW16 inStreams, outStreams; uint16_t inStreams, outStreams;
HW16 version; uint16_t version;
HW32 serial; uint32_t serial;
HW16 type; uint16_t type;
uint32_t idx;
/* If no adapter found at this index, skip it */ hpiError = HPI_SubSysGetAdapter( NULL, i, &idx, &type );
if( adapterList[i] == 0 ) if (hpiError)
continue; continue;
/* Assume adapter is still open from previous round */ /* Assume adapter is still open from previous round */
hpiError = HPI_AdapterGetInfo( hpiHostApi->subSys, i, hpiError = HPI_AdapterGetInfo( NULL, idx,
&outStreams, &inStreams, &version, &serial, &type ); &outStreams, &inStreams, &version, &serial, &type );
/* Report error and skip to next device on failure */ /* Report error and skip to next device on failure */
if( hpiError ) if( hpiError )
@ -620,7 +612,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
else else
{ {
PA_DEBUG(( "Found HPI Adapter ID=%4X Idx=%d #In=%d #Out=%d S/N=%d HWver=%c%d DSPver=%03d\n", PA_DEBUG(( "Found HPI Adapter ID=%4X Idx=%d #In=%d #Out=%d S/N=%d HWver=%c%d DSPver=%03d\n",
type, i, inStreams, outStreams, serial, type, idx, inStreams, outStreams, serial,
((version>>3)&0xf)+'A', /* Hw version major */ ((version>>3)&0xf)+'A', /* Hw version major */
version&0x7, /* Hw version minor */ version&0x7, /* Hw version minor */
((version>>13)*100)+((version>>7)&0x3f) /* DSP code version */ ((version>>13)*100)+((version>>7)&0x3f) /* DSP code version */
@ -637,8 +629,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) ); memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) );
/* Set implementation-specific device details */ /* Set implementation-specific device details */
hpiDevice->subSys = hpiHostApi->subSys; hpiDevice->adapterIndex = idx;
hpiDevice->adapterIndex = i;
hpiDevice->adapterType = type; hpiDevice->adapterType = type;
hpiDevice->adapterVersion = version; hpiDevice->adapterVersion = version;
hpiDevice->adapterSerialNumber = serial; hpiDevice->adapterSerialNumber = serial;
@ -680,8 +671,7 @@ static PaError PaAsiHpi_BuildDeviceList( PaAsiHpiHostApiRepresentation *hpiHostA
memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) ); memset( hpiDevice, 0, sizeof(PaAsiHpiDeviceInfo) );
/* Set implementation-specific device details */ /* Set implementation-specific device details */
hpiDevice->subSys = hpiHostApi->subSys; hpiDevice->adapterIndex = idx;
hpiDevice->adapterIndex = i;
hpiDevice->adapterType = type; hpiDevice->adapterType = type;
hpiDevice->adapterVersion = version; hpiDevice->adapterVersion = version;
hpiDevice->adapterSerialNumber = serial; hpiDevice->adapterSerialNumber = serial;
@ -740,32 +730,31 @@ PaError PaAsiHpi_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiInd
PaAsiHpiHostApiRepresentation *hpiHostApi = NULL; PaAsiHpiHostApiRepresentation *hpiHostApi = NULL;
PaHostApiInfo *baseApiInfo; PaHostApiInfo *baseApiInfo;
/* Try to initialize HPI subsystem */
if (!HPI_SubSysCreate())
{
/* the V19 development docs say that if an implementation
* detects that it cannot be used, it should return a NULL
* interface and paNoError */
PA_DEBUG(( "Could not open HPI interface\n" ));
*hostApi = NULL;
return paNoError;
}
else
{
uint32_t hpiVersion;
PA_ASIHPI_UNLESS_( HPI_SubSysGetVersionEx( NULL, &hpiVersion ), paUnanticipatedHostError );
PA_DEBUG(( "HPI interface v%d.%02d.%02d\n",
hpiVersion >> 16, (hpiVersion >> 8) & 0x0F, (hpiVersion & 0x0F) ));
}
/* Allocate host API structure */ /* Allocate host API structure */
PA_UNLESS_( hpiHostApi = (PaAsiHpiHostApiRepresentation*) PaUtil_AllocateMemory( PA_UNLESS_( hpiHostApi = (PaAsiHpiHostApiRepresentation*) PaUtil_AllocateMemory(
sizeof(PaAsiHpiHostApiRepresentation) ), paInsufficientMemory ); sizeof(PaAsiHpiHostApiRepresentation) ), paInsufficientMemory );
PA_UNLESS_( hpiHostApi->allocations = PaUtil_CreateAllocationGroup(), paInsufficientMemory ); PA_UNLESS_( hpiHostApi->allocations = PaUtil_CreateAllocationGroup(), paInsufficientMemory );
hpiHostApi->hostApiIndex = hostApiIndex; hpiHostApi->hostApiIndex = hostApiIndex;
hpiHostApi->subSys = NULL;
/* Try to initialize HPI subsystem */
if( ( hpiHostApi->subSys = HPI_SubSysCreate() ) == NULL)
{
/* the V19 development docs say that if an implementation
* detects that it cannot be used, it should return a NULL
* interface and paNoError */
PA_DEBUG(( "Could not open HPI interface\n" ));
result = paNoError;
*hostApi = NULL;
goto error;
}
else
{
HW32 hpiVersion;
PA_ASIHPI_UNLESS_( HPI_SubSysGetVersion( hpiHostApi->subSys, &hpiVersion ), paUnanticipatedHostError );
PA_DEBUG(( "HPI interface v%d.%02d\n",
hpiVersion >> 8, 10*((hpiVersion & 0xF0) >> 4) + (hpiVersion & 0x0F) ));
}
*hostApi = &hpiHostApi->baseHostApiRep; *hostApi = &hpiHostApi->baseHostApiRep;
baseApiInfo = &((*hostApi)->info); baseApiInfo = &((*hostApi)->info);
@ -799,8 +788,8 @@ PaError PaAsiHpi_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiInd
return result; return result;
error: error:
/* Clean up memory */ if (hpiHostApi)
Terminate( (PaUtilHostApiRepresentation *)hpiHostApi ); PaUtil_FreeMemory( hpiHostApi );
return result; return result;
} }
@ -820,9 +809,7 @@ static void Terminate( struct PaUtilHostApiRepresentation *hostApi )
if( hpiHostApi ) if( hpiHostApi )
{ {
/* Get rid of HPI-specific structures */ /* Get rid of HPI-specific structures */
if( hpiHostApi->subSys ) uint16_t lastAdapterIndex = HPI_MAX_ADAPTERS;
{
HW16 lastAdapterIndex = HPI_MAX_ADAPTERS;
/* Iterate through device list and close adapters */ /* Iterate through device list and close adapters */
for( i=0; i < hostApi->info.deviceCount; ++i ) for( i=0; i < hostApi->info.deviceCount; ++i )
{ {
@ -831,14 +818,13 @@ static void Terminate( struct PaUtilHostApiRepresentation *hostApi )
if( hpiDevice->adapterIndex != lastAdapterIndex ) if( hpiDevice->adapterIndex != lastAdapterIndex )
{ {
/* Ignore errors (report only during debugging) */ /* Ignore errors (report only during debugging) */
PA_ASIHPI_UNLESS_( HPI_AdapterClose( hpiHostApi->subSys, PA_ASIHPI_UNLESS_( HPI_AdapterClose( NULL,
hpiDevice->adapterIndex ), paNoError ); hpiDevice->adapterIndex ), paNoError );
lastAdapterIndex = hpiDevice->adapterIndex; lastAdapterIndex = hpiDevice->adapterIndex;
} }
} }
/* Finally dismantle HPI subsystem */ /* Finally dismantle HPI subsystem */
HPI_SubSysFree( hpiHostApi->subSys ); HPI_SubSysFree( NULL );
}
if( hpiHostApi->allocations ) if( hpiHostApi->allocations )
{ {
@ -859,7 +845,7 @@ error:
@return HPI sample format @return HPI sample format
*/ */
static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat ) static uint16_t PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat )
{ {
/* Ignore interleaving flag */ /* Ignore interleaving flag */
switch( paFormat & ~paNonInterleaved ) switch( paFormat & ~paNonInterleaved )
@ -893,7 +879,7 @@ static HW16 PaAsiHpi_PaToHpiFormat( PaSampleFormat paFormat )
@return PortAudio sample format @return PortAudio sample format
*/ */
static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat ) static PaSampleFormat PaAsiHpi_HpiToPaFormat( uint16_t hpiFormat )
{ {
switch( hpiFormat ) switch( hpiFormat )
{ {
@ -938,11 +924,11 @@ static PaSampleFormat PaAsiHpi_HpiToPaFormat( HW16 hpiFormat )
*/ */
static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostApi,
const PaStreamParameters *parameters, double sampleRate, const PaStreamParameters *parameters, double sampleRate,
PaAsiHpiDeviceInfo **hpiDevice, HPI_FORMAT *hpiFormat ) PaAsiHpiDeviceInfo **hpiDevice, struct hpi_format *hpiFormat )
{ {
int maxChannelCount = 0; int maxChannelCount = 0;
PaSampleFormat hostSampleFormat = 0; PaSampleFormat hostSampleFormat = 0;
HW16 hpiError = 0; hpi_err_t hpiError = 0;
/* Unless alternate device specification is supported, reject the use of /* Unless alternate device specification is supported, reject the use of
paUseHostApiSpecificDeviceSpecification */ paUseHostApiSpecificDeviceSpecification */
@ -979,9 +965,9 @@ static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostAp
hostSampleFormat = PaUtil_SelectClosestAvailableFormat(PA_ASIHPI_AVAILABLE_FORMATS_, hostSampleFormat = PaUtil_SelectClosestAvailableFormat(PA_ASIHPI_AVAILABLE_FORMATS_,
parameters->sampleFormat ); parameters->sampleFormat );
/* Setup format + info objects */ /* Setup format + info objects */
hpiError = HPI_FormatCreate( hpiFormat, (HW16)parameters->channelCount, hpiError = HPI_FormatCreate( hpiFormat, (uint16_t)parameters->channelCount,
PaAsiHpi_PaToHpiFormat( hostSampleFormat ), PaAsiHpi_PaToHpiFormat( hostSampleFormat ),
(HW32)sampleRate, 0, 0 ); (uint32_t)sampleRate, 0, 0 );
if( hpiError ) if( hpiError )
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError ); PA_ASIHPI_REPORT_ERROR_( hpiError );
@ -1016,25 +1002,25 @@ static PaError PaAsiHpi_CreateFormat( struct PaUtilHostApiRepresentation *hostAp
@return PortAudio error code (typically indicating a problem with stream format or device) @return PortAudio error code (typically indicating a problem with stream format or device)
*/ */
static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_OpenInput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat, const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
HPI_HISTREAM *hpiStream ) hpi_handle_t *hpiStream )
{ {
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi; PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaError result = paNoError; PaError result = paNoError;
HW16 hpiError = 0; hpi_err_t hpiError = 0;
/* Catch misplaced output devices, as they typically have 0 input channels */ /* Catch misplaced output devices, as they typically have 0 input channels */
PA_UNLESS_( !hpiDevice->streamIsOutput, paInvalidChannelCount ); PA_UNLESS_( !hpiDevice->streamIsOutput, paInvalidChannelCount );
/* Try to open input stream */ /* Try to open input stream */
PA_ASIHPI_UNLESS_( HPI_InStreamOpen( hpiHostApi->subSys, hpiDevice->adapterIndex, PA_ASIHPI_UNLESS_( HPI_InStreamOpen( NULL, hpiDevice->adapterIndex,
hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable ); hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable );
/* Set input format (checking it in the process) */ /* Set input format (checking it in the process) */
/* Could also use HPI_InStreamQueryFormat, but this economizes the process */ /* Could also use HPI_InStreamQueryFormat, but this economizes the process */
hpiError = HPI_InStreamSetFormat( hpiHostApi->subSys, *hpiStream, (HPI_FORMAT*)hpiFormat ); hpiError = HPI_InStreamSetFormat( NULL, *hpiStream, (struct hpi_format*)hpiFormat );
if( hpiError ) if( hpiError )
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError ); PA_ASIHPI_REPORT_ERROR_( hpiError );
PA_ASIHPI_UNLESS_( HPI_InStreamClose( hpiHostApi->subSys, *hpiStream ), paNoError ); PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL, *hpiStream ), paNoError );
switch( hpiError ) switch( hpiError )
{ {
case HPI_ERROR_INVALID_FORMAT: case HPI_ERROR_INVALID_FORMAT:
@ -1071,25 +1057,25 @@ error:
@return PortAudio error code (typically indicating a problem with stream format or device) @return PortAudio error code (typically indicating a problem with stream format or device)
*/ */
static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi, static PaError PaAsiHpi_OpenOutput( struct PaUtilHostApiRepresentation *hostApi,
const PaAsiHpiDeviceInfo *hpiDevice, const HPI_FORMAT *hpiFormat, const PaAsiHpiDeviceInfo *hpiDevice, const struct hpi_format *hpiFormat,
HPI_HOSTREAM *hpiStream ) hpi_handle_t *hpiStream )
{ {
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi; PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaError result = paNoError; PaError result = paNoError;
HW16 hpiError = 0; hpi_err_t hpiError = 0;
/* Catch misplaced input devices, as they typically have 0 output channels */ /* Catch misplaced input devices, as they typically have 0 output channels */
PA_UNLESS_( hpiDevice->streamIsOutput, paInvalidChannelCount ); PA_UNLESS_( hpiDevice->streamIsOutput, paInvalidChannelCount );
/* Try to open output stream */ /* Try to open output stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamOpen( hpiHostApi->subSys, hpiDevice->adapterIndex, PA_ASIHPI_UNLESS_( HPI_OutStreamOpen( NULL, hpiDevice->adapterIndex,
hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable ); hpiDevice->streamIndex, hpiStream ), paDeviceUnavailable );
/* Check output format (format is set on first write to output stream) */ /* Check output format (format is set on first write to output stream) */
hpiError = HPI_OutStreamQueryFormat( hpiHostApi->subSys, *hpiStream, (HPI_FORMAT*)hpiFormat ); hpiError = HPI_OutStreamQueryFormat( NULL, *hpiStream, (struct hpi_format*)hpiFormat );
if( hpiError ) if( hpiError )
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError ); PA_ASIHPI_REPORT_ERROR_( hpiError );
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( hpiHostApi->subSys, *hpiStream ), paNoError ); PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL, *hpiStream ), paNoError );
switch( hpiError ) switch( hpiError )
{ {
case HPI_ERROR_INVALID_FORMAT: case HPI_ERROR_INVALID_FORMAT:
@ -1135,12 +1121,12 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
PaError result = paFormatIsSupported; PaError result = paFormatIsSupported;
PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi; PaAsiHpiHostApiRepresentation *hpiHostApi = (PaAsiHpiHostApiRepresentation*)hostApi;
PaAsiHpiDeviceInfo *hpiDevice = NULL; PaAsiHpiDeviceInfo *hpiDevice = NULL;
HPI_FORMAT hpiFormat; struct hpi_format hpiFormat;
/* Input stream */ /* Input stream */
if( inputParameters ) if( inputParameters )
{ {
HPI_HISTREAM hpiStream; hpi_handle_t hpiStream;
PA_DEBUG(( "%s: Checking input params: dev=%d, sr=%d, chans=%d, fmt=%d\n", PA_DEBUG(( "%s: Checking input params: dev=%d, sr=%d, chans=%d, fmt=%d\n",
__FUNCTION__, inputParameters->device, (int)sampleRate, __FUNCTION__, inputParameters->device, (int)sampleRate,
inputParameters->channelCount, inputParameters->sampleFormat )); inputParameters->channelCount, inputParameters->sampleFormat ));
@ -1150,13 +1136,13 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
/* Open stream to further check format */ /* Open stream to further check format */
PA_ENSURE_( PaAsiHpi_OpenInput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) ); PA_ENSURE_( PaAsiHpi_OpenInput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) );
/* Close stream again */ /* Close stream again */
PA_ASIHPI_UNLESS_( HPI_InStreamClose( hpiHostApi->subSys, hpiStream ), paNoError ); PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL, hpiStream ), paNoError );
} }
/* Output stream */ /* Output stream */
if( outputParameters ) if( outputParameters )
{ {
HPI_HOSTREAM hpiStream; hpi_handle_t hpiStream;
PA_DEBUG(( "%s: Checking output params: dev=%d, sr=%d, chans=%d, fmt=%d\n", PA_DEBUG(( "%s: Checking output params: dev=%d, sr=%d, chans=%d, fmt=%d\n",
__FUNCTION__, outputParameters->device, (int)sampleRate, __FUNCTION__, outputParameters->device, (int)sampleRate,
outputParameters->channelCount, outputParameters->sampleFormat )); outputParameters->channelCount, outputParameters->sampleFormat ));
@ -1166,7 +1152,7 @@ static PaError IsFormatSupported( struct PaUtilHostApiRepresentation *hostApi,
/* Open stream to further check format */ /* Open stream to further check format */
PA_ENSURE_( PaAsiHpi_OpenOutput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) ); PA_ENSURE_( PaAsiHpi_OpenOutput( hostApi, hpiDevice, &hpiFormat, &hpiStream ) );
/* Close stream again */ /* Close stream again */
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( hpiHostApi->subSys, hpiStream ), paNoError ); PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL, hpiStream ), paNoError );
} }
error: error:
@ -1188,9 +1174,9 @@ error:
static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info ) static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAsiHpiStreamInfo *info )
{ {
PaError result = paDeviceUnavailable; PaError result = paDeviceUnavailable;
HW16 state; uint16_t state;
HW32 bufferSize, dataSize, frameCounter, auxDataSize, threshold; uint32_t bufferSize, dataSize, frameCounter, auxDataSize, threshold;
HW32 hwBufferSize, hwDataSize; uint32_t hwBufferSize, hwDataSize;
assert( streamComp ); assert( streamComp );
assert( info ); assert( info );
@ -1212,14 +1198,14 @@ static PaError PaAsiHpi_GetStreamInfo( PaAsiHpiStreamComponent *streamComp, PaAs
/* Obtain detailed stream info (either input or output) */ /* Obtain detailed stream info (either input or output) */
if( streamComp->hpiDevice->streamIsOutput ) if( streamComp->hpiDevice->streamIsOutput )
{ {
PA_ASIHPI_UNLESS_( HPI_OutStreamGetInfoEx( streamComp->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamGetInfoEx( NULL,
streamComp->hpiStream, streamComp->hpiStream,
&state, &bufferSize, &dataSize, &frameCounter, &state, &bufferSize, &dataSize, &frameCounter,
&auxDataSize ), paUnanticipatedHostError ); &auxDataSize ), paUnanticipatedHostError );
} }
else else
{ {
PA_ASIHPI_UNLESS_( HPI_InStreamGetInfoEx( streamComp->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_InStreamGetInfoEx( NULL,
streamComp->hpiStream, streamComp->hpiStream,
&state, &bufferSize, &dataSize, &frameCounter, &state, &bufferSize, &dataSize, &frameCounter,
&auxDataSize ), paUnanticipatedHostError ); &auxDataSize ), paUnanticipatedHostError );
@ -1479,7 +1465,7 @@ static void PaAsiHpi_StreamDump( PaAsiHpiStream *stream )
@return PortAudio error code (possibly paBufferTooBig or paInsufficientMemory) @return PortAudio error code (possibly paBufferTooBig or paInsufficientMemory)
*/ */
static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32 pollingInterval, static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, uint32_t pollingInterval,
unsigned long framesPerPaHostBuffer, PaTime suggestedLatency ) unsigned long framesPerPaHostBuffer, PaTime suggestedLatency )
{ {
PaError result = paNoError; PaError result = paNoError;
@ -1499,8 +1485,8 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
/* Check if BBM (background bus mastering) is to be enabled */ /* Check if BBM (background bus mastering) is to be enabled */
if( PA_ASIHPI_USE_BBM_ ) if( PA_ASIHPI_USE_BBM_ )
{ {
HW32 bbmBufferSize = 0, preLatencyBufferSize = 0; uint32_t bbmBufferSize = 0, preLatencyBufferSize = 0;
HW16 hpiError = 0; hpi_err_t hpiError = 0;
PaTime pollingOverhead; PaTime pollingOverhead;
/* Check overhead of Pa_Sleep() call (minimum sleep duration in ms -> OS dependent) */ /* Check overhead of Pa_Sleep() call (minimum sleep duration in ms -> OS dependent) */
@ -1510,7 +1496,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
PA_DEBUG(( "polling overhead = %f ms (length of 0-second sleep)\n", pollingOverhead )); PA_DEBUG(( "polling overhead = %f ms (length of 0-second sleep)\n", pollingOverhead ));
/* Obtain minimum recommended size for host buffer (in bytes) */ /* Obtain minimum recommended size for host buffer (in bytes) */
PA_ASIHPI_UNLESS_( HPI_StreamEstimateBufferSize( &streamComp->hpiFormat, PA_ASIHPI_UNLESS_( HPI_StreamEstimateBufferSize( &streamComp->hpiFormat,
pollingInterval + (HW32)ceil( pollingOverhead ), pollingInterval + (uint32_t)ceil( pollingOverhead ),
&bbmBufferSize ), paUnanticipatedHostError ); &bbmBufferSize ), paUnanticipatedHostError );
/* BBM places more stringent requirements on buffer size (see description */ /* BBM places more stringent requirements on buffer size (see description */
/* of HPI_StreamEstimateBufferSize in HPI API document) */ /* of HPI_StreamEstimateBufferSize in HPI API document) */
@ -1528,27 +1514,26 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
{ {
/* Save old buffer size, to be retried if new size proves too big */ /* Save old buffer size, to be retried if new size proves too big */
preLatencyBufferSize = bbmBufferSize; preLatencyBufferSize = bbmBufferSize;
bbmBufferSize = (HW32)ceil( suggestedLatency * streamComp->bytesPerFrame bbmBufferSize = (uint32_t)ceil( suggestedLatency * streamComp->bytesPerFrame
* streamComp->hpiFormat.dwSampleRate ); * streamComp->hpiFormat.dwSampleRate );
} }
} }
/* Choose closest memory block boundary (HPI API document states that /* Choose closest memory block boundary (HPI API document states that
"a buffer size of Nx4096 - 20 makes the best use of memory" "a buffer size of Nx4096 - 20 makes the best use of memory"
(under the entry for HPI_StreamEstimateBufferSize)) */ (under the entry for HPI_StreamEstimateBufferSize)) */
bbmBufferSize = ((HW32)ceil((bbmBufferSize + 20)/4096.0))*4096 - 20; bbmBufferSize = ((uint32_t)ceil((bbmBufferSize + 20)/4096.0))*4096 - 20;
streamComp->hostBufferSize = bbmBufferSize; streamComp->hostBufferSize = bbmBufferSize;
/* Allocate BBM host buffer (this enables bus mastering transfers in background) */ /* Allocate BBM host buffer (this enables bus mastering transfers in background) */
if( streamComp->hpiDevice->streamIsOutput ) if( streamComp->hpiDevice->streamIsOutput )
hpiError = HPI_OutStreamHostBufferAllocate( streamComp->hpiDevice->subSys, hpiError = HPI_OutStreamHostBufferAllocate( NULL,
streamComp->hpiStream, streamComp->hpiStream,
bbmBufferSize ); bbmBufferSize );
else else
hpiError = HPI_InStreamHostBufferAllocate( streamComp->hpiDevice->subSys, hpiError = HPI_InStreamHostBufferAllocate( NULL,
streamComp->hpiStream, streamComp->hpiStream,
bbmBufferSize ); bbmBufferSize );
if( hpiError ) if( hpiError )
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError );
/* Indicate that BBM is disabled */ /* Indicate that BBM is disabled */
streamComp->hostBufferSize = 0; streamComp->hostBufferSize = 0;
/* Retry with smaller buffer size (transfers will still work, but not via BBM) */ /* Retry with smaller buffer size (transfers will still work, but not via BBM) */
@ -1561,11 +1546,11 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
preLatencyBufferSize, bbmBufferSize )); preLatencyBufferSize, bbmBufferSize ));
bbmBufferSize = preLatencyBufferSize; bbmBufferSize = preLatencyBufferSize;
if( streamComp->hpiDevice->streamIsOutput ) if( streamComp->hpiDevice->streamIsOutput )
hpiError = HPI_OutStreamHostBufferAllocate( streamComp->hpiDevice->subSys, hpiError = HPI_OutStreamHostBufferAllocate( NULL,
streamComp->hpiStream, streamComp->hpiStream,
bbmBufferSize ); bbmBufferSize );
else else
hpiError = HPI_InStreamHostBufferAllocate( streamComp->hpiDevice->subSys, hpiError = HPI_InStreamHostBufferAllocate( NULL,
streamComp->hpiStream, streamComp->hpiStream,
bbmBufferSize ); bbmBufferSize );
/* Another round of error checking */ /* Another round of error checking */
@ -1598,8 +1583,10 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
} }
/* If BBM not supported, foreground transfers will be used, but not a show-stopper */ /* If BBM not supported, foreground transfers will be used, but not a show-stopper */
/* Anything else is an error */ /* Anything else is an error */
else if( hpiError != HPI_ERROR_INVALID_OPERATION ) else if (( hpiError != HPI_ERROR_INVALID_OPERATION ) &&
( hpiError != HPI_ERROR_INVALID_FUNC ))
{ {
PA_ASIHPI_REPORT_ERROR_( hpiError );
result = paUnanticipatedHostError; result = paUnanticipatedHostError;
goto error; goto error;
} }
@ -1623,7 +1610,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
PaTime latency = suggestedLatency > 0.0 ? suggestedLatency : PaTime latency = suggestedLatency > 0.0 ? suggestedLatency :
streamComp->hpiDevice->baseDeviceInfo.defaultHighOutputLatency; streamComp->hpiDevice->baseDeviceInfo.defaultHighOutputLatency;
streamComp->outputBufferCap = streamComp->outputBufferCap =
(HW32)ceil( latency * streamComp->bytesPerFrame * streamComp->hpiFormat.dwSampleRate ); (uint32_t)ceil( latency * streamComp->bytesPerFrame * streamComp->hpiFormat.dwSampleRate );
/* The cap should not be too small, to prevent underflow */ /* The cap should not be too small, to prevent underflow */
if( streamComp->outputBufferCap < 4*paHostBufferSize ) if( streamComp->outputBufferCap < 4*paHostBufferSize )
streamComp->outputBufferCap = 4*paHostBufferSize; streamComp->outputBufferCap = 4*paHostBufferSize;
@ -1635,7 +1622,7 @@ static PaError PaAsiHpi_SetupBuffers( PaAsiHpiStreamComponent *streamComp, HW32
/* Temp buffer size should be multiple of PA host buffer size (or 1x, if using fixed blocks) */ /* Temp buffer size should be multiple of PA host buffer size (or 1x, if using fixed blocks) */
streamComp->tempBufferSize = paHostBufferSize; streamComp->tempBufferSize = paHostBufferSize;
/* Allocate temp buffer */ /* Allocate temp buffer */
PA_UNLESS_( streamComp->tempBuffer = (HW8 *)PaUtil_AllocateMemory( streamComp->tempBufferSize ), PA_UNLESS_( streamComp->tempBuffer = (uint8_t *)PaUtil_AllocateMemory( streamComp->tempBufferSize ),
paInsufficientMemory ); paInsufficientMemory );
error: error:
return result; return result;
@ -1725,7 +1712,7 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
By keeping the frames a multiple of 4, this is ensured even for 8-bit mono sound. */ By keeping the frames a multiple of 4, this is ensured even for 8-bit mono sound. */
framesPerHostBuffer = (framesPerHostBuffer / 4) * 4; framesPerHostBuffer = (framesPerHostBuffer / 4) * 4;
/* Polling is based on time length (in milliseconds) of user-requested block size */ /* Polling is based on time length (in milliseconds) of user-requested block size */
stream->pollingInterval = (HW32)ceil( 1000.0*framesPerHostBuffer/sampleRate ); stream->pollingInterval = (uint32_t)ceil( 1000.0*framesPerHostBuffer/sampleRate );
assert( framesPerHostBuffer > 0 ); assert( framesPerHostBuffer > 0 );
/* Open underlying streams, check formats and allocate buffers */ /* Open underlying streams, check formats and allocate buffers */
@ -1890,7 +1877,7 @@ static PaError CloseStream( PaStream *s )
/* Close HPI stream (freeing BBM host buffer in the process, if used) */ /* Close HPI stream (freeing BBM host buffer in the process, if used) */
if( stream->input->hpiStream ) if( stream->input->hpiStream )
{ {
PA_ASIHPI_UNLESS_( HPI_InStreamClose( stream->input->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_InStreamClose( NULL,
stream->input->hpiStream ), paUnanticipatedHostError ); stream->input->hpiStream ), paUnanticipatedHostError );
} }
/* Free temp buffer and stream component */ /* Free temp buffer and stream component */
@ -1902,7 +1889,7 @@ static PaError CloseStream( PaStream *s )
/* Close HPI stream (freeing BBM host buffer in the process, if used) */ /* Close HPI stream (freeing BBM host buffer in the process, if used) */
if( stream->output->hpiStream ) if( stream->output->hpiStream )
{ {
PA_ASIHPI_UNLESS_( HPI_OutStreamClose( stream->output->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamClose( NULL,
stream->output->hpiStream ), paUnanticipatedHostError ); stream->output->hpiStream ), paUnanticipatedHostError );
} }
/* Free temp buffer and stream component */ /* Free temp buffer and stream component */
@ -1933,9 +1920,6 @@ static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream )
PaAsiHpiStreamComponent *out; PaAsiHpiStreamComponent *out;
PaUtilZeroer *zeroer; PaUtilZeroer *zeroer;
PaSampleFormat outputFormat; PaSampleFormat outputFormat;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
HPI_DATA data;
#endif
assert( stream ); assert( stream );
out = stream->output; out = stream->output;
/* Only continue if stream has output channels */ /* Only continue if stream has output channels */
@ -1944,28 +1928,19 @@ static PaError PaAsiHpi_PrimeOutputWithSilence( PaAsiHpiStream *stream )
assert( out->tempBuffer ); assert( out->tempBuffer );
/* Clear all existing data in hardware playback buffer */ /* Clear all existing data in hardware playback buffer */
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( out->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamReset( NULL,
out->hpiStream ), paUnanticipatedHostError ); out->hpiStream ), paUnanticipatedHostError );
/* Fill temp buffer with silence */ /* Fill temp buffer with silence */
outputFormat = PaAsiHpi_HpiToPaFormat( out->hpiFormat.wFormat ); outputFormat = PaAsiHpi_HpiToPaFormat( out->hpiFormat.wFormat );
zeroer = PaUtil_SelectZeroer( outputFormat ); zeroer = PaUtil_SelectZeroer( outputFormat );
zeroer(out->tempBuffer, 1, out->tempBufferSize / Pa_GetSampleSize(outputFormat) ); zeroer(out->tempBuffer, 1, out->tempBufferSize / Pa_GetSampleSize(outputFormat) );
/* Write temp buffer to hardware fifo twice, to get started */ /* Write temp buffer to hardware fifo twice, to get started */
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 )) PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL, out->hpiStream,
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( out->hpiDevice->subSys, out->hpiStream,
out->tempBuffer, out->tempBufferSize, &out->hpiFormat), out->tempBuffer, out->tempBufferSize, &out->hpiFormat),
paUnanticipatedHostError ); paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( out->hpiDevice->subSys, out->hpiStream, PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL, out->hpiStream,
out->tempBuffer, out->tempBufferSize, &out->hpiFormat), out->tempBuffer, out->tempBufferSize, &out->hpiFormat),
paUnanticipatedHostError ); paUnanticipatedHostError );
#else
PA_ASIHPI_UNLESS_( HPI_DataCreate( &data, &out->hpiFormat, out->tempBuffer, out->tempBufferSize ),
paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( out->hpiDevice->subSys,
out->hpiStream, &data ), paUnanticipatedHostError );
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( out->hpiDevice->subSys,
out->hpiStream, &data ), paUnanticipatedHostError );
#endif
error: error:
return result; return result;
} }
@ -1989,7 +1964,7 @@ static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed )
if( stream->input ) if( stream->input )
{ {
PA_ASIHPI_UNLESS_( HPI_InStreamStart( stream->input->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_InStreamStart( NULL,
stream->input->hpiStream ), paUnanticipatedHostError ); stream->input->hpiStream ), paUnanticipatedHostError );
} }
if( stream->output ) if( stream->output )
@ -1999,7 +1974,7 @@ static PaError PaAsiHpi_StartStream( PaAsiHpiStream *stream, int outputPrimed )
/* Buffer isn't primed, so load stream with silence */ /* Buffer isn't primed, so load stream with silence */
PA_ENSURE_( PaAsiHpi_PrimeOutputWithSilence( stream ) ); PA_ENSURE_( PaAsiHpi_PrimeOutputWithSilence( stream ) );
} }
PA_ASIHPI_UNLESS_( HPI_OutStreamStart( stream->output->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamStart( NULL,
stream->output->hpiStream ), paUnanticipatedHostError ); stream->output->hpiStream ), paUnanticipatedHostError );
} }
stream->state = paAsiHpiActiveState; stream->state = paAsiHpiActiveState;
@ -2071,7 +2046,7 @@ static PaError PaAsiHpi_StopStream( PaAsiHpiStream *stream, int abort )
/* Input channels */ /* Input channels */
if( stream->input ) if( stream->input )
{ {
PA_ASIHPI_UNLESS_( HPI_InStreamReset( stream->input->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_InStreamReset( NULL,
stream->input->hpiStream ), paUnanticipatedHostError ); stream->input->hpiStream ), paUnanticipatedHostError );
} }
/* Output channels */ /* Output channels */
@ -2097,7 +2072,7 @@ static PaError PaAsiHpi_StopStream( PaAsiHpiStream *stream, int abort )
Pa_Sleep( (long)ceil( timeLeft ) ); Pa_Sleep( (long)ceil( timeLeft ) );
} }
} }
PA_ASIHPI_UNLESS_( HPI_OutStreamReset( stream->output->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamReset( NULL,
stream->output->hpiStream ), paUnanticipatedHostError ); stream->output->hpiStream ), paUnanticipatedHostError );
} }
@ -2315,7 +2290,7 @@ static PaError PaAsiHpi_WaitForFrames( PaAsiHpiStream *stream, unsigned long *fr
PaError result = paNoError; PaError result = paNoError;
double sampleRate; double sampleRate;
unsigned long framesTarget; unsigned long framesTarget;
HW32 outputData = 0, outputSpace = 0, inputData = 0, framesLeft = 0; uint32_t outputData = 0, outputSpace = 0, inputData = 0, framesLeft = 0;
assert( stream ); assert( stream );
assert( stream->input || stream->output ); assert( stream->input || stream->output );
@ -2485,10 +2460,7 @@ static PaError PaAsiHpi_BeginProcessing( PaAsiHpiStream *stream, unsigned long *
{ {
PaAsiHpiStreamInfo info; PaAsiHpiStreamInfo info;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 )) uint32_t framesToGet = *numFrames;
HPI_DATA data;
#endif
HW32 framesToGet = *numFrames;
/* Check for overflows and underflows yet again */ /* Check for overflows and underflows yet again */
PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->input, &info ) ); PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->input, &info ) );
@ -2513,22 +2485,12 @@ static PaError PaAsiHpi_BeginProcessing( PaAsiHpiStream *stream, unsigned long *
stream->input->tempBufferSize / Pa_GetSampleSize(inputFormat) ); stream->input->tempBufferSize / Pa_GetSampleSize(inputFormat) );
} }
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
/* Read block of data into temp buffer */ /* Read block of data into temp buffer */
PA_ASIHPI_UNLESS_( HPI_InStreamReadBuf( stream->input->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_InStreamReadBuf( NULL,
stream->input->hpiStream, stream->input->hpiStream,
stream->input->tempBuffer, stream->input->tempBuffer,
framesToGet * stream->input->bytesPerFrame), framesToGet * stream->input->bytesPerFrame),
paUnanticipatedHostError ); paUnanticipatedHostError );
#else
/* Setup HPI data structure around temp buffer */
HPI_DataCreate( &data, &stream->input->hpiFormat, stream->input->tempBuffer,
framesToGet * stream->input->bytesPerFrame );
/* Read block of data into temp buffer */
PA_ASIHPI_UNLESS_( HPI_InStreamRead( stream->input->hpiDevice->subSys,
stream->input->hpiStream, &data ),
paUnanticipatedHostError );
#endif
/* Register temp buffer with buffer processor (always FULL buffer) */ /* Register temp buffer with buffer processor (always FULL buffer) */
PaUtil_SetInputFrameCount( &stream->bufferProcessor, *numFrames ); PaUtil_SetInputFrameCount( &stream->bufferProcessor, *numFrames );
/* HPI interface only allows interleaved channels */ /* HPI interface only allows interleaved channels */
@ -2572,9 +2534,6 @@ static PaError PaAsiHpi_EndProcessing( PaAsiHpiStream *stream, unsigned long num
if( stream->output ) if( stream->output )
{ {
PaAsiHpiStreamInfo info; PaAsiHpiStreamInfo info;
#if (HPI_VER < HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
HPI_DATA data;
#endif
/* Check for underflows after the (potentially time-consuming) callback */ /* Check for underflows after the (potentially time-consuming) callback */
PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->output, &info ) ); PA_ENSURE_( PaAsiHpi_GetStreamInfo( stream->output, &info ) );
if( info.underflow ) if( info.underflow )
@ -2582,23 +2541,13 @@ static PaError PaAsiHpi_EndProcessing( PaAsiHpiStream *stream, unsigned long num
*cbFlags |= paOutputUnderflow; *cbFlags |= paOutputUnderflow;
} }
#if (HPI_VER >= HPI_VERSION_CONSTRUCTOR( 3, 5, 5 ))
/* Write temp buffer to HPI stream */ /* Write temp buffer to HPI stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( stream->output->hpiDevice->subSys, PA_ASIHPI_UNLESS_( HPI_OutStreamWriteBuf( NULL,
stream->output->hpiStream, stream->output->hpiStream,
stream->output->tempBuffer, stream->output->tempBuffer,
numFrames * stream->output->bytesPerFrame, numFrames * stream->output->bytesPerFrame,
&stream->output->hpiFormat), &stream->output->hpiFormat),
paUnanticipatedHostError ); paUnanticipatedHostError );
#else
/* Setup HPI data structure around temp buffer */
HPI_DataCreate( &data, &stream->output->hpiFormat, stream->output->tempBuffer,
numFrames * stream->output->bytesPerFrame );
/* Write temp buffer to HPI stream */
PA_ASIHPI_UNLESS_( HPI_OutStreamWrite( stream->output->hpiDevice->subSys,
stream->output->hpiStream, &data ),
paUnanticipatedHostError );
#endif
} }
error: error:

View File

@ -4,64 +4,56 @@ This document contains information to help you compile PortAudio with
ASIO support. If you find any omissions or errors in this document ASIO support. If you find any omissions or errors in this document
please notify us on the PortAudio mailing list. please notify us on the PortAudio mailing list.
NOTE: The Macintosh sections of this document are provided for historical
reference. They refer to pre-OS X Macintosh. PortAudio no longer
supports pre-OS X Macintosh. Steinberg does not support ASIO on Mac OS X.
Building PortAudio with ASIO support Building PortAudio with ASIO support
------------------------------------ ------------------------------------
To build PortAudio with ASIO support you need to compile and link with To build PortAudio with ASIO support you need to compile and link with
pa_asio.c, and files from the ASIO SDK (see below), along with the common pa_asio.c, and files from the ASIO SDK (see below), along with the common
files from src/common/ and platform specific files from src/os/win/ (for Win32) PortAudio files from src/common/ and platform specific files from
or src/os/mac/ (for Macintosh). src/os/win/ (for Win32).
If you are compiling with a non-Microsoft compiler on Windows, also If you are compiling with a non-Microsoft compiler on Windows, also
compile and link with iasiothiscallresolver.cpp (see below for compile and link with iasiothiscallresolver.cpp (see below for
an explanation). an explanation).
For some platforms (MingW, possibly Mac), you may simply For some platforms (MingW, Cygwin/MingW), you may simply
be able to type: be able to type:
./configure --with-host_os=mingw --with-winapi=asio [--with-asiodir=/usr/local/asiosdk2] ./configure --with-host_os=mingw --with-winapi=asio [--with-asiodir=/usr/local/asiosdk2]
make make
./configure --with-host_os=darwin --with-winapi=asio [--with-asiodir=/usr/local/asiosdk2] and life will be good. Make sure you update the above with the correct local
make path to the ASIO SDK.
For Microsoft Visual C++ there is a build tutorial here:
http://www.portaudio.com/trac/wiki/TutorialDir/Compile/WindowsASIOMSVC
and life will be good.
Obtaining the ASIO SDK Obtaining the ASIO SDK
---------------------- ----------------------
In order to build PortAudio with ASIO support, you need to download In order to build PortAudio with ASIO support, you need to download
the ASIO SDK (version 2.0) from Steinberg. Steinberg makes the ASIO the ASIO SDK (version 2.0 or later) from Steinberg. Steinberg makes the ASIO
SDK available to anyone free of charge, however they do not permit its SDK available to anyone free of charge, however they do not permit its
source code to be distributed. source code to be distributed.
NOTE: In some cases the ASIO SDK may require patching, see below NOTE: In some cases the ASIO SDK may require patching, see below
for further details. for further details.
http://www.steinberg.de/329+M52087573ab0.html http://www.steinberg.net/en/company/developer.html
If the above link is broken search Google for: If the above link is broken search Google for:
"download steinberg ASIO SDK" "download steinberg ASIO SDK"
Building the ASIO SDK on Macintosh
----------------------------------
To build the ASIO SDK on Macintosh you need to compile and link with the
following files from the ASIO SDK:
host/asiodrivers.cpp
host/mac/asioshlib.cpp
host/mac/codefragements.cpp
You may also need to adjust your include paths to support inclusion of
header files from the above directories.
Building the ASIO SDK on Windows Building the ASIO SDK on Windows
-------------------------------- --------------------------------
@ -103,12 +95,27 @@ If you use configure and make (see above), this should be handled
automatically for you. automatically for you.
For further information about the IASIO thiscall problem see this page: For further information about the IASIO thiscall problem see this page:
http://www.audiomulch.com/~rossb/code/calliasio http://www.rossbencina.com/code/iasio-thiscall-resolver
Macintosh ASIO SDK Bug Patch Building the ASIO SDK on (Pre-OS X) Macintosh
---------------------------- ---------------------------------------------
To build the ASIO SDK on Macintosh you need to compile and link with the
following files from the ASIO SDK:
host/asiodrivers.cpp
host/mac/asioshlib.cpp
host/mac/codefragements.cpp
You may also need to adjust your include paths to support inclusion of
header files from the above directories.
(Pre-OS X) Macintosh ASIO SDK Bug Patch
---------------------------------------
There is a bug in the ASIO SDK that causes the Macintosh version to There is a bug in the ASIO SDK that causes the Macintosh version to
often fail during initialization. Below is a patch that you can apply. often fail during initialization. Below is a patch that you can apply.
@ -137,4 +144,4 @@ bool CodeFragments::getFrontProcessDirectory(void *specs)
} }
--- ###

View File

@ -1,5 +1,5 @@
/* /*
* $Id: pa_asio.cpp 1681 2011-05-10 15:58:15Z rossb $ * $Id: pa_asio.cpp 1778 2011-11-10 13:59:53Z rossb $
* Portable Audio I/O Library for ASIO Drivers * Portable Audio I/O Library for ASIO Drivers
* *
* Author: Stephane Letz * Author: Stephane Letz
@ -1214,7 +1214,7 @@ PaError PaAsio_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex
if( foundDefaultSampleRate ){ if( foundDefaultSampleRate ){
/* calculate default latency values from bufferPreferredSize /* calculate default latency values from bufferPreferredSize
for default low latency, and bufferPreferredSize * 3 for default low latency, and bufferMaxSize
for default high latency. for default high latency.
use the default sample rate to convert from samples to use the default sample rate to convert from samples to
seconds. Without knowing what sample rate the user will seconds. Without knowing what sample rate the user will
@ -1227,17 +1227,11 @@ PaError PaAsio_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex
deviceInfo->defaultLowInputLatency = defaultLowLatency; deviceInfo->defaultLowInputLatency = defaultLowLatency;
deviceInfo->defaultLowOutputLatency = defaultLowLatency; deviceInfo->defaultLowOutputLatency = defaultLowLatency;
long defaultHighLatencyBufferSize =
paAsioDriverInfo.bufferPreferredSize * 3;
if( defaultHighLatencyBufferSize > paAsioDriverInfo.bufferMaxSize )
defaultHighLatencyBufferSize = paAsioDriverInfo.bufferMaxSize;
double defaultHighLatency = double defaultHighLatency =
defaultHighLatencyBufferSize / deviceInfo->defaultSampleRate; paAsioDriverInfo.bufferMaxSize / deviceInfo->defaultSampleRate;
if( defaultHighLatency < defaultLowLatency ) if( defaultHighLatency < defaultLowLatency )
defaultHighLatency = defaultLowLatency; /* just incase the driver returns something strange */ defaultHighLatency = defaultLowLatency; /* just in case the driver returns something strange */
deviceInfo->defaultHighInputLatency = defaultHighLatency; deviceInfo->defaultHighInputLatency = defaultHighLatency;
deviceInfo->defaultHighOutputLatency = defaultHighLatency; deviceInfo->defaultHighOutputLatency = defaultHighLatency;
@ -1629,33 +1623,60 @@ static void ZeroOutputBuffers( PaAsioStream *stream, long index )
} }
static unsigned long SelectHostBufferSize( unsigned long suggestedLatencyFrames, unsigned long userFramesPerBuffer, /* return the next power of two >= x.
PaAsioDriverInfo *driverInfo ) Returns the input parameter if it is already a power of two.
http://stackoverflow.com/questions/364985/algorithm-for-finding-the-smallest-power-of-two-thats-greater-or-equal-to-a-giv
*/
static unsigned long NextPowerOfTwo( unsigned long x )
{ {
--x;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
/* If you needed to deal with numbers > 2^32 the following would be needed.
For latencies, we don't deal with values this large.
x |= x >> 32;
*/
return x + 1;
}
static unsigned long SelectHostBufferSizeForUnspecifiedUserFramesPerBuffer(
unsigned long targetBufferingLatencyFrames, PaAsioDriverInfo *driverInfo )
{
/* Choose a host buffer size based only on targetBufferingLatencyFrames and the
device's supported buffer sizes. Always returns a valid value.
*/
unsigned long result; unsigned long result;
if( suggestedLatencyFrames == 0 ) if( targetBufferingLatencyFrames <= (unsigned long)driverInfo->bufferMinSize )
{
result = driverInfo->bufferPreferredSize;
}
else{
if( suggestedLatencyFrames <= (unsigned long)driverInfo->bufferMinSize )
{ {
result = driverInfo->bufferMinSize; result = driverInfo->bufferMinSize;
} }
else if( suggestedLatencyFrames >= (unsigned long)driverInfo->bufferMaxSize ) else if( targetBufferingLatencyFrames >= (unsigned long)driverInfo->bufferMaxSize )
{ {
result = driverInfo->bufferMaxSize; result = driverInfo->bufferMaxSize;
} }
else else
{ {
if( driverInfo->bufferGranularity == -1 ) if( driverInfo->bufferGranularity == 0 ) /* single fixed host buffer size */
{ {
/* power-of-two */ /* The documentation states that bufferGranularity should be zero
result = 2; when bufferMinSize, bufferMaxSize and bufferPreferredSize are the
same. We assume that is the case.
*/
while( result < suggestedLatencyFrames ) result = driverInfo->bufferPreferredSize;
result *= 2; }
else if( driverInfo->bufferGranularity == -1 ) /* power-of-two */
{
/* We assume bufferMinSize and bufferMaxSize are powers of two. */
result = NextPowerOfTwo( targetBufferingLatencyFrames );
if( result < (unsigned long)driverInfo->bufferMinSize ) if( result < (unsigned long)driverInfo->bufferMinSize )
result = driverInfo->bufferMinSize; result = driverInfo->bufferMinSize;
@ -1663,36 +1684,150 @@ static unsigned long SelectHostBufferSize( unsigned long suggestedLatencyFrames,
if( result > (unsigned long)driverInfo->bufferMaxSize ) if( result > (unsigned long)driverInfo->bufferMaxSize )
result = driverInfo->bufferMaxSize; result = driverInfo->bufferMaxSize;
} }
else if( driverInfo->bufferGranularity == 0 ) else /* modulo bufferGranularity */
{ {
/* the documentation states that bufferGranularity should be /* round up to the next multiple of granularity */
zero when bufferMinSize, bufferMaxSize and unsigned long n = (targetBufferingLatencyFrames + driverInfo->bufferGranularity - 1)
bufferPreferredSize are the same. We assume that is the case. / driverInfo->bufferGranularity;
*/
result = driverInfo->bufferPreferredSize; result = n * driverInfo->bufferGranularity;
}
else
{
/* modulo granularity */
unsigned long remainder = if( result < (unsigned long)driverInfo->bufferMinSize )
suggestedLatencyFrames % driverInfo->bufferGranularity; result = driverInfo->bufferMinSize;
if( remainder == 0 )
{
result = suggestedLatencyFrames;
}
else
{
result = suggestedLatencyFrames
+ (driverInfo->bufferGranularity - remainder);
if( result > (unsigned long)driverInfo->bufferMaxSize ) if( result > (unsigned long)driverInfo->bufferMaxSize )
result = driverInfo->bufferMaxSize; result = driverInfo->bufferMaxSize;
} }
} }
return result;
}
static unsigned long SelectHostBufferSizeForSpecifiedUserFramesPerBuffer(
unsigned long targetBufferingLatencyFrames, unsigned long userFramesPerBuffer,
PaAsioDriverInfo *driverInfo )
{
/* Select a host buffer size conforming to targetBufferingLatencyFrames
and the device's supported buffer sizes.
The return value will always be a multiple of userFramesPerBuffer.
If a valid buffer size can not be found the function returns 0.
The current implementation uses a simple iterative search for clarity.
Feel free to suggest a closed form solution.
*/
unsigned long result = 0;
assert( userFramesPerBuffer != 0 );
if( driverInfo->bufferGranularity == 0 ) /* single fixed host buffer size */
{
/* The documentation states that bufferGranularity should be zero
when bufferMinSize, bufferMaxSize and bufferPreferredSize are the
same. We assume that is the case.
*/
if( (driverInfo->bufferPreferredSize % userFramesPerBuffer) == 0 )
result = driverInfo->bufferPreferredSize;
} }
else if( driverInfo->bufferGranularity == -1 ) /* power-of-two */
{
/* We assume bufferMinSize and bufferMaxSize are powers of two. */
/* Search all powers of two in the range [bufferMinSize,bufferMaxSize]
for multiples of userFramesPerBuffer. We prefer the first multiple
that is equal or greater than targetBufferingLatencyFrames, or
failing that, the largest multiple less than
targetBufferingLatencyFrames.
*/
unsigned long x = (unsigned long)driverInfo->bufferMinSize;
do {
if( (x % userFramesPerBuffer) == 0 )
{
/* any power-of-two multiple of userFramesPerBuffer is acceptable */
result = x;
if( result >= targetBufferingLatencyFrames )
break; /* stop. a value >= to targetBufferingLatencyFrames is ideal. */
}
x *= 2;
} while( x <= (unsigned long)driverInfo->bufferMaxSize );
}
else /* modulo granularity */
{
/* We assume bufferMinSize is a multiple of bufferGranularity. */
/* Search all multiples of bufferGranularity in the range
[bufferMinSize,bufferMaxSize] for multiples of userFramesPerBuffer.
We prefer the first multiple that is equal or greater than
targetBufferingLatencyFrames, or failing that, the largest multiple
less than targetBufferingLatencyFrames.
*/
unsigned long x = (unsigned long)driverInfo->bufferMinSize;
do {
if( (x % userFramesPerBuffer) == 0 )
{
/* any multiple of bufferGranularity that is also a multiple of userFramesPerBuffer is acceptable */
result = x;
if( result >= targetBufferingLatencyFrames )
break; /* stop. a value >= to targetBufferingLatencyFrames is ideal. */
}
x += driverInfo->bufferGranularity;
} while( x <= (unsigned long)driverInfo->bufferMaxSize );
}
return result;
}
static unsigned long SelectHostBufferSize(
unsigned long targetBufferingLatencyFrames,
unsigned long userFramesPerBuffer, PaAsioDriverInfo *driverInfo )
{
unsigned long result = 0;
/* We select a host buffer size based on the following requirements
(in priority order):
1. The host buffer size must be permissible according to the ASIO
driverInfo buffer size constraints (min, max, granularity or
powers-of-two).
2. If the user specifies a non-zero framesPerBuffer parameter
(userFramesPerBuffer here) the host buffer should be a multiple of
this (subject to the constraints in (1) above).
[NOTE: Where no permissible host buffer size is a multiple of
userFramesPerBuffer, we choose a value as if userFramesPerBuffer were
zero (i.e. we ignore it). This strategy is open for review ~ perhaps
there are still "more optimal" buffer sizes related to
userFramesPerBuffer that we could use.]
3. The host buffer size should be greater than or equal to
targetBufferingLatencyFrames, subject to (1) and (2) above. Where it
is not possible to select a host buffer size equal or greater than
targetBufferingLatencyFrames, the highest buffer size conforming to
(1) and (2) should be chosen.
*/
if( userFramesPerBuffer != 0 )
{
/* userFramesPerBuffer is specified, try to find a buffer size that's
a multiple of it */
result = SelectHostBufferSizeForSpecifiedUserFramesPerBuffer(
targetBufferingLatencyFrames, userFramesPerBuffer, driverInfo );
}
if( result == 0 )
{
/* either userFramesPerBuffer was not specified, or we couldn't find a
host buffer size that is a multiple of it. Select a host buffer size
according to targetBufferingLatencyFrames and the ASIO driverInfo
buffer size constraints.
*/
result = SelectHostBufferSizeForUnspecifiedUserFramesPerBuffer(
targetBufferingLatencyFrames, driverInfo );
} }
return result; return result;
@ -2101,10 +2236,29 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
} }
else /* Using callback interface... */ else /* Using callback interface... */
{ {
framesPerHostBuffer = SelectHostBufferSize( /* Select the host buffer size based on user framesPerBuffer and the
maximum of suggestedInputLatencyFrames and
suggestedOutputLatencyFrames.
We should subtract any fixed known driver latency from
suggestedLatencyFrames before computing the host buffer size.
However, the ASIO API doesn't provide a method for determining fixed
latencies independent of the host buffer size. ASIOGetLatencies()
only returns latencies after the buffer size has been configured, so
we can't reliably use it to determine fixed latencies here.
We could set the preferred buffer size and then subtract it from
the values returned from ASIOGetLatencies, but this would not be 100%
reliable, so we don't do it.
*/
unsigned long targetBufferingLatencyFrames =
(( suggestedInputLatencyFrames > suggestedOutputLatencyFrames ) (( suggestedInputLatencyFrames > suggestedOutputLatencyFrames )
? suggestedInputLatencyFrames : suggestedOutputLatencyFrames), framesPerBuffer, ? suggestedInputLatencyFrames
driverInfo ); : suggestedOutputLatencyFrames);
framesPerHostBuffer = SelectHostBufferSize( targetBufferingLatencyFrames,
framesPerBuffer, driverInfo );
} }
@ -2254,7 +2408,8 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
stream->outputBufferConverter = 0; stream->outputBufferConverter = 0;
} }
/* Values returned by ASIOGetLatencies() include the latency introduced by
the ASIO double buffer. */
ASIOGetLatencies( &stream->asioInputLatencyFrames, &stream->asioOutputLatencyFrames ); ASIOGetLatencies( &stream->asioInputLatencyFrames, &stream->asioOutputLatencyFrames );

View File

@ -990,18 +990,19 @@ static void UpdateTimeStampOffsets( PaMacCoreStream *stream )
} }
/* ================================================================================= */ /* ================================================================================= */
/* Query sample rate property. */
static OSStatus UpdateSampleRateFromDeviceProperty( PaMacCoreStream *stream, AudioDeviceID deviceID, Boolean isInput ) /* can be used to update from nominal or actual sample rate */
static OSStatus UpdateSampleRateFromDeviceProperty( PaMacCoreStream *stream, AudioDeviceID deviceID, Boolean isInput, AudioDevicePropertyID sampleRatePropertyID )
{ {
PaMacCoreDeviceProperties * deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties; PaMacCoreDeviceProperties * deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties;
/* FIXME: not sure if this should be the sample rate of the output device or the output unit */
Float64 actualSampleRate = deviceProperties->sampleRate; Float64 sampleRate = 0.0;
UInt32 propSize = sizeof(Float64); UInt32 propSize = sizeof(Float64);
OSStatus osErr = AudioDeviceGetProperty( deviceID, 0, isInput, kAudioDevicePropertyActualSampleRate, &propSize, &actualSampleRate); OSStatus osErr = AudioDeviceGetProperty( deviceID, 0, isInput, sampleRatePropertyID, &propSize, &sampleRate);
if( (osErr == noErr) && (actualSampleRate > 1000.0) ) // avoid divide by zero if there's an error if( (osErr == noErr) && (sampleRate > 1000.0) ) /* avoid divide by zero if there's an error */
{ {
deviceProperties->sampleRate = actualSampleRate; deviceProperties->sampleRate = sampleRate;
deviceProperties->samplePeriod = 1.0 / actualSampleRate; deviceProperties->samplePeriod = 1.0 / sampleRate;
} }
return osErr; return osErr;
} }
@ -1013,7 +1014,7 @@ static OSStatus AudioDevicePropertyActualSampleRateListenerProc( AudioDeviceID i
// Make sure the callback is operating on a stream that is still valid! // Make sure the callback is operating on a stream that is still valid!
assert( stream->streamRepresentation.magic == PA_STREAM_MAGIC ); assert( stream->streamRepresentation.magic == PA_STREAM_MAGIC );
OSStatus osErr = UpdateSampleRateFromDeviceProperty( stream, inDevice, isInput ); OSStatus osErr = UpdateSampleRateFromDeviceProperty( stream, inDevice, isInput, kAudioDevicePropertyActualSampleRate );
if( osErr == noErr ) if( osErr == noErr )
{ {
UpdateTimeStampOffsets( stream ); UpdateTimeStampOffsets( stream );
@ -1077,9 +1078,6 @@ static OSStatus SetupDevicePropertyListeners( PaMacCoreStream *stream, AudioDevi
OSStatus osErr = noErr; OSStatus osErr = noErr;
PaMacCoreDeviceProperties *deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties; PaMacCoreDeviceProperties *deviceProperties = isInput ? &stream->inputProperties : &stream->outputProperties;
// Start with the current values for the device properties.
UpdateSampleRateFromDeviceProperty( stream, deviceID, isInput );
if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput, if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput,
kAudioDevicePropertyLatency, &deviceProperties->deviceLatency )) != noErr ) return osErr; kAudioDevicePropertyLatency, &deviceProperties->deviceLatency )) != noErr ) return osErr;
if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput, if( (osErr = QueryUInt32DeviceProperty( deviceID, isInput,
@ -1579,7 +1577,7 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
double sampleRate, double sampleRate,
UInt32 requestedFramesPerBuffer ) UInt32 requestedFramesPerBuffer )
{ {
UInt32 suggested = 0; UInt32 resultBufferSizeFrames = 0;
// Use maximum of suggested input and output latencies. // Use maximum of suggested input and output latencies.
if( inputParameters ) if( inputParameters )
{ {
@ -1588,28 +1586,25 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedInputLatency; SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedInputLatency;
// Prevent negative latency. // Prevent negative latency.
variableLatencyFrames = MAX( variableLatencyFrames, 0 ); variableLatencyFrames = MAX( variableLatencyFrames, 0 );
suggested = MAX( suggested, (UInt32) variableLatencyFrames ); resultBufferSizeFrames = MAX( resultBufferSizeFrames, (UInt32) variableLatencyFrames );
} }
if( outputParameters ) if( outputParameters )
{ {
UInt32 suggestedLatencyFrames = outputParameters->suggestedLatency * sampleRate; UInt32 suggestedLatencyFrames = outputParameters->suggestedLatency * sampleRate;
SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedOutputLatency; SInt32 variableLatencyFrames = suggestedLatencyFrames - fixedOutputLatency;
variableLatencyFrames = MAX( variableLatencyFrames, 0 ); variableLatencyFrames = MAX( variableLatencyFrames, 0 );
suggested = MAX( suggested, (UInt32) variableLatencyFrames ); resultBufferSizeFrames = MAX( resultBufferSizeFrames, (UInt32) variableLatencyFrames );
} }
VDBUG( ("Block Size unspecified. Based on Latency, the user wants a Block Size near: %ld.\n",
suggested ) );
if( requestedFramesPerBuffer != paFramesPerBufferUnspecified ) if( requestedFramesPerBuffer != paFramesPerBufferUnspecified )
{ {
if( suggested > (requestedFramesPerBuffer + 1) ) // make host buffer the next highest integer multiple of user frames per buffer
{ UInt32 n = (resultBufferSizeFrames + requestedFramesPerBuffer - 1) / requestedFramesPerBuffer;
// If the user asks for higher latency than the requested buffer size would provide resultBufferSizeFrames = n * requestedFramesPerBuffer;
// then put multiple user buffers in one host buffer.
UInt32 userBuffersPerHostBuffer = (suggested + (requestedFramesPerBuffer - 1)) / requestedFramesPerBuffer; }else{
suggested = userBuffersPerHostBuffer * requestedFramesPerBuffer; VDBUG( ("Block Size unspecified. Based on Latency, the user wants a Block Size near: %ld.\n",
} resultBufferSizeFrames ) );
} }
// Clip to the capabilities of the device. // Clip to the capabilities of the device.
@ -1617,16 +1612,16 @@ static UInt32 CalculateOptimalBufferSize( PaMacAUHAL *auhalHostApi,
{ {
ClipToDeviceBufferSize( auhalHostApi->devIds[inputParameters->device], ClipToDeviceBufferSize( auhalHostApi->devIds[inputParameters->device],
true, // In the old code isInput was false! true, // In the old code isInput was false!
suggested, &suggested ); resultBufferSizeFrames, &resultBufferSizeFrames );
} }
if( outputParameters ) if( outputParameters )
{ {
ClipToDeviceBufferSize( auhalHostApi->devIds[outputParameters->device], ClipToDeviceBufferSize( auhalHostApi->devIds[outputParameters->device],
false, suggested, &suggested ); false, resultBufferSizeFrames, &resultBufferSizeFrames );
} }
VDBUG(("After querying hardware, setting block size to %ld.\n", suggested)); VDBUG(("After querying hardware, setting block size to %ld.\n", resultBufferSizeFrames));
return suggested; return resultBufferSizeFrames;
} }
/* =================================================================================================== */ /* =================================================================================================== */
@ -1997,52 +1992,47 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
stream->streamRepresentation.streamInfo.sampleRate = sampleRate; stream->streamRepresentation.streamInfo.sampleRate = sampleRate;
stream->sampleRate = sampleRate; stream->sampleRate = sampleRate;
stream->outDeviceSampleRate = 0;
if( stream->outputUnit ) {
Float64 rate;
UInt32 size = sizeof( rate );
result = ERR( AudioDeviceGetProperty( stream->outputDevice,
0,
FALSE,
kAudioDevicePropertyNominalSampleRate,
&size, &rate ) );
if( result )
goto error;
stream->outDeviceSampleRate = rate;
}
stream->inDeviceSampleRate = 0;
if( stream->inputUnit ) {
Float64 rate;
UInt32 size = sizeof( rate );
result = ERR( AudioDeviceGetProperty( stream->inputDevice,
0,
TRUE,
kAudioDevicePropertyNominalSampleRate,
&size, &rate ) );
if( result )
goto error;
stream->inDeviceSampleRate = rate;
}
stream->userInChan = inputChannelCount; stream->userInChan = inputChannelCount;
stream->userOutChan = outputChannelCount; stream->userOutChan = outputChannelCount;
// Setup property listeners for timestamp and latency calculations. // Setup property listeners for timestamp and latency calculations.
pthread_mutex_init( &stream->timingInformationMutex, NULL ); pthread_mutex_init( &stream->timingInformationMutex, NULL );
stream->timingInformationMutexIsInitialized = 1; stream->timingInformationMutexIsInitialized = 1;
InitializeDeviceProperties( &stream->inputProperties ); InitializeDeviceProperties( &stream->inputProperties ); // zeros the struct. doesn't actually init it to useful values
InitializeDeviceProperties( &stream->outputProperties ); InitializeDeviceProperties( &stream->outputProperties ); // zeros the struct. doesn't actually init it to useful values
if( stream->outputUnit ) if( stream->outputUnit )
{ {
Boolean isInput = FALSE; Boolean isInput = FALSE;
// Start with the current values for the device properties.
// Init with nominal sample rate. Use actual sample rate where available
result = ERR( UpdateSampleRateFromDeviceProperty(
stream, stream->outputDevice, isInput, kAudioDevicePropertyNominalSampleRate ) );
if( result )
goto error; /* fail if we can't even get a nominal device sample rate */
UpdateSampleRateFromDeviceProperty( stream, stream->outputDevice, isInput, kAudioDevicePropertyActualSampleRate );
SetupDevicePropertyListeners( stream, stream->outputDevice, isInput ); SetupDevicePropertyListeners( stream, stream->outputDevice, isInput );
} }
if( stream->inputUnit ) if( stream->inputUnit )
{ {
Boolean isInput = TRUE; Boolean isInput = TRUE;
// as above
result = ERR( UpdateSampleRateFromDeviceProperty(
stream, stream->inputDevice, isInput, kAudioDevicePropertyNominalSampleRate ) );
if( result )
goto error;
UpdateSampleRateFromDeviceProperty( stream, stream->inputDevice, isInput, kAudioDevicePropertyActualSampleRate );
SetupDevicePropertyListeners( stream, stream->inputDevice, isInput ); SetupDevicePropertyListeners( stream, stream->inputDevice, isInput );
} }
UpdateTimeStampOffsets( stream ); UpdateTimeStampOffsets( stream );
// Setup copies to be used by audio callback. // Setup timestamp copies to be used by audio callback.
stream->timestampOffsetCombined_ioProcCopy = stream->timestampOffsetCombined; stream->timestampOffsetCombined_ioProcCopy = stream->timestampOffsetCombined;
stream->timestampOffsetInputDevice_ioProcCopy = stream->timestampOffsetInputDevice; stream->timestampOffsetInputDevice_ioProcCopy = stream->timestampOffsetInputDevice;
stream->timestampOffsetOutputDevice_ioProcCopy = stream->timestampOffsetOutputDevice; stream->timestampOffsetOutputDevice_ioProcCopy = stream->timestampOffsetOutputDevice;

View File

@ -120,7 +120,11 @@ typedef struct PaMacCoreDeviceProperties
UInt32 bufferFrameSize; UInt32 bufferFrameSize;
// UInt32 streamLatency; // Seems to be the same as deviceLatency!? // UInt32 streamLatency; // Seems to be the same as deviceLatency!?
UInt32 deviceLatency; UInt32 deviceLatency;
/* Current device sample rate. May change! */ /* Current device sample rate. May change!
These are initialized to the nominal device sample rate,
and updated with the actual sample rate, when/where available.
Note that these are the *device* sample rates, prior to any required
SR conversion. */
Float64 sampleRate; Float64 sampleRate;
Float64 samplePeriod; // reciprocal Float64 samplePeriod; // reciprocal
} }
@ -166,10 +170,6 @@ typedef struct PaMacCoreStream
ACTIVE = 3 /* The stream is active and running. */ ACTIVE = 3 /* The stream is active and running. */
} state; } state;
double sampleRate; double sampleRate;
//these may be different from the stream sample rate due to SR conversion:
double outDeviceSampleRate;
double inDeviceSampleRate;
PaMacCoreDeviceProperties inputProperties; PaMacCoreDeviceProperties inputProperties;
PaMacCoreDeviceProperties outputProperties; PaMacCoreDeviceProperties outputProperties;

View File

@ -1,5 +1,5 @@
/* /*
* $Id: pa_win_ds.c 1744 2011-08-25 15:59:32Z rossb $ * $Id: pa_win_ds.c 1794 2011-11-24 18:11:33Z rossb $
* Portable Audio I/O Library DirectSound implementation * Portable Audio I/O Library DirectSound implementation
* *
* Authors: Phil Burk, Robert Marsanyi & Ross Bencina * Authors: Phil Burk, Robert Marsanyi & Ross Bencina
@ -152,6 +152,13 @@ PA_THREAD_FUNC ProcessingThreadProc( void *pArg );
#define PA_DS_WIN_WDM_DEFAULT_LATENCY_ (.120) #define PA_DS_WIN_WDM_DEFAULT_LATENCY_ (.120)
/* we allow the polling period to range between 1 and 100ms.
prior to August 2011 we limited the minimum polling period to 10ms.
*/
#define PA_DS_MINIMUM_POLLING_PERIOD_SECONDS (0.001) /* 1ms */
#define PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS (0.100) /* 100ms */
#define PA_DS_POLLING_JITTER_SECONDS (0.001) /* 1ms */
#define SECONDS_PER_MSEC (0.001) #define SECONDS_PER_MSEC (0.001)
#define MSECS_PER_SECOND (1000) #define MSECS_PER_SECOND (1000)
@ -1336,6 +1343,13 @@ static PaError ValidateWinDirectSoundSpecificStreamInfo(
{ {
return paIncompatibleHostApiSpecificStreamInfo; return paIncompatibleHostApiSpecificStreamInfo;
} }
if( streamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
{
if( streamInfo->framesPerBuffer <= 0 )
return paIncompatibleHostApiSpecificStreamInfo;
}
} }
return paNoError; return paNoError;
@ -1541,7 +1555,13 @@ static HRESULT InitFullDuplexInputOutputBuffers( PaWinDsStream *stream,
#endif /* PAWIN_USE_DIRECTSOUNDFULLDUPLEXCREATE */ #endif /* PAWIN_USE_DIRECTSOUNDFULLDUPLEXCREATE */
static HRESULT InitInputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device, PaSampleFormat sampleFormat, unsigned long nFrameRate, WORD nChannels, int bytesPerBuffer, PaWinWaveFormatChannelMask channelMask ) static HRESULT InitInputBuffer( PaWinDsStream *stream,
PaWinDsDeviceInfo *device,
PaSampleFormat sampleFormat,
unsigned long nFrameRate,
WORD nChannels,
int bytesPerBuffer,
PaWinWaveFormatChannelMask channelMask )
{ {
DSCBUFFERDESC captureDesc; DSCBUFFERDESC captureDesc;
PaWinWaveFormat waveFormat; PaWinWaveFormat waveFormat;
@ -1582,7 +1602,10 @@ static HRESULT InitInputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device
} }
static HRESULT InitOutputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device, PaSampleFormat sampleFormat, unsigned long nFrameRate, WORD nChannels, int bytesPerBuffer, PaWinWaveFormatChannelMask channelMask ) static HRESULT InitOutputBuffer( PaWinDsStream *stream, PaWinDsDeviceInfo *device,
PaSampleFormat sampleFormat, unsigned long nFrameRate,
WORD nChannels, int bytesPerBuffer,
PaWinWaveFormatChannelMask channelMask )
{ {
HRESULT result; HRESULT result;
HWND hWnd; HWND hWnd;
@ -1680,18 +1703,15 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
unsigned long suggestedOutputLatencyFrames, unsigned long suggestedOutputLatencyFrames,
double sampleRate, unsigned long userFramesPerBuffer ) double sampleRate, unsigned long userFramesPerBuffer )
{ {
/* we allow the polling period to range between 1 and 100ms. unsigned long minimumPollingPeriodFrames = sampleRate * PA_DS_MINIMUM_POLLING_PERIOD_SECONDS;
prior to August 2011 we limited the minimum polling period to 10ms. unsigned long maximumPollingPeriodFrames = sampleRate * PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS;
*/ unsigned long pollingJitterFrames = sampleRate * PA_DS_POLLING_JITTER_SECONDS;
unsigned long minimumPollingPeriodFrames = sampleRate / 1000; /* 1ms */
unsigned long maximumPollingPeriodFrames = sampleRate / 10; /* 100ms */
unsigned long pollingJitterFrames = sampleRate / 1000; /* 1ms */
if( userFramesPerBuffer == paFramesPerBufferUnspecified ) if( userFramesPerBuffer == paFramesPerBufferUnspecified )
{ {
unsigned long suggestedLatencyFrames = max( suggestedInputLatencyFrames, suggestedOutputLatencyFrames ); unsigned long targetBufferingLatencyFrames = max( suggestedInputLatencyFrames, suggestedOutputLatencyFrames );
*pollingPeriodFrames = suggestedLatencyFrames / 4; *pollingPeriodFrames = targetBufferingLatencyFrames / 4;
if( *pollingPeriodFrames < minimumPollingPeriodFrames ) if( *pollingPeriodFrames < minimumPollingPeriodFrames )
{ {
*pollingPeriodFrames = minimumPollingPeriodFrames; *pollingPeriodFrames = minimumPollingPeriodFrames;
@ -1702,14 +1722,14 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
} }
*hostBufferSizeFrames = *pollingPeriodFrames *hostBufferSizeFrames = *pollingPeriodFrames
+ max( *pollingPeriodFrames + pollingJitterFrames, suggestedLatencyFrames); + max( *pollingPeriodFrames + pollingJitterFrames, targetBufferingLatencyFrames);
} }
else else
{ {
unsigned long suggestedLatencyFrames = suggestedInputLatencyFrames; unsigned long targetBufferingLatencyFrames = suggestedInputLatencyFrames;
if( isFullDuplex ) if( isFullDuplex )
{ {
/* in full duplex streams we know that the buffer adapter adds userFramesPerBuffer /* In full duplex streams we know that the buffer adapter adds userFramesPerBuffer
extra fixed latency. so we subtract it here as a fixed latency before computing extra fixed latency. so we subtract it here as a fixed latency before computing
the buffer size. being careful not to produce an unrepresentable negative result. the buffer size. being careful not to produce an unrepresentable negative result.
@ -1723,21 +1743,21 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
suggestedOutputLatencyFrames - userFramesPerBuffer; suggestedOutputLatencyFrames - userFramesPerBuffer;
/* maximum of input and adjusted output suggested latency */ /* maximum of input and adjusted output suggested latency */
if( adjustedSuggestedOutputLatencyFrames > suggestedInputLatencyFrames ) if( adjustedSuggestedOutputLatencyFrames > targetBufferingLatencyFrames )
suggestedLatencyFrames = adjustedSuggestedOutputLatencyFrames; targetBufferingLatencyFrames = adjustedSuggestedOutputLatencyFrames;
} }
} }
else else
{ {
/* maximum of input and output suggested latency */ /* maximum of input and output suggested latency */
if( suggestedOutputLatencyFrames > suggestedInputLatencyFrames ) if( suggestedOutputLatencyFrames > suggestedInputLatencyFrames )
suggestedLatencyFrames = suggestedOutputLatencyFrames; targetBufferingLatencyFrames = suggestedOutputLatencyFrames;
} }
*hostBufferSizeFrames = userFramesPerBuffer *hostBufferSizeFrames = userFramesPerBuffer
+ max( userFramesPerBuffer + pollingJitterFrames, suggestedLatencyFrames); + max( userFramesPerBuffer + pollingJitterFrames, targetBufferingLatencyFrames);
*pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), suggestedLatencyFrames / 16 ); *pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), targetBufferingLatencyFrames / 16 );
if( *pollingPeriodFrames > maximumPollingPeriodFrames ) if( *pollingPeriodFrames > maximumPollingPeriodFrames )
{ {
@ -1747,6 +1767,23 @@ static void CalculateBufferSettings( unsigned long *hostBufferSizeFrames,
} }
/*
    Compute the polling period, in frames, for a host buffer whose size has
    already been decided.

    hostBufferSizeFrames   host buffer size in frames
    pollingPeriodFrames    [out] receives the computed polling period
    sampleRate             sample rate used to convert the maximum polling
                           period from seconds to frames
    userFramesPerBuffer    user buffer size in frames

    The result is the larger of userFramesPerBuffer / 4 and
    hostBufferSizeFrames / 16, never less than one frame, and capped at
    PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS worth of frames.

    NOTE(review): no lower clamp to PA_DS_MINIMUM_POLLING_PERIOD_SECONDS is
    applied here, although CalculateBufferSettings() applies one on its
    unspecified-userFramesPerBuffer path -- confirm that this is intended.
*/
static void CalculatePollingPeriodFrames( unsigned long hostBufferSizeFrames,
        unsigned long *pollingPeriodFrames,
        double sampleRate, unsigned long userFramesPerBuffer )
{
    /* upper bound on the polling period, expressed in frames */
    unsigned long maximumPollingPeriodFrames =
            (unsigned long)(sampleRate * PA_DS_MAXIMUM_POLLING_PERIOD_SECONDS);

    /* max(1, ...) keeps the period non-zero when userFramesPerBuffer / 4 is zero */
    *pollingPeriodFrames = max( max(1, userFramesPerBuffer / 4), hostBufferSizeFrames / 16 );

    if( *pollingPeriodFrames > maximumPollingPeriodFrames )
    {
        *pollingPeriodFrames = maximumPollingPeriodFrames;
    }
}
static void SetStreamInfoLatencies( PaWinDsStream *stream, static void SetStreamInfoLatencies( PaWinDsStream *stream,
unsigned long userFramesPerBuffer, unsigned long userFramesPerBuffer,
unsigned long pollingPeriodFrames, unsigned long pollingPeriodFrames,
@ -1808,6 +1845,8 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
int inputChannelCount, outputChannelCount; int inputChannelCount, outputChannelCount;
PaSampleFormat inputSampleFormat, outputSampleFormat; PaSampleFormat inputSampleFormat, outputSampleFormat;
PaSampleFormat hostInputSampleFormat, hostOutputSampleFormat; PaSampleFormat hostInputSampleFormat, hostOutputSampleFormat;
int userRequestedHostInputBufferSizeFrames = 0;
int userRequestedHostOutputBufferSizeFrames = 0;
unsigned long suggestedInputLatencyFrames, suggestedOutputLatencyFrames; unsigned long suggestedInputLatencyFrames, suggestedOutputLatencyFrames;
PaWinDirectSoundStreamInfo *inputStreamInfo, *outputStreamInfo; PaWinDirectSoundStreamInfo *inputStreamInfo, *outputStreamInfo;
PaWinWaveFormatChannelMask inputChannelMask, outputChannelMask; PaWinWaveFormatChannelMask inputChannelMask, outputChannelMask;
@ -1840,6 +1879,9 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
result = ValidateWinDirectSoundSpecificStreamInfo( inputParameters, inputStreamInfo ); result = ValidateWinDirectSoundSpecificStreamInfo( inputParameters, inputStreamInfo );
if( result != paNoError ) return result; if( result != paNoError ) return result;
if( inputStreamInfo && inputStreamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
userRequestedHostInputBufferSizeFrames = inputStreamInfo->framesPerBuffer;
if( inputStreamInfo && inputStreamInfo->flags & paWinDirectSoundUseChannelMask ) if( inputStreamInfo && inputStreamInfo->flags & paWinDirectSoundUseChannelMask )
inputChannelMask = inputStreamInfo->channelMask; inputChannelMask = inputStreamInfo->channelMask;
else else
@ -1877,6 +1919,9 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
result = ValidateWinDirectSoundSpecificStreamInfo( outputParameters, outputStreamInfo ); result = ValidateWinDirectSoundSpecificStreamInfo( outputParameters, outputStreamInfo );
if( result != paNoError ) return result; if( result != paNoError ) return result;
if( outputStreamInfo && outputStreamInfo->flags & paWinDirectSoundUseLowLevelLatencyParameters )
userRequestedHostOutputBufferSizeFrames = outputStreamInfo->framesPerBuffer;
if( outputStreamInfo && outputStreamInfo->flags & paWinDirectSoundUseChannelMask ) if( outputStreamInfo && outputStreamInfo->flags & paWinDirectSoundUseChannelMask )
outputChannelMask = outputStreamInfo->channelMask; outputChannelMask = outputStreamInfo->channelMask;
else else
@ -1889,6 +1934,16 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
suggestedOutputLatencyFrames = 0; suggestedOutputLatencyFrames = 0;
} }
/*
If low level host buffer size is specified for both input and output
the current code requires the sizes to match.
*/
if( (userRequestedHostInputBufferSizeFrames > 0 && userRequestedHostOutputBufferSizeFrames > 0)
&& userRequestedHostInputBufferSizeFrames != userRequestedHostOutputBufferSizeFrames )
return paIncompatibleHostApiSpecificStreamInfo;
/* /*
IMPLEMENT ME: IMPLEMENT ME:
@ -2027,14 +2082,34 @@ static PaError OpenStream( struct PaUtilHostApiRepresentation *hostApi,
/* set up i/o parameters */ /* set up i/o parameters */
if( userRequestedHostInputBufferSizeFrames > 0 || userRequestedHostOutputBufferSizeFrames > 0 )
{
/* use low level parameters */
/* since we use the same host buffer size for input and output
we choose the highest user specified value.
*/
stream->hostBufferSizeFrames = max( userRequestedHostInputBufferSizeFrames, userRequestedHostOutputBufferSizeFrames );
CalculatePollingPeriodFrames(
stream->hostBufferSizeFrames, &pollingPeriodFrames,
sampleRate, framesPerBuffer );
}
else
{
CalculateBufferSettings( &stream->hostBufferSizeFrames, &pollingPeriodFrames, CalculateBufferSettings( &stream->hostBufferSizeFrames, &pollingPeriodFrames,
/* isFullDuplex = */ (inputParameters && outputParameters), /* isFullDuplex = */ (inputParameters && outputParameters),
suggestedInputLatencyFrames, suggestedInputLatencyFrames,
suggestedOutputLatencyFrames, suggestedOutputLatencyFrames,
sampleRate, framesPerBuffer ); sampleRate, framesPerBuffer );
}
stream->pollingPeriodSeconds = pollingPeriodFrames / sampleRate; stream->pollingPeriodSeconds = pollingPeriodFrames / sampleRate;
DBUG(("DirectSound host buffer size frames: %d, polling period seconds: %f, @ sr: %f\n",
stream->hostBufferSizeFrames, stream->pollingPeriodSeconds, sampleRate ));
/* ------------------ OUTPUT */ /* ------------------ OUTPUT */
if( outputParameters ) if( outputParameters )
{ {
@ -2348,7 +2423,8 @@ static int TimeSlice( PaWinDsStream *stream )
long bytesProcessed; long bytesProcessed;
HRESULT hresult; HRESULT hresult;
double outputLatency = 0; double outputLatency = 0;
PaStreamCallbackTimeInfo timeInfo = {0,0,0}; /** @todo implement inputBufferAdcTime */ double inputLatency = 0;
PaStreamCallbackTimeInfo timeInfo = {0,0,0};
/* Input */ /* Input */
LPBYTE lpInBuf1 = NULL; LPBYTE lpInBuf1 = NULL;
@ -2377,11 +2453,12 @@ static int TimeSlice( PaWinDsStream *stream )
filled = readPos - stream->readOffset; filled = readPos - stream->readOffset;
if( filled < 0 ) filled += stream->inputBufferSizeBytes; // unwrap offset if( filled < 0 ) filled += stream->inputBufferSizeBytes; // unwrap offset
bytesFilled = filled; bytesFilled = filled;
inputLatency = ((double)bytesFilled) * stream->secondsPerHostByte;
} }
// FIXME: what happens if IDirectSoundCaptureBuffer_GetCurrentPosition fails? // FIXME: what happens if IDirectSoundCaptureBuffer_GetCurrentPosition fails?
framesToXfer = numInFramesReady = bytesFilled / stream->inputFrameSizeBytes; framesToXfer = numInFramesReady = bytesFilled / stream->inputFrameSizeBytes;
outputLatency = ((double)bytesFilled) * stream->secondsPerHostByte; // FIXME: this doesn't look right. we're calculating output latency in input branch. also secondsPerHostByte is only initialized for the output stream
/** @todo Check for overflow */ /** @todo Check for overflow */
} }
@ -2396,6 +2473,14 @@ static int TimeSlice( PaWinDsStream *stream )
/* Check for underflow */ /* Check for underflow */
if( stream->outputUnderflowCount != previousUnderflowCount ) if( stream->outputUnderflowCount != previousUnderflowCount )
stream->callbackFlags |= paOutputUnderflow; stream->callbackFlags |= paOutputUnderflow;
/* We are about to compute audio into the first byte of empty space in the output buffer.
This audio will reach the DAC after all of the current (non-empty) audio
in the buffer has played. Therefore the output time is the current time
plus the time it takes to play the non-empty bytes in the buffer,
computed here:
*/
outputLatency = ((double)(stream->outputBufferSizeBytes - bytesEmpty)) * stream->secondsPerHostByte;
} }
/* if it's a full duplex stream, set framesToXfer to the minimum of input and output frames ready */ /* if it's a full duplex stream, set framesToXfer to the minimum of input and output frames ready */
@ -2411,8 +2496,6 @@ static int TimeSlice( PaWinDsStream *stream )
/* The outputBufferDacTime parameter should indicates the time at which /* The outputBufferDacTime parameter should indicates the time at which
the first sample of the output buffer is heard at the DACs. */ the first sample of the output buffer is heard at the DACs. */
timeInfo.currentTime = PaUtil_GetTime(); timeInfo.currentTime = PaUtil_GetTime();
timeInfo.outputBufferDacTime = timeInfo.currentTime + outputLatency; // FIXME: QueryOutputSpace gets the playback position, we could use that (?)
PaUtil_BeginBufferProcessing( &stream->bufferProcessor, &timeInfo, stream->callbackFlags ); PaUtil_BeginBufferProcessing( &stream->bufferProcessor, &timeInfo, stream->callbackFlags );
stream->callbackFlags = 0; stream->callbackFlags = 0;
@ -2420,6 +2503,8 @@ static int TimeSlice( PaWinDsStream *stream )
/* Input */ /* Input */
if( stream->bufferProcessor.inputChannelCount > 0 ) if( stream->bufferProcessor.inputChannelCount > 0 )
{ {
timeInfo.inputBufferAdcTime = timeInfo.currentTime - inputLatency;
bytesToXfer = framesToXfer * stream->inputFrameSizeBytes; bytesToXfer = framesToXfer * stream->inputFrameSizeBytes;
hresult = IDirectSoundCaptureBuffer_Lock ( stream->pDirectSoundInputBuffer, hresult = IDirectSoundCaptureBuffer_Lock ( stream->pDirectSoundInputBuffer,
stream->readOffset, bytesToXfer, stream->readOffset, bytesToXfer,
@ -2449,6 +2534,13 @@ static int TimeSlice( PaWinDsStream *stream )
/* Output */ /* Output */
if( stream->bufferProcessor.outputChannelCount > 0 ) if( stream->bufferProcessor.outputChannelCount > 0 )
{ {
/*
We don't currently add outputLatency here because it appears to produce worse
results than not adding it. Need to do more testing to verify this.
*/
/* timeInfo.outputBufferDacTime = timeInfo.currentTime + outputLatency; */
timeInfo.outputBufferDacTime = timeInfo.currentTime;
bytesToXfer = framesToXfer * stream->outputFrameSizeBytes; bytesToXfer = framesToXfer * stream->outputFrameSizeBytes;
hresult = IDirectSoundBuffer_Lock ( stream->pDirectSoundOutputBuffer, hresult = IDirectSoundBuffer_Lock ( stream->pDirectSoundOutputBuffer,
stream->outputBufferWriteOffsetBytes, bytesToXfer, stream->outputBufferWriteOffsetBytes, bytesToXfer,

View File

@ -125,13 +125,13 @@
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
#define NOMMIDS //#define NOMMIDS
#define DYNAMIC_GUID(data) {data} #define DYNAMIC_GUID(data) {data}
#define _NTRTL_ /* Turn off default definition of DEFINE_GUIDEX */ //#define _NTRTL_ /* Turn off default definition of DEFINE_GUIDEX */
#undef DEFINE_GUID //#undef DEFINE_GUID
#define DEFINE_GUID(n,data) EXTERN_C const GUID n = {data} //#define DEFINE_GUID(n,data) EXTERN_C const GUID n = {data}
#define DEFINE_GUID_THUNK(n,data) DEFINE_GUID(n,data) //#define DEFINE_GUID_THUNK(n,data) DEFINE_GUID(n,data)
#define DEFINE_GUIDEX(n) DEFINE_GUID_THUNK(n, STATIC_##n) //#define DEFINE_GUIDEX(n) DEFINE_GUID_THUNK(n, STATIC_##n)
#endif #endif
#include <mmreg.h> #include <mmreg.h>

View File

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
flags="" flags=""
args="$@" args="$@"

View File

@ -135,9 +135,11 @@ void SaveStateBase::gifPathFreeze(u32 path) {
pxAssertDev(!gifPath.gsPack.readAmount, "GS Pack readAmount should be 0!"); pxAssertDev(!gifPath.gsPack.readAmount, "GS Pack readAmount should be 0!");
pxAssertDev(!gifPath.GetPendingGSPackets(), "MTVU GS Pack Queue should be 0!"); pxAssertDev(!gifPath.GetPendingGSPackets(), "MTVU GS Pack Queue should be 0!");
if (!gifPath.isMTVU()) { // FixMe: savestate freeze bug (Gust games) with MTVU enabled
if (IsSaving()) { // Move all the buffered data to the start of buffer if (IsSaving()) { // Move all the buffered data to the start of buffer
gifPath.RealignPacket(); // May add readAmount which we need to clear on load gifPath.RealignPacket(); // May add readAmount which we need to clear on load
} }
}
u8* bufferPtr = gifPath.buffer; // Backup current buffer ptr u8* bufferPtr = gifPath.buffer; // Backup current buffer ptr
Freeze(gifPath.mtvu.fakePackets); Freeze(gifPath.mtvu.fakePackets);
FreezeMem(&gifPath, sizeof(gifPath) - sizeof(gifPath.mtvu)); FreezeMem(&gifPath, sizeof(gifPath) - sizeof(gifPath.mtvu));

View File

@ -176,6 +176,9 @@ SaveStateBase& SaveStateBase::FreezeMainMemory()
SaveStateBase& SaveStateBase::FreezeInternals() SaveStateBase& SaveStateBase::FreezeInternals()
{ {
vu1Thread.WaitVU(); // Finish VU1 just in-case... vu1Thread.WaitVU(); // Finish VU1 just in-case...
// Print this until the MTVU problem in gifPathFreeze is taken care of (rama)
if (THREAD_VU1) Console.Warning("MTVU speedhack is enabled, saved states may not be stable");
if (IsLoading()) PreLoadPrep(); if (IsLoading()) PreLoadPrep();
// Second Block - Various CPU Registers and States // Second Block - Various CPU Registers and States

View File

@ -870,6 +870,7 @@ bool AppConfig::IsOkApplyPreset(int n)
//Have some original and default values at hand to be used later. //Have some original and default values at hand to be used later.
Pcsx2Config::GSOptions original_GS = EmuOptions.GS; Pcsx2Config::GSOptions original_GS = EmuOptions.GS;
AppConfig::FramerateOptions original_Framerate = Framerate;
AppConfig default_AppConfig; AppConfig default_AppConfig;
Pcsx2Config default_Pcsx2Config; Pcsx2Config default_Pcsx2Config;
@ -878,7 +879,7 @@ bool AppConfig::IsOkApplyPreset(int n)
// 1. The panels/entities should prevent manual modifications (by graying out) of settings which the presets control. // 1. The panels/entities should prevent manual modifications (by graying out) of settings which the presets control.
// 2. The panels should not apply values which the presets don't control if the value is initiated by a preset. // 2. The panels should not apply values which the presets don't control if the value is initiated by a preset.
// Currently controlled by the presets: // Currently controlled by the presets:
// - AppConfig: Framerate, EnableSpeedHacks, EnableGameFixes. // - AppConfig: Framerate (except turbo/slowmo factors), EnableSpeedHacks, EnableGameFixes.
// - EmuOptions: Cpu, Gamefixes, SpeedHacks, EnablePatches, GS (except for FrameLimitEnable, VsyncEnable and ManagedVsync). // - EmuOptions: Cpu, Gamefixes, SpeedHacks, EnablePatches, GS (except for FrameLimitEnable, VsyncEnable and ManagedVsync).
// //
// This essentially currently covers all the options on all the panels except for framelimiter which isn't // This essentially currently covers all the options on all the panels except for framelimiter which isn't
@ -891,6 +892,9 @@ bool AppConfig::IsOkApplyPreset(int n)
//Force some settings as a (current) base for all presets. //Force some settings as a (current) base for all presets.
Framerate = default_AppConfig.Framerate; Framerate = default_AppConfig.Framerate;
Framerate.SlomoScalar = original_Framerate.SlomoScalar;
Framerate.TurboScalar = original_Framerate.TurboScalar;
EnableSpeedHacks = false; EnableSpeedHacks = false;
EnableGameFixes = false; EnableGameFixes = false;

View File

@ -114,19 +114,23 @@ void Panels::FramelimiterPanel::ApplyConfigToGui( AppConfig& configToApply, int
const AppConfig::FramerateOptions& appfps( configToApply.Framerate ); const AppConfig::FramerateOptions& appfps( configToApply.Framerate );
const Pcsx2Config::GSOptions& gsconf( configToApply.EmuOptions.GS ); const Pcsx2Config::GSOptions& gsconf( configToApply.EmuOptions.GS );
if( ! (flags & AppConfig::APPLY_FLAG_FROM_PRESET) ) //Presets don't control this: only change if config doesn't come from preset. if( ! (flags & AppConfig::APPLY_FLAG_FROM_PRESET) ){ //Presets don't control these: only change if config doesn't come from preset.
m_check_LimiterDisable->SetValue( !gsconf.FrameLimitEnable ); m_check_LimiterDisable->SetValue( !gsconf.FrameLimitEnable );
m_spin_NominalPct ->SetValue( appfps.NominalScalar.Raw );
m_spin_TurboPct ->SetValue( appfps.TurboScalar.Raw ); m_spin_TurboPct ->SetValue( appfps.TurboScalar.Raw );
m_spin_SlomoPct ->SetValue( appfps.SlomoScalar.Raw ); m_spin_SlomoPct ->SetValue( appfps.SlomoScalar.Raw );
m_spin_TurboPct ->Enable( 1 );
m_spin_SlomoPct ->Enable( 1 );
}
m_text_BaseNtsc ->ChangeValue( gsconf.FramerateNTSC.ToString() ); m_text_BaseNtsc ->ChangeValue( gsconf.FramerateNTSC.ToString() );
m_text_BasePal ->ChangeValue( gsconf.FrameratePAL.ToString() ); m_text_BasePal ->ChangeValue( gsconf.FrameratePAL.ToString() );
m_spin_NominalPct ->SetValue( appfps.NominalScalar.Raw );
m_spin_NominalPct ->Enable(!configToApply.EnablePresets); m_spin_NominalPct ->Enable(!configToApply.EnablePresets);
m_spin_TurboPct ->Enable(!configToApply.EnablePresets);
m_spin_SlomoPct ->Enable(!configToApply.EnablePresets);
// Vsync timing controls only on devel builds / via manual ini editing // Vsync timing controls only on devel builds / via manual ini editing
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
m_text_BaseNtsc ->Enable(!configToApply.EnablePresets); m_text_BaseNtsc ->Enable(!configToApply.EnablePresets);

View File

@ -121,6 +121,9 @@ void RecentIsoManager::Repopulate()
m_Separator = m_Menu->AppendSeparator(); m_Separator = m_Menu->AppendSeparator();
// The following line is important
m_Menu->Remove( m_Menu->Append( -1, wxEmptyString ) );
//Note: the internal recent iso list (m_Items) has the most recent item last (also at the INI file) //Note: the internal recent iso list (m_Items) has the most recent item last (also at the INI file)
// but the menu is composed in reverse order such that the most recent item appears at the top. // but the menu is composed in reverse order such that the most recent item appears at the top.
for( int i=cnt-1; i>=0; --i ) for( int i=cnt-1; i>=0; --i )
@ -176,7 +179,7 @@ void RecentIsoManager::InsertIntoMenu( int id )
if (this->m_firstIdForMenuItems_or_wxID_ANY != wxID_ANY) if (this->m_firstIdForMenuItems_or_wxID_ANY != wxID_ANY)
wxid = this->m_firstIdForMenuItems_or_wxID_ANY + id; wxid = this->m_firstIdForMenuItems_or_wxID_ANY + id;
curitem.ItemPtr = m_Menu->Append( wxid, Path::GetFilename(curitem.Filename), curitem.Filename, wxITEM_RADIO ); curitem.ItemPtr = m_Menu->AppendRadioItem( wxid, Path::GetFilename(curitem.Filename), curitem.Filename );
bool exists = wxFileExists( curitem.Filename ); bool exists = wxFileExists( curitem.Filename );
if( m_cursel == id && exists ) if( m_cursel == id && exists )

View File

@ -35,9 +35,9 @@ GPUDrawScanline::~GPUDrawScanline()
{ {
} }
void GPUDrawScanline::BeginDraw(const void* param) void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{ {
memcpy(&m_global, param, sizeof(m_global)); memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.tme && m_global.sel.twin) if(m_global.sel.tme && m_global.sel.twin)
{ {
@ -83,7 +83,7 @@ void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) void GPUDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{ {
GPUScanlineSelector sel = m_global.sel; GPUScanlineSelector sel = m_global.sel;
@ -93,7 +93,7 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
{ {
if(sel.sprite) if(sel.sprite)
{ {
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); GSVector4i t = (GSVector4i(vertex[index[1]].t) >> 8) - GSVector4i::x00000001();
t = t.ps32(t); t = t.ps32(t);
t = t.upl16(t); t = t.upl16(t);

View File

@ -29,6 +29,25 @@
class GPUDrawScanline : public IDrawScanline class GPUDrawScanline : public IDrawScanline
{ {
public:
// Per-draw data handed to the rasterizer: the generic GSRasterizerData fields
// plus the GPU scanline globals for that draw.
//
// `global.clut` starts out NULL and, when non-NULL at destruction time, is
// released with _aligned_free, so whoever assigns it must hand over an
// aligned allocation that this object then owns.
class SharedData : public GSRasterizerData
{
public:
	GPUScanlineGlobalData global;

	SharedData()
	{
		// Mark "no palette attached" so the destructor knows there is nothing to free.
		global.clut = NULL;
	}

	virtual ~SharedData()
	{
		if(global.clut != NULL)
		{
			_aligned_free(global.clut);
		}
	}
};
protected:
GPUScanlineGlobalData m_global; GPUScanlineGlobalData m_global;
GPUScanlineLocalData m_local; GPUScanlineLocalData m_local;
@ -41,12 +60,12 @@ public:
// IDrawScanline // IDrawScanline
void BeginDraw(const void* param); void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels); void EndDraw(uint64 frame, uint64 ticks, int pixels);
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
void DrawRect(const GSVector4i& r, const GSVertexSW& v); void DrawRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -69,9 +69,11 @@ GSTexture* GPURendererSW::GetOutput()
void GPURendererSW::Draw() void GPURendererSW::Draw()
{ {
shared_ptr<GSRasterizerData> data(new GPURasterizerData()); GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData();
GPUScanlineGlobalData& gd = *(GPUScanlineGlobalData*)data->param; shared_ptr<GSRasterizerData> data(sd);
GPUScanlineGlobalData& gd = sd->global;
const GPUDrawingEnvironment& env = m_env; const GPUDrawingEnvironment& env = m_env;
@ -114,22 +116,26 @@ void GPURendererSW::Draw()
gd.vm = m_mem.GetPixelAddress(0, 0); gd.vm = m_mem.GetPixelAddress(0, 0);
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count);
data->count = m_count;
data->frame = m_perfmon.GetFrame();
data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x; data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y; data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth()); data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight()); data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_count;
memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count);
data->frame = m_perfmon.GetFrame();
int prims = 0;
switch(env.PRIM.TYPE) switch(env.PRIM.TYPE)
{ {
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break; case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; break; case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break; case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break;
default: __assume(0); default: __assume(0);
} }
@ -138,9 +144,9 @@ void GPURendererSW::Draw()
GSVector4 tl(+1e10f); GSVector4 tl(+1e10f);
GSVector4 br(-1e10f); GSVector4 br(-1e10f);
GSVertexSW* v = data->vertices; GSVertexSW* v = data->vertex;
for(int i = 0, j = m_count; i < j; i++) for(int i = 0, j = data->vertex_count; i < j; i++)
{ {
GSVector4 p = v[i].p; GSVector4 p = v[i].p;
@ -163,9 +169,9 @@ void GPURendererSW::Draw()
m_rl->Sync(); m_rl->Sync();
// TODO: m_perfmon.Put(GSPerfMon::Draw, 1); m_perfmon.Put(GSPerfMon::Draw, 1);
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims); m_perfmon.Put(GSPerfMon::Prim, prims);
// TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
} }
void GPURendererSW::VertexKick() void GPURendererSW::VertexKick()

View File

@ -26,28 +26,6 @@
class GPURendererSW : public GPURendererT<GSVertexSW> class GPURendererSW : public GPURendererT<GSVertexSW>
{ {
class GPURasterizerData : public GSRasterizerData
{
public:
GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)_aligned_malloc(sizeof(GPUScanlineGlobalData), 32);
gd->clut = NULL;
param = gd;
}
virtual ~GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
_aligned_free(gd);
}
};
protected: protected:
IRasterizer* m_rl; IRasterizer* m_rl;
GSTexture* m_texture; GSTexture* m_texture;

View File

@ -27,6 +27,11 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
, m_local(*(GPUScanlineLocalData*)param) , m_local(*(GPUScanlineLocalData*)param)
@ -50,7 +55,12 @@ void GPUSetupPrimCodeGenerator::Generate()
{ {
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); // t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
cvttps2dq(xmm1, ptr[ecx + sizeof(GSVertexSW) * 1 + offsetof(GSVertexSW, t)]); mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
cvttps2dq(xmm1, ptr[ecx + offsetof(GSVertexSW, t)]);
psrld(xmm1, 8); psrld(xmm1, 8);
psrld(xmm0, 31); psrld(xmm0, 31);
psubd(xmm1, xmm0); psubd(xmm1, xmm0);
@ -86,6 +96,8 @@ void GPUSetupPrimCodeGenerator::Generate()
if(m_sel.tme || m_sel.iip && m_sel.tfx != 3) if(m_sel.tme || m_sel.iip && m_sel.tfx != 3)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 3; i++) for(int i = 0; i < 3; i++)
{ {
movaps(Xmm(5 + i), ptr[&m_shift[i]]); movaps(Xmm(5 + i), ptr[&m_shift[i]]);

View File

@ -33,6 +33,7 @@
#include "GSRendererDX11.h" #include "GSRendererDX11.h"
#include "GSDevice9.h" #include "GSDevice9.h"
#include "GSDevice11.h" #include "GSDevice11.h"
#include "GSRendererCS.h"
#include "GSSettingsDlg.h" #include "GSSettingsDlg.h"
static HRESULT s_hr = E_FAIL; static HRESULT s_hr = E_FAIL;
@ -213,6 +214,28 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
s_gs = NULL; s_gs = NULL;
} }
if(renderer == 12)
{
#ifdef _WINDOWS
dev = new GSDevice11();
if(dev == NULL)
{
return -1;
}
if(s_gs == NULL)
{
s_gs = new GSRendererCS();
s_renderer = renderer;
}
#endif
}
else
{
switch(renderer / 3) switch(renderer / 3)
{ {
default: default:
@ -222,11 +245,9 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
#endif #endif
#ifdef ENABLE_SDL_DEV #ifdef ENABLE_SDL_DEV
case 2: dev = new GSDeviceSDL(); break; case 2: dev = new GSDeviceSDL(); break;
#else
case 2: dev = NULL; break;
#endif #endif
case 3: dev = new GSDeviceNull(); break; case 3: dev = new GSDeviceNull(); break;
case 4: dev = new GSDeviceOGL(); break; case 5: dev = new GSDeviceOGL(); break;
} }
if(dev == NULL) if(dev == NULL)
@ -240,11 +261,11 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
{ {
default: default:
case 0: case 0:
#ifdef _WINDOWS #ifdef _WINDOWS
s_gs = (renderer / 3) == 0 ? (GSRenderer*)new GSRendererDX9() : (GSRenderer*)new GSRendererDX11(); s_gs = (renderer / 3) == 0 ? (GSRenderer*)new GSRendererDX9() : (GSRenderer*)new GSRendererDX11();
#else #else
s_gs = (GSRenderer*)new GSRendererOGL(); s_gs = (GSRenderer*)new GSRendererOGL();
#endif #endif
break; break;
case 1: case 1:
s_gs = new GSRendererSW(threads); s_gs = new GSRendererSW(threads);
@ -257,6 +278,7 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
s_renderer = renderer; s_renderer = renderer;
} }
} }
}
catch(std::exception& ex) catch(std::exception& ex)
{ {
// Allowing std exceptions to escape the scope of the plugin callstack could // Allowing std exceptions to escape the scope of the plugin callstack could
@ -768,8 +790,6 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
vector<uint8> buff;
if(FILE* fp = fopen(lpszCmdLine, "rb")) if(FILE* fp = fopen(lpszCmdLine, "rb"))
{ {
Console console("GSdx", true); Console console("GSdx", true);
@ -802,10 +822,128 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1); GSvsync(1);
struct Packet {uint8 type, param; uint32 size, addr; vector<uint8> buff;};
list<Packet*> packets;
vector<uint8> buff;
int type;
while((type = fgetc(fp)) != EOF)
{
Packet* p = new Packet();
p->type = (uint8)type;
switch(type)
{
case 0:
p->param = (uint8)fgetc(fp);
fread(&p->size, 4, 1, fp);
switch(p->param)
{
case 0:
p->buff.resize(0x4000);
p->addr = 0x4000 - p->size;
fread(&p->buff[p->addr], p->size, 1, fp);
break;
case 1:
case 2:
case 3:
p->buff.resize(p->size);
fread(&p->buff[0], p->size, 1, fp);
break;
}
break;
case 1:
p->param = (uint8)fgetc(fp);
break;
case 2:
fread(&p->size, 4, 1, fp);
break;
case 3:
p->buff.resize(0x2000);
fread(&p->buff[0], 0x2000, 1, fp);
break;
}
packets.push_back(p);
}
Sleep(100); Sleep(100);
while(IsWindowVisible(hWnd))
{
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
Packet* p = *i;
switch(p->type)
{
case 0:
switch(p->param)
{
case 0: GSgifTransfer1(&p->buff[0], p->addr); break;
case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break;
case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break;
case 3: GSgifTransfer(&p->buff[0], p->size / 16); break;
}
break;
case 1:
GSvsync(p->param);
break;
case 2:
if(buff.size() < p->size) buff.resize(p->size);
GSreadFIFO2(&buff[0], p->size / 16);
break;
case 3:
memcpy(regs, &p->buff[0], 0x2000);
break;
}
}
}
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
delete *i;
}
packets.clear();
Sleep(100);
/*
vector<uint8> buff;
bool exit = false; bool exit = false;
int round = 0;
while(!exit) while(!exit)
{ {
uint32 index; uint32 index;
@ -819,6 +957,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
case EOF: case EOF:
fseek(fp, start, 0); fseek(fp, start, 0);
exit = !IsWindowVisible(hWnd); exit = !IsWindowVisible(hWnd);
//exit = ++round == 60;
break; break;
case 0: case 0:
@ -871,6 +1010,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
break; break;
} }
} }
*/
GSclose(); GSclose();
GSshutdown(); GSshutdown();

View File

@ -28,8 +28,14 @@
#define PLUGIN_VERSION 16 #define PLUGIN_VERSION 16
#define MAX_PAGES 512 #define VM_SIZE 4194304
#define MAX_BLOCKS 16384 #define PAGE_SIZE 8192
#define BLOCK_SIZE 256
#define COLUMN_SIZE 64
#define MAX_PAGES (VM_SIZE / PAGE_SIZE)
#define MAX_BLOCKS (VM_SIZE / BLOCK_SIZE)
#define MAX_COLUMNS (VM_SIZE / COLUMN_SIZE)
//if defined, will send much info in reply to the API title info query from PCSX2 //if defined, will send much info in reply to the API title info query from PCSX2
//default should be undefined //default should be undefined
@ -638,8 +644,8 @@ REG64_(GIFReg, FINISH)
REG_END REG_END
REG64_(GIFReg, FOG) REG64_(GIFReg, FOG)
uint8 _PAD1[4+3]; uint8 _PAD1[7];
uint8 F:8; uint8 F;
REG_END REG_END
REG64_(GIFReg, FOGCOL) REG64_(GIFReg, FOGCOL)
@ -1021,7 +1027,6 @@ REG128_(GIFPacked, XYZF2)
uint16 _PAD1; uint16 _PAD1;
uint16 Y; uint16 Y;
uint16 _PAD2; uint16 _PAD2;
uint32 _PAD3:4; uint32 _PAD3:4;
uint32 Z:24; uint32 Z:24;
uint32 _PAD4:4; uint32 _PAD4:4;
@ -1030,7 +1035,9 @@ REG128_(GIFPacked, XYZF2)
uint32 _PAD6:3; uint32 _PAD6:3;
uint32 ADC:1; uint32 ADC:1;
uint32 _PAD7:16; uint32 _PAD7:16;
REG_END REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, XYZ2) REG128_(GIFPacked, XYZ2)
uint16 X; uint16 X;
@ -1041,7 +1048,9 @@ REG128_(GIFPacked, XYZ2)
uint32 _PAD3:15; uint32 _PAD3:15;
uint32 ADC:1; uint32 ADC:1;
uint32 _PAD4:16; uint32 _PAD4:16;
REG_END REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, FOG) REG128_(GIFPacked, FOG)
uint32 _PAD1; uint32 _PAD1;
@ -1093,19 +1102,24 @@ __aligned(struct, 32) GIFPath
GSVector4i::store<true>(&tag, v); GSVector4i::store<true>(&tag, v);
reg = 0; reg = 0;
regs = v.uph8(v >> 4) & 0x0f0f0f0f; regs = v.uph8(v >> 4) & 0x0f0f0f0f;
nreg = tag.NREG; nreg = tag.NREG ? tag.NREG : 16;
nloop = tag.NLOOP; nloop = tag.NLOOP;
adonly = nreg == 1 && regs.u8[0] == GIF_REG_A_D; adonly = regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1;
} }
__forceinline uint8 GetReg() __forceinline uint8 GetReg()
{ {
return regs.u8[reg]; // GET_GIF_REG(tag, reg); return regs.u8[reg];
}
__forceinline uint8 GetReg(uint32 index)
{
return regs.u8[index];
} }
__forceinline bool StepReg() __forceinline bool StepReg()
{ {
if((++reg & 0xf) == nreg) if(++reg == nreg)
{ {
reg = 0; reg = 0;

View File

@ -43,16 +43,31 @@ class GSBlock
public: public:
template<int i, bool aligned, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template<int i, bool aligned, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{
GSVector4i v0, v1, v2, v3;
if(aligned)
{ {
const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0]; const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0];
const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1]; const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1];
GSVector4i v0 = GSVector4i::load<aligned>(&s0[0]); v0 = GSVector4i::load<aligned>(&s0[0]);
GSVector4i v1 = GSVector4i::load<aligned>(&s0[1]); v1 = GSVector4i::load<aligned>(&s0[1]);
GSVector4i v2 = GSVector4i::load<aligned>(&s1[0]); v2 = GSVector4i::load<aligned>(&s1[0]);
GSVector4i v3 = GSVector4i::load<aligned>(&s1[1]); v3 = GSVector4i::load<aligned>(&s1[1]);
GSVector4i::sw64(v0, v2, v1, v3); GSVector4i::sw64(v0, v2, v1, v3);
}
else
{
const uint8* s0 = &src[srcpitch * 0];
const uint8* s1 = &src[srcpitch * 1];
v0 = GSVector4i::load(&s0[0], &s1[0]);
v1 = GSVector4i::load(&s0[8], &s1[8]);
v2 = GSVector4i::load(&s0[16], &s1[16]);
v3 = GSVector4i::load(&s0[24], &s1[24]);
}
if(mask == 0xffffffff) if(mask == 0xffffffff)
{ {
@ -263,15 +278,27 @@ public:
} }
template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
GSVector4i v0, v1, v2, v3;
if(aligned)
{ {
const GSVector4i* s = (const GSVector4i*)src; const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = s[i * 4 + 0]; v0 = s[i * 4 + 0];
GSVector4i v1 = s[i * 4 + 1]; v1 = s[i * 4 + 1];
GSVector4i v2 = s[i * 4 + 2]; v2 = s[i * 4 + 2];
GSVector4i v3 = s[i * 4 + 3]; v3 = s[i * 4 + 3];
GSVector4i::sw64(v0, v1, v2, v3); GSVector4i::sw64(v0, v1, v2, v3);
}
else
{
v0 = GSVector4i::load(&src[i * 64 + 0], &src[i * 64 + 16]);
v1 = GSVector4i::load(&src[i * 64 + 32], &src[i * 64 + 48]);
v2 = GSVector4i::load(&src[i * 64 + 8], &src[i * 64 + 24]);
v3 = GSVector4i::load(&src[i * 64 + 40], &src[i * 64 + 56]);
}
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];

View File

@ -35,7 +35,8 @@ GSDevice::GSDevice()
, m_1x1(NULL) , m_1x1(NULL)
, m_frame(0) , m_frame(0)
{ {
memset(&m_vertices, 0, sizeof(m_vertices)); memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
} }
GSDevice::~GSDevice() GSDevice::~GSDevice()
@ -135,8 +136,10 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format)
void GSDevice::EndScene() void GSDevice::EndScene()
{ {
m_vertices.start += m_vertices.count; m_vertex.start += m_vertex.count;
m_vertices.count = 0; m_vertex.count = 0;
m_index.start += m_index.count;
m_index.count = 0;
} }
void GSDevice::Recycle(GSTexture* t) void GSDevice::Recycle(GSTexture* t)

View File

@ -72,7 +72,8 @@ protected:
GSTexture* m_fxaa; GSTexture* m_fxaa;
GSTexture* m_1x1; GSTexture* m_1x1;
GSTexture* m_current; GSTexture* m_current;
struct {size_t stride, start, count, limit;} m_vertices; struct {size_t stride, start, count, limit;} m_vertex;
struct {size_t start, count, limit;} m_index;
unsigned int m_frame; // for ageing the pool unsigned int m_frame; // for ageing the pool
virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0; virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0;
@ -101,6 +102,7 @@ public:
virtual void BeginScene() {} virtual void BeginScene() {}
virtual void DrawPrimitive() {}; virtual void DrawPrimitive() {};
virtual void DrawIndexedPrimitive() {}
virtual void EndScene(); virtual void EndScene();
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}

View File

@ -144,7 +144,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_convert.ps); i++) for(int i = 0; i < countof(m_convert.ps); i++)
{ {
hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]); hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i).c_str(), NULL, &m_convert.ps[i]);
} }
memset(&dsd, 0, sizeof(dsd)); memset(&dsd, 0, sizeof(dsd));
@ -172,7 +172,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_merge.ps); i++) for(int i = 0; i < countof(m_merge.ps); i++)
{ {
hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i), NULL, &m_merge.ps[i]); hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i).c_str(), NULL, &m_merge.ps[i]);
} }
memset(&bsd, 0, sizeof(bsd)); memset(&bsd, 0, sizeof(bsd));
@ -200,7 +200,7 @@ bool GSDevice11::Create(GSWnd* wnd)
for(int i = 0; i < countof(m_interlace.ps); i++) for(int i = 0; i < countof(m_interlace.ps); i++)
{ {
hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i), NULL, &m_interlace.ps[i]); hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i).c_str(), NULL, &m_interlace.ps[i]);
} }
// fxaa // fxaa
@ -352,7 +352,17 @@ void GSDevice11::Flip()
void GSDevice11::DrawPrimitive() void GSDevice11::DrawPrimitive()
{ {
m_ctx->Draw(m_vertices.count, m_vertices.start); m_ctx->Draw(m_vertex.count, m_vertex.start);
}
// Issues an indexed draw for the batch currently described by m_index / m_vertex:
// m_index.count indices beginning at m_index.start, with m_vertex.start passed as
// the third argument (the base-vertex offset, assuming m_ctx is the D3D11
// immediate context -- confirm against the class declaration).
void GSDevice11::DrawIndexedPrimitive()
{
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
}
void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z)
{
m_ctx->Dispatch(x, y, z);
} }
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -709,18 +719,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
} }
} }
void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{ {
ASSERT(m_vertices.count == 0); ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride) if(count * stride > m_vertex.limit * m_vertex.stride)
{ {
m_vb_old = m_vb; m_vb_old = m_vb;
m_vb = NULL; m_vb = NULL;
m_vertices.start = 0; m_vertex.start = 0;
m_vertices.count = 0; m_vertex.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 11000); m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
} }
if(m_vb == NULL) if(m_vb == NULL)
@ -730,7 +740,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
memset(&bd, 0, sizeof(bd)); memset(&bd, 0, sizeof(bd));
bd.Usage = D3D11_USAGE_DYNAMIC; bd.Usage = D3D11_USAGE_DYNAMIC;
bd.ByteWidth = m_vertices.limit * stride; bd.ByteWidth = m_vertex.limit * stride;
bd.BindFlags = D3D11_BIND_VERTEX_BUFFER; bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
@ -743,9 +753,9 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{ {
m_vertices.start = 0; m_vertex.start = 0;
type = D3D11_MAP_WRITE_DISCARD; type = D3D11_MAP_WRITE_DISCARD;
} }
@ -754,13 +764,13 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m))) if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
{ {
GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride); GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
m_ctx->Unmap(m_vb, 0); m_ctx->Unmap(m_vb, 0);
} }
m_vertices.count = count; m_vertex.count = count;
m_vertices.stride = stride; m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride); IASetVertexBuffer(m_vb, stride);
} }
@ -779,6 +789,70 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
} }
} }
// Copies `count` 32-bit indices from `index` into the dynamic index buffer and
// binds that buffer to the input assembler.
//
// The buffer is used as a ring: each batch is appended at m_index.start with a
// no-overwrite map, and when the batch would run past m_index.limit the write
// position wraps to 0 and the buffer contents are discarded. If the batch is
// larger than the whole buffer, a bigger buffer is created first.
//
// Returns without binding anything if the buffer cannot be created.
void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
{
	// The previous batch must have been retired (count is reset in EndScene).
	ASSERT(m_index.count == 0);

	if(count > m_index.limit)
	{
		// Current buffer is too small: park it in m_ib_old and force a new
		// one to be created below.
		m_ib_old = m_ib;
		m_ib = NULL;

		// Restart at the head of the new buffer, exactly as IASetVertexBuffer
		// does for m_vertex; keeping the stale offset would waste the front
		// of the freshly created buffer.
		m_index.start = 0;
		m_index.count = 0;
		// Grow by 50% over the request, with a floor of 11000 indices.
		m_index.limit = std::max<int>(count * 3 / 2, 11000);
	}

	if(m_ib == NULL)
	{
		D3D11_BUFFER_DESC bd;

		memset(&bd, 0, sizeof(bd));

		bd.Usage = D3D11_USAGE_DYNAMIC;
		bd.ByteWidth = m_index.limit * sizeof(uint32);
		bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
		bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;

		HRESULT hr;

		hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);

		if(FAILED(hr)) return;
	}

	// Append after the data already in the buffer unless it would not fit.
	D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;

	if(m_index.start + count > m_index.limit)
	{
		// Wrap around: start over at the head and discard the old contents.
		m_index.start = 0;

		type = D3D11_MAP_WRITE_DISCARD;
	}

	D3D11_MAPPED_SUBRESOURCE m;

	if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
	{
		memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));

		m_ctx->Unmap(m_ib, 0);
	}

	// NOTE(review): count is recorded and the buffer bound even when Map fails,
	// mirroring IASetVertexBuffer -- confirm that is the intended behaviour.
	m_index.count = count;

	IASetIndexBuffer(m_ib);
}
// Binds `ib` as the index buffer (32-bit indices, offset 0).
// The call is skipped when `ib` already matches the cached m_state.ib.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
{
	if(m_state.ib == ib)
	{
		return;
	}

	m_state.ib = ib;

	m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{ {
if(m_state.layout != layout) if(m_state.layout != layout)
@ -868,7 +942,7 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
m_ctx->PSSetShader(ps, NULL, 0); m_ctx->PSSetShader(ps, NULL, 0);
} }
if (m_srv_changed) if(m_srv_changed)
{ {
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv); m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
@ -890,6 +964,38 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
} }
} }
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
{
// TODO: if(m_state.cs_srv[i] != srv)
{
// TODO: m_state.cs_srv[i] = srv;
m_ctx->CSSetShaderResources(i, 1, &srv);
}
}
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
{
// TODO: if(m_state.cs_uav[i] != uav)
{
// TODO: m_state.cs_uav[i] = uav;
// uint32 count[] = {-1};
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, NULL);
}
}
// Makes `cs` the active compute shader.
// The call is skipped when `cs` already matches the cached m_state.cs.
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
{
	if(m_state.cs == cs)
	{
		return;
	}

	m_state.cs = cs;

	m_ctx->CSSetShader(cs, NULL, 0);
}
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref) void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
{ {
if(m_state.dss != dss || m_state.sref != sref) if(m_state.dss != dss || m_state.sref != sref)
@ -958,7 +1064,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
} }
} }
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il) HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
{ {
HRESULT hr; HRESULT hr;
@ -968,7 +1074,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error; CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL); hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error) if(error)
{ {
@ -997,7 +1103,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr; return hr;
} }
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs) HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs)
{ {
HRESULT hr; HRESULT hr;
@ -1007,7 +1113,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error; CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL); hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error) if(error)
{ {
@ -1029,7 +1135,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr; return hr;
} }
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps) HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
{ {
HRESULT hr; HRESULT hr;
@ -1039,7 +1145,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
CComPtr<ID3D11Blob> shader, error; CComPtr<ID3D11Blob> shader, error;
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL); hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error) if(error)
{ {
@ -1061,3 +1167,67 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
return hr; return hr;
} }
// Compiles the compute shader entry point `entry` from the module resource
// `id` and creates the shader object in *cs.
//
// `macro` is passed through PrepareShaderMacro to build the macro list handed
// to the compiler. Any compiler output is printed to stdout. Returns the
// failing HRESULT of the compile step, otherwise the result of
// CreateComputeShader.
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
{
	HRESULT hr;

	vector<D3D11_SHADER_MACRO> m;

	PrepareShaderMacro(m, macro);

	CComPtr<ID3D11Blob> shader, error;

	// Compute shaders have to be compiled against the compute profile
	// (m_shader.cs), not the pixel shader profile.
	hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);

	if(error)
	{
		printf("%s\n", (const char*)error->GetBufferPointer());
	}

	if(FAILED(hr))
	{
		return hr;
	}

	return m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, cs);
}
// Compiles the compute shader entry point `entry` from the source file `fn`
// using the m_shader.cs profile and creates the shader object in *cs.
//
// Any compiler output is printed to stdout. Returns the failing HRESULT of
// the compile step, otherwise the result of CreateComputeShader.
HRESULT GSDevice11::CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
{
	vector<D3D11_SHADER_MACRO> defines;

	PrepareShaderMacro(defines, macro);

	CComPtr<ID3D11Blob> shader, error;

	HRESULT hr = D3DX11CompileFromFile(fn, &defines[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);

	if(error)
	{
		printf("%s\n", (const char*)error->GetBufferPointer());
	}

	if(FAILED(hr))
	{
		return hr;
	}

	// Success or failure, the creation result is what the caller receives.
	return m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(),NULL, cs);
}

View File

@ -45,6 +45,8 @@ class GSDevice11 : public GSDeviceDX
CComPtr<IDXGISwapChain> m_swapchain; CComPtr<IDXGISwapChain> m_swapchain;
CComPtr<ID3D11Buffer> m_vb; CComPtr<ID3D11Buffer> m_vb;
CComPtr<ID3D11Buffer> m_vb_old; CComPtr<ID3D11Buffer> m_vb_old;
CComPtr<ID3D11Buffer> m_ib;
CComPtr<ID3D11Buffer> m_ib_old;
bool m_srv_changed, m_ss_changed; bool m_srv_changed, m_ss_changed;
@ -52,6 +54,7 @@ class GSDevice11 : public GSDeviceDX
{ {
ID3D11Buffer* vb; ID3D11Buffer* vb;
size_t vb_stride; size_t vb_stride;
ID3D11Buffer* ib;
ID3D11InputLayout* layout; ID3D11InputLayout* layout;
D3D11_PRIMITIVE_TOPOLOGY topology; D3D11_PRIMITIVE_TOPOLOGY topology;
ID3D11VertexShader* vs; ID3D11VertexShader* vs;
@ -61,6 +64,7 @@ class GSDevice11 : public GSDeviceDX
ID3D11PixelShader* ps; ID3D11PixelShader* ps;
ID3D11Buffer* ps_cb; ID3D11Buffer* ps_cb;
ID3D11SamplerState* ps_ss[3]; ID3D11SamplerState* ps_ss[3];
ID3D11ComputeShader* cs;
GSVector2i viewport; GSVector2i viewport;
GSVector4i scissor; GSVector4i scissor;
ID3D11DepthStencilState* dss; ID3D11DepthStencilState* dss;
@ -141,6 +145,8 @@ public:
void SetExclusive(bool isExcl); void SetExclusive(bool isExcl);
void DrawPrimitive(); void DrawPrimitive();
void DrawIndexedPrimitive();
void Dispatch(uint32 x, uint32 y, uint32 z);
void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, const GSVector4& c);
void ClearRenderTarget(GSTexture* t, uint32 c); void ClearRenderTarget(GSTexture* t, uint32 c);
@ -162,8 +168,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride); void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(ID3D11Buffer* ib);
void IASetInputLayout(ID3D11InputLayout* layout); void IASetInputLayout(ID3D11InputLayout* layout);
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology); void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb); void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
@ -172,11 +180,14 @@ public:
void PSSetShaderResource(int i, GSTexture* sr); void PSSetShaderResource(int i, GSTexture* sr);
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb); void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL); void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
void CSSetShader(ID3D11ComputeShader* cs);
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref); void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
void OMSetBlendState(ID3D11BlendState* bs, float bf); void OMSetBlendState(ID3D11BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
void SetupIA(const void* vertices, int count, int prim); void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel); void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
@ -189,8 +200,10 @@ public:
operator ID3D11Device*() {return m_dev;} operator ID3D11Device*() {return m_dev;}
operator ID3D11DeviceContext*() {return m_ctx;} operator ID3D11DeviceContext*() {return m_ctx;}
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il); HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs); HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps); HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
HRESULT CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
}; };

View File

@ -352,8 +352,10 @@ bool GSDevice9::Reset(int w, int h)
m_vb = NULL; m_vb = NULL;
m_vb_old = NULL; m_vb_old = NULL;
m_vertices.start = 0; m_vertex.start = 0;
m_vertices.count = 0; m_vertex.count = 0;
m_index.start = 0;
m_index.count = 0;
if(m_state.vs_cb) _aligned_free(m_state.vs_cb); if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
if(m_state.ps_cb) _aligned_free(m_state.ps_cb); if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
@ -510,25 +512,52 @@ void GSDevice9::DrawPrimitive()
switch(m_state.topology) switch(m_state.topology)
{ {
case D3DPT_TRIANGLELIST: case D3DPT_POINTLIST:
prims = m_vertices.count / 3; prims = m_vertex.count;
break; break;
case D3DPT_LINELIST: case D3DPT_LINELIST:
prims = m_vertices.count / 2; prims = m_vertex.count / 2;
break; break;
case D3DPT_POINTLIST: case D3DPT_LINESTRIP:
prims = m_vertices.count; prims = m_vertex.count - 1;
break;
case D3DPT_TRIANGLELIST:
prims = m_vertex.count / 3;
break; break;
case D3DPT_TRIANGLESTRIP: case D3DPT_TRIANGLESTRIP:
case D3DPT_TRIANGLEFAN: case D3DPT_TRIANGLEFAN:
prims = m_vertices.count - 2; prims = m_vertex.count - 2;
break;
case D3DPT_LINESTRIP:
prims = m_vertices.count - 1;
break; break;
default:
__assume(0);
} }
m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims); m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims);
}
// Issues an indexed draw for the queued indices using the current topology.
//
// The primitive count is derived from m_index.count: lists consume a fixed
// number of indices per primitive, while strips and fans reuse indices of the
// previous primitive (N indices give N - 1 line segments or N - 2 triangles),
// matching the formulas used by DrawPrimitive for vertex counts.
void GSDevice9::DrawIndexedPrimitive()
{
	int prims = 0;

	switch(m_state.topology)
	{
	case D3DPT_POINTLIST:
		prims = m_index.count;
		break;
	case D3DPT_LINELIST:
		prims = m_index.count / 2;
		break;
	case D3DPT_LINESTRIP:
		prims = m_index.count - 1;
		break;
	case D3DPT_TRIANGLELIST:
		prims = m_index.count / 3;
		break;
	case D3DPT_TRIANGLESTRIP:
	case D3DPT_TRIANGLEFAN:
		prims = m_index.count - 2;
		break;
	default:
		__assume(0);
	}

	// NOTE(review): the NumVertices argument is fed m_index.count; confirm
	// whether m_vertex.count is the intended value.
	m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_index.count, m_index.start, prims);
}
void GSDevice9::EndScene() void GSDevice9::EndScene()
@ -881,49 +910,49 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
} }
} }
void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{ {
ASSERT(m_vertices.count == 0); ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride) if(count * stride > m_vertex.limit * m_vertex.stride)
{ {
m_vb_old = m_vb; m_vb_old = m_vb;
m_vb = NULL; m_vb = NULL;
m_vertices.start = 0; m_vertex.start = 0;
m_vertices.count = 0; m_vertex.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 10000); m_vertex.limit = std::max<int>(count * 3 / 2, 10000);
} }
if(m_vb == NULL) if(m_vb == NULL)
{ {
HRESULT hr; HRESULT hr;
hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
if(FAILED(hr)) return; if(FAILED(hr)) return;
} }
uint32 flags = D3DLOCK_NOOVERWRITE; uint32 flags = D3DLOCK_NOOVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{ {
m_vertices.start = 0; m_vertex.start = 0;
flags = D3DLOCK_DISCARD; flags = D3DLOCK_DISCARD;
} }
void* v = NULL; void* ptr = NULL;
if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
{ {
GSVector4i::storent(v, vertices, count * stride); GSVector4i::storent(ptr, vertex, count * stride);
m_vb->Unlock(); m_vb->Unlock();
} }
m_vertices.count = count; m_vertex.count = count;
m_vertices.stride = stride; m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride); IASetVertexBuffer(m_vb, stride);
} }
@ -939,6 +968,61 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
} }
} }
// Copies `count` 32-bit indices from `index` into the dynamic index buffer
// and makes that buffer the current index source.
//
// m_index.start / count / limit are expressed in indices (the byte offsets
// below multiply them by sizeof(uint32)). The buffer is created when missing
// and replaced by a larger one when `count` exceeds m_index.limit. If buffer
// creation fails the function returns without changing the binding.
void GSDevice9::IASetIndexBuffer(const void* index, size_t count)
{
	ASSERT(m_index.count == 0);

	if(count > m_index.limit)
	{
		// Keep a reference to the outgoing buffer in m_ib_old and clear m_ib
		// so that a larger buffer is created below.
		m_ib_old = m_ib;
		m_ib = NULL;

		// Restart at the beginning of the new buffer, as IASetVertexBuffer
		// does; a stale offset would only waste space in the new buffer.
		m_index.start = 0;
		m_index.count = 0;
		m_index.limit = std::max<int>(count * 3 / 2, 11000);
	}

	if(m_ib == NULL)
	{
		HRESULT hr;

		hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL);

		if(FAILED(hr)) return;
	}

	// Append after the data already in the buffer when it fits; otherwise
	// discard the contents and start again from offset 0.
	uint32 flags = D3DLOCK_NOOVERWRITE;

	if(m_index.start + count > m_index.limit)
	{
		m_index.start = 0;

		flags = D3DLOCK_DISCARD;
	}

	void* ptr = NULL;

	if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags)))
	{
		memcpy(ptr, index, count * sizeof(uint32));

		m_ib->Unlock();
	}

	// NOTE(review): count is recorded even when Lock fails - confirm that is intended.
	m_index.count = count;

	IASetIndexBuffer(m_ib);
}
// Sets `ib` as the device's current index buffer.
// The call is skipped when `ib` already matches the cached m_state.ib.
void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib)
{
	if(m_state.ib == ib)
	{
		return;
	}

	m_state.ib = ib;

	m_dev->SetIndices(ib);
}
void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout)
{ {
if(m_state.layout != layout) if(m_state.layout != layout)

View File

@ -82,6 +82,8 @@ class GSDevice9 : public GSDeviceDX
CComPtr<IDirect3DSwapChain9> m_swapchain; CComPtr<IDirect3DSwapChain9> m_swapchain;
CComPtr<IDirect3DVertexBuffer9> m_vb; CComPtr<IDirect3DVertexBuffer9> m_vb;
CComPtr<IDirect3DVertexBuffer9> m_vb_old; CComPtr<IDirect3DVertexBuffer9> m_vb_old;
CComPtr<IDirect3DIndexBuffer9> m_ib;
CComPtr<IDirect3DIndexBuffer9> m_ib_old;
bool m_lost; bool m_lost;
D3DFORMAT m_depth_format; D3DFORMAT m_depth_format;
@ -89,6 +91,7 @@ class GSDevice9 : public GSDeviceDX
{ {
IDirect3DVertexBuffer9* vb; IDirect3DVertexBuffer9* vb;
size_t vb_stride; size_t vb_stride;
IDirect3DIndexBuffer9* ib;
IDirect3DVertexDeclaration9* layout; IDirect3DVertexDeclaration9* layout;
D3DPRIMITIVETYPE topology; D3DPRIMITIVETYPE topology;
IDirect3DVertexShader9* vs; IDirect3DVertexShader9* vs;
@ -169,6 +172,7 @@ public:
void BeginScene(); void BeginScene();
void DrawPrimitive(); void DrawPrimitive();
void DrawIndexedPrimitive();
void EndScene(); void EndScene();
void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, const GSVector4& c);
@ -191,8 +195,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
void IASetInputLayout(IDirect3DVertexDeclaration9* layout); void IASetInputLayout(IDirect3DVertexDeclaration9* layout);
void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology); void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology);
void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len); void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len);
@ -210,7 +216,7 @@ public:
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il); HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps); HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
void SetupIA(const void* vertices, int count, int prim); void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel) {} void SetupGS(GSSelector sel) {}
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -67,18 +67,21 @@ bool GSDeviceDX::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
m_shader.vs = "vs_4_0"; m_shader.vs = "vs_4_0";
m_shader.gs = "gs_4_0"; m_shader.gs = "gs_4_0";
m_shader.ps = "ps_4_0"; m_shader.ps = "ps_4_0";
m_shader.cs = "cs_4_0";
break; break;
case D3D_FEATURE_LEVEL_10_1: case D3D_FEATURE_LEVEL_10_1:
m_shader.model = "0x401"; m_shader.model = "0x401";
m_shader.vs = "vs_4_1"; m_shader.vs = "vs_4_1";
m_shader.gs = "gs_4_1"; m_shader.gs = "gs_4_1";
m_shader.ps = "ps_4_1"; m_shader.ps = "ps_4_1";
m_shader.cs = "cs_4_1";
break; break;
case D3D_FEATURE_LEVEL_11_0: case D3D_FEATURE_LEVEL_11_0:
m_shader.model = "0x500"; m_shader.model = "0x500";
m_shader.vs = "vs_5_0"; m_shader.vs = "vs_5_0";
m_shader.gs = "gs_5_0"; m_shader.gs = "gs_5_0";
m_shader.ps = "ps_5_0"; m_shader.ps = "ps_5_0";
m_shader.cs = "cs_5_0";
break; break;
default: default:
ASSERT(0); ASSERT(0);

View File

@ -266,7 +266,7 @@ public:
#pragma pack(pop) #pragma pack(pop)
protected: protected:
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader; struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps, cs;} m_shader;
uint32 m_msaa; uint32 m_msaa;
DXGI_SAMPLE_DESC m_msaa_desc; DXGI_SAMPLE_DESC m_msaa_desc;
@ -277,8 +277,9 @@ public:
virtual ~GSDeviceDX(); virtual ~GSDeviceDX();
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
virtual void SetupIA(const void* vertices, int count, int prim) = 0; virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0; virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
virtual void SetupGS(GSSelector sel) = 0; virtual void SetupGS(GSSelector sel) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0; virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;

View File

@ -36,9 +36,9 @@ GSDrawScanline::~GSDrawScanline()
{ {
} }
void GSDrawScanline::BeginDraw(const void* param) void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
{ {
memcpy(&m_global, param, sizeof(m_global)); memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.mmin && m_global.sel.lcm) if(m_global.sel.mmin && m_global.sel.lcm)
{ {
@ -87,7 +87,7 @@ void GSDrawScanline::BeginDraw(const void* param)
sel.tcc = m_global.sel.tcc; sel.tcc = m_global.sel.tcc;
sel.fst = m_global.sel.fst; sel.fst = m_global.sel.fst;
sel.fge = m_global.sel.fge; sel.fge = m_global.sel.fge;
sel.sprite = m_global.sel.sprite; sel.prim = m_global.sel.prim;
sel.fb = m_global.sel.fb; sel.fb = m_global.sel.fb;
sel.zb = m_global.sel.zb; sel.zb = m_global.sel.zb;
sel.zoverflow = m_global.sel.zoverflow; sel.zoverflow = m_global.sel.zoverflow;
@ -102,7 +102,9 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) // FIXME: something's not right with the sky in burnout 3
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{ {
GSScanlineSelector sel = m_global.sel; GSScanlineSelector sel = m_global.sel;
@ -115,7 +117,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
if(has_z || has_f) if(has_z || has_f)
{ {
if(!sel.sprite) if(sel.prim != GS_SPRITE_CLASS)
{ {
if(has_f) if(has_f)
{ {
@ -145,12 +147,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
{ {
if(has_f) if(has_f)
{ {
m_local.p.f = GSVector4i(vertices[0].p).zzzzh().zzzz(); m_local.p.f = GSVector4i(vertex[index[1]].p).zzzzh().zzzz();
} }
if(has_z) if(has_z)
{ {
m_local.p.z = vertices[0].t.u32[3]; // uint32 z is bypassed in t.w m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
} }
} }
} }
@ -234,7 +236,17 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
} }
else else
{ {
GSVector4i c = GSVector4i(vertices[0].c); int last = 0;
switch(sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
GSVector4i c = GSVector4i(vertex[index[last]].c);
c = c.upl16(c.zwxy()); c = c.upl16(c.zwxy());
@ -271,7 +283,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
if(!sel.sprite) if(sel.prim != GS_SPRITE_CLASS)
{ {
if(sel.fwrite && sel.fge) if(sel.fwrite && sel.fge)
{ {
@ -300,7 +312,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s); GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s);
GSVector4i v = vt.yyyy(); GSVector4i v = vt.yyyy();
if(!sel.sprite || sel.mmin) if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{ {
v += GSVector4i::cast(m_local.d[skip].t); v += GSVector4i::cast(m_local.d[skip].t);
} }
@ -354,7 +366,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{ {
za = fza_base->y + fza_offset->y; za = fza_base->y + fza_offset->y;
if(!sel.sprite) if(sel.prim != GS_SPRITE_CLASS)
{ {
GSVector4 z = scan.p.zzzz() + zo; GSVector4 z = scan.p.zzzz() + zo;
@ -754,7 +766,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{ {
uf = u.xxzzlh().srl16(1); uf = u.xxzzlh().srl16(1);
if(!sel.sprite) if(sel.prim != GS_SPRITE_CLASS)
{ {
vf = v.xxzzlh().srl16(1); vf = v.xxzzlh().srl16(1);
} }
@ -936,7 +948,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.fwrite && sel.fge) if(sel.fwrite && sel.fge)
{ {
GSVector4i fog = !sel.sprite ? f : m_local.p.f; GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f;
rb = m_global.frb.lerp16<0>(rb, fog); rb = m_global.frb.lerp16<0>(rb, fog);
ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga); ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga);
@ -1211,7 +1223,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
fza_offset++; fza_offset++;
if(!sel.sprite) if(sel.prim != GS_SPRITE_CLASS)
{ {
if(sel.zb) if(sel.zb)
{ {
@ -1234,7 +1246,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx()); s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx());
if(!sel.sprite || sel.mmin) if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{ {
t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy()); t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy());
} }

View File

@ -29,6 +29,14 @@
class GSDrawScanline : public IDrawScanline class GSDrawScanline : public IDrawScanline
{ {
public:
class SharedData : public GSRasterizerData
{
public:
GSScanlineGlobalData global;
};
protected:
GSScanlineGlobalData m_global; GSScanlineGlobalData m_global;
GSScanlineLocalData m_local; GSScanlineLocalData m_local;
@ -50,14 +58,14 @@ public:
// IDrawScanline // IDrawScanline
void BeginDraw(const void* param); void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels); void EndDraw(uint64 frame, uint64 ticks, int pixels);
void DrawRect(const GSVector4i& r, const GSVertexSW& v); void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]); lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]); add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{ {
// edx = &m_local.d[skip] // edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]); mov(ebx, ptr[esp + _v]);
} }
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
if(m_sel.fwrite && m_sel.fge || m_sel.zb) if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{ {
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin) if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{ {
vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
} }
@ -455,7 +455,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8); add(edi, 8);
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// z += m_local.d4.z; // z += m_local.d4.z;
@ -501,7 +501,7 @@ void GSDrawScanlineCodeGenerator::Step()
vpaddd(xmm2, ptr[&m_local.temp.s]); vpaddd(xmm2, ptr[&m_local.temp.s]);
vmovdqa(ptr[&m_local.temp.s], xmm2); vmovdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin) if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{ {
vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
vpaddd(xmm3, ptr[&m_local.temp.t]); vpaddd(xmm3, ptr[&m_local.temp.t]);
@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi; // GSVector4i zs = zi;
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
if(m_sel.zoverflow) if(m_sel.zoverflow)
{ {
@ -733,7 +733,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpsrlw(xmm0, 1); vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.uf], xmm0); vmovdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// GSVector4i vf = v.xxzzlh().srl16(1); // GSVector4i vf = v.xxzzlh().srl16(1);
@ -2227,7 +2227,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f); // rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
vmovdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]); vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
vmovdqa(xmm1, xmm6); vmovdqa(xmm1, xmm6);
vmovdqa(xmm2, ptr[&m_local.gd->frb]); vmovdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2350,7 +2350,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2; bool fast = m_sel.ztest && m_sel.zpsm < 2;
vmovdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]); vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast) if(fast)
{ {

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]); lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]); add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{ {
// edx = &m_local.d[skip] // edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]); mov(ebx, ptr[esp + _v]);
} }
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
if(m_sel.fwrite && m_sel.fge || m_sel.zb) if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{ {
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin) if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{ {
paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
} }
@ -458,7 +458,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8); add(edi, 8);
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// z += m_local.d4.z; // z += m_local.d4.z;
@ -504,7 +504,7 @@ void GSDrawScanlineCodeGenerator::Step()
paddd(xmm2, ptr[&m_local.temp.s]); paddd(xmm2, ptr[&m_local.temp.s]);
movdqa(ptr[&m_local.temp.s], xmm2); movdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin) if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{ {
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
paddd(xmm3, ptr[&m_local.temp.t]); paddd(xmm3, ptr[&m_local.temp.t]);
@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi; // GSVector4i zs = zi;
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
if(m_sel.zoverflow) if(m_sel.zoverflow)
{ {
@ -738,7 +738,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
psrlw(xmm0, 1); psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.uf], xmm0); movdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// GSVector4i vf = v.xxzzlh().srl16(1); // GSVector4i vf = v.xxzzlh().srl16(1);
@ -2341,7 +2341,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f); // rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
movdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]); movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
movdqa(xmm1, xmm6); movdqa(xmm1, xmm6);
movdqa(xmm2, ptr[&m_local.gd->frb]); movdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2464,7 +2464,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2; bool fast = m_sel.ztest && m_sel.zpsm < 2;
movdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]); movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast) if(fast)
{ {

View File

@ -41,12 +41,11 @@ public:
GIFRegFRAME FRAME; GIFRegFRAME FRAME;
GIFRegZBUF ZBUF; GIFRegZBUF ZBUF;
__aligned(struct, 32) struct
{ {
GSVector4i dx10;
GSVector4 dx9;
GSVector4 in; GSVector4 in;
GSVector4 ex; GSVector4 ofex;
uint32 ofxy;
} scissor; } scissor;
struct struct
@ -83,25 +82,22 @@ public:
void UpdateScissor() void UpdateScissor()
{ {
scissor.dx10 = GSVector4i( scissor.ofex = GSVector4(
(int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX), (int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY), (int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY),
(int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX), (int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY)); (int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY));
scissor.dx9 = GSVector4(scissor.dx10);
scissor.in = GSVector4( scissor.in = GSVector4(
(int)SCISSOR.SCAX0, (int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0, (int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1 + 1, (int)SCISSOR.SCAX1 + 1,
(int)SCISSOR.SCAY1 + 1); (int)SCISSOR.SCAY1 + 1);
scissor.ex = GSVector4( uint16 ofx = (uint16)XYOFFSET.OFX - 15;
(int)SCISSOR.SCAX0, uint16 ofy = (uint16)XYOFFSET.OFY - 15;
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1, scissor.ofxy = ((ofy << 16) | ofx); // ceil(xy) => (xy - offset + 15) >> 4 => (xy - [offset - 15]) >> 4
(int)SCISSOR.SCAY1);
} }
bool DepthRead() const bool DepthRead() const

View File

@ -24,10 +24,7 @@
GSDump::GSDump() GSDump::GSDump()
: m_gs(NULL) : m_gs(NULL)
, m_obj(NULL)
, m_frames(0) , m_frames(0)
, m_objects(0)
, m_vertices(0)
{ {
} }
@ -39,11 +36,8 @@ GSDump::~GSDump()
void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs) void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs)
{ {
m_gs = fopen((fn + ".gs").c_str(), "wb"); m_gs = fopen((fn + ".gs").c_str(), "wb");
m_obj = fopen((fn + ".obj").c_str(), "wt");
m_frames = 0; m_frames = 0;
m_objects = 0;
m_vertices = 0;
if(m_gs) if(m_gs)
{ {
@ -57,7 +51,6 @@ void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GS
void GSDump::Close() void GSDump::Close()
{ {
if(m_gs) {fclose(m_gs); m_gs = NULL;} if(m_gs) {fclose(m_gs); m_gs = NULL;}
if(m_obj) {fclose(m_obj); m_obj = NULL;}
} }
void GSDump::Transfer(int index, const uint8* mem, size_t size) void GSDump::Transfer(int index, const uint8* mem, size_t size)
@ -96,67 +89,3 @@ void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs)
} }
} }
} }
void GSDump::Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass)
{
if(m_obj)
{
switch(primclass)
{
case GS_POINT_CLASS:
// TODO
break;
case GS_LINE_CLASS:
// TODO
break;
case GS_TRIANGLE_CLASS:
for(int i = 0; i < count; i++)
{
float x = vertices[i].p.x;
float y = vertices[i].p.y;
float z = vertices[i].p.z;
fprintf(m_obj, "v %f %f %f\n", x, y, z);
}
for(int i = 0; i < count; i++)
{
fprintf(m_obj, "vt %f %f %f\n", vertices[i].t.x, vertices[i].t.y, vertices[i].t.z);
}
for(int i = 0; i < count; i++)
{
fprintf(m_obj, "vn %f %f %f\n", 0.0f, 0.0f, 0.0f);
}
fprintf(m_obj, "g f%d_o%d_p%d_v%d\n", m_frames, m_objects, primclass, count);
for(int i = 0; i < count; i += 3)
{
int a = m_vertices + i + 1;
int b = m_vertices + i + 2;
int c = m_vertices + i + 3;
fprintf(m_obj, "f %d/%d/%d %d/%d/%d %d/%d/%d\n", a, a, a, b, b, b, c, c, c);
}
m_vertices += count;
m_objects++;
break;
case GS_SPRITE_CLASS:
// TODO
break;
}
}
}

View File

@ -46,10 +46,7 @@ Regs data (id == 3)
class GSDump class GSDump
{ {
FILE* m_gs; FILE* m_gs;
FILE* m_obj;
int m_frames; int m_frames;
int m_objects;
int m_vertices;
public: public:
GSDump(); GSDump();
@ -60,6 +57,5 @@ public:
void ReadFIFO(uint32 size); void ReadFIFO(uint32 size);
void Transfer(int index, const uint8* mem, size_t size); void Transfer(int index, const uint8* mem, size_t size);
void VSync(int field, bool last, const GSPrivRegSet* regs); void VSync(int field, bool last, const GSPrivRegSet* regs);
void Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass);
operator bool() {return m_gs != NULL;} operator bool() {return m_gs != NULL;}
}; };

View File

@ -76,8 +76,8 @@ GtkWidget* CreateRenderComboBox()
case 8 : renderer_box_position = 1; break; case 8 : renderer_box_position = 1; break;
case 10: renderer_box_position = 2; break; case 10: renderer_box_position = 2; break;
case 11: renderer_box_position = 3; break; case 11: renderer_box_position = 3; break;
case 12: renderer_box_position = 4; break; case 15: renderer_box_position = 4; break;
case 13: renderer_box_position = 5; break; case 16: renderer_box_position = 5; break;
} }
gtk_combo_box_set_active(GTK_COMBO_BOX(render_combo_box), renderer_box_position); gtk_combo_box_set_active(GTK_COMBO_BOX(render_combo_box), renderer_box_position);
return render_combo_box; return render_combo_box;
@ -375,8 +375,8 @@ bool RunLinuxDialog()
case 1: theApp.SetConfig("renderer", 8); break; case 1: theApp.SetConfig("renderer", 8); break;
case 2: theApp.SetConfig("renderer", 10); break; case 2: theApp.SetConfig("renderer", 10); break;
case 3: theApp.SetConfig("renderer", 11); break; case 3: theApp.SetConfig("renderer", 11); break;
case 4: theApp.SetConfig("renderer", 12); break; case 4: theApp.SetConfig("renderer", 15); break;
case 5: theApp.SetConfig("renderer", 13); break; case 5: theApp.SetConfig("renderer", 16); break;
} }
} }

View File

@ -449,7 +449,7 @@ GSLocalMemory::~GSLocalMemory()
for_each(m_omap.begin(), m_omap.end(), aligned_free_second()); for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second()); for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
for(hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++) for(hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)
{ {
delete [] i->second; delete [] i->second;
} }
@ -500,6 +500,11 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32); GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
o->hash = hash; o->hash = hash;
o->fbp = fbp;
o->zbp = zbp;
o->fpsm = fpsm;
o->zpsm = zpsm;
o->bw = bw;
pixelAddress fpa = m_psm[fpsm].pa; pixelAddress fpa = m_psm[fpsm].pa;
pixelAddress zpa = m_psm[zpsm].pa; pixelAddress zpa = m_psm[zpsm].pa;
@ -526,11 +531,11 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;} static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;}
list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
{ {
uint32 hash = TEX0.TBP0 | (TEX0.TBW << 14) | (TEX0.PSM << 20) | (TEX0.TW << 26); uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH
hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.find(hash); hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.find(hash);
if(i != m_p2tmap.end()) if(i != m_p2tmap.end())
{ {
@ -540,13 +545,13 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
GSVector2i bs = m_psm[TEX0.PSM].bs; GSVector2i bs = m_psm[TEX0.PSM].bs;
int tw = std::max<int>(1 << TEX0.TW, bs.x); int tw = std::max<int>(1 << TEX0.TW, bs.x);
// int th = std::max<int>(1 << TEX0.TH, bs.y); int th = std::max<int>(1 << TEX0.TH, bs.y);
const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
hash_map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks) hash_map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for(int y = 0; y < 1024; y += bs.y) // the hash is a little short on bits for TEX0.TH, hard-coding it to 1024 lines for(int y = 0; y < th; y += bs.y)
{ {
uint32 base = o->block.row[y >> 3]; uint32 base = o->block.row[y >> 3];
@ -563,7 +568,7 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array // combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array
list<GSVector2i>* p2t = new list<GSVector2i>[MAX_PAGES]; vector<GSVector2i>* p2t = new vector<GSVector2i>[MAX_PAGES];
for(hash_map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++) for(hash_map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++)
{ {
@ -594,16 +599,12 @@ list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
// sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y) // sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y)
vector<GSVector2i> tmp;
for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++) for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++)
{ {
tmp.push_back(GSVector2i(j->first, ~j->second)); p2t[page].push_back(GSVector2i(j->first, ~j->second));
} }
std::sort(tmp.begin(), tmp.end(), cmp_vec2x); std::sort(p2t[page].begin(), p2t[page].end(), cmp_vec2x);
p2t[page].insert(p2t[page].end(), tmp.begin(), tmp.end());
} }
m_p2tmap[hash] = p2t; m_p2tmap[hash] = p2t;
@ -1305,13 +1306,13 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
{ {
if(len <= 0) return; if(len <= 0) return;
uint8* pb = (uint8*)dst; uint8* RESTRICT pb = (uint8*)dst;
uint16* pw = (uint16*)dst; uint16* RESTRICT pw = (uint16*)dst;
uint32* pd = (uint32*)dst; uint32* RESTRICT pd = (uint32*)dst;
uint32 bp = BITBLTBUF.SBP; uint32 bp = BITBLTBUF.SBP;
uint32 bw = BITBLTBUF.SBW; uint32 bw = BITBLTBUF.SBW;
psm_t* psm = &m_psm[BITBLTBUF.SPSM]; psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM];
int x = tx; int x = tx;
int y = ty; int y = ty;
@ -1323,16 +1324,26 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMCT32: case PSM_PSMCT32:
case PSM_PSMZ32: case PSM_PSMZ32:
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
len /= 4; len /= 4;
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pd += 4)
{
pd[0] = ps[offset[x + 0]];
pd[1] = ps[offset[x + 1]];
pd[2] = ps[offset[x + 2]];
pd[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pd++) for(; len > 0 && x < ex; len--, x++, pd++)
{ {
*pd = ReadPixel32(addr + offset[x]); *pd = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1347,16 +1358,16 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x++, pb += 3) for(; len > 0 && x < ex; len--, x++, pb += 3)
{ {
uint32 c = ReadPixel32(addr + offset[x]); uint32 c = ps[offset[x]];
pb[0] = ((uint8*)&c)[0]; pb[0] = (uint8)(c);
pb[1] = ((uint8*)&c)[1]; pb[1] = (uint8)(c >> 8);
pb[2] = ((uint8*)&c)[2]; pb[2] = (uint8)(c >> 16);
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1373,12 +1384,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
{
pw[0] = ps[offset[x + 0]];
pw[1] = ps[offset[x + 1]];
pw[2] = ps[offset[x + 2]];
pw[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pw++) for(; len > 0 && x < ex; len--, x++, pw++)
{ {
*pw = ReadPixel16(addr + offset[x]); *pw = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1390,12 +1409,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = ps[offset[x + 0]];
pb[1] = ps[offset[x + 1]];
pb[2] = ps[offset[x + 2]];
pb[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pb++) for(; len > 0 && x < ex; len--, x++, pb++)
{ {
*pb = ReadPixel8(addr + offset[x]); *pb = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1408,7 +1435,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
for(; len > 0 && x < ex; len--, x += 2, pb++) for(; len > 0 && x < ex; len--, x += 2, pb++)
{ {
@ -1424,12 +1451,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = (uint8)(ps[offset[x + 0]] >> 24);
pb[1] = (uint8)(ps[offset[x + 1]] >> 24);
pb[2] = (uint8)(ps[offset[x + 2]] >> 24);
pb[3] = (uint8)(ps[offset[x + 3]] >> 24);
}
for(; len > 0 && x < ex; len--, x++, pb++) for(; len > 0 && x < ex; len--, x++, pb++)
{ {
*pb = ReadPixel8H(addr + offset[x]); *pb = (uint8)(ps[offset[x]] >> 24);
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1441,12 +1476,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++) for(; len > 0 && x < ex; len--, x += 2, pb++)
{ {
*pb = ReadPixel4HL(addr + offset[x + 0]) | (ReadPixel4HL(addr + offset[x + 1]) << 4); uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0;
*pb = (uint8)(c0 | c1);
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1458,12 +1496,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0) while(len > 0)
{ {
uint32 addr = psm->pa(0, y, bp, bw); int* RESTRICT offset = psm->rowOffset[y & 7];
int* offset = psm->rowOffset[y & 7]; uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++) for(; len > 0 && x < ex; len--, x += 2, pb++)
{ {
*pb = ReadPixel4HH(addr + offset[x + 0]) | (ReadPixel4HH(addr + offset[x + 1]) << 4); uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0;
*pb = (uint8)(c0 | c1);
} }
if(x == ex) {x = sx; y++;} if(x == ex) {x = sx; y++;}
@ -1994,13 +2035,9 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
GSOffset::~GSOffset() GSOffset::~GSOffset()
{ {
for(hash_map<uint64, list<uint32>*>::iterator i = m_cache.begin(); i != m_cache.end(); i++)
{
delete i->second;
}
} }
list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox) uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
{ {
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
@ -2008,55 +2045,61 @@ list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
if(bbox != NULL) *bbox = r; if(bbox != NULL) *bbox = r;
uint64 r_hash; // worst case:
// bp page-aligned: (w * h) / (64 * 32)
// bp block-aligned: (w * h) / (8 * 8)
GSVector4i::storel(&r_hash, r.sra32(3).ps32()); // max 19-bit coordinates, should not be a problem (can shift right by 3 because it is mod8, smallest block size) int size = r.width() * r.height();
hash_map<uint64, list<uint32>*>::iterator i = m_cache.find(r_hash); int limit = MAX_PAGES + 1;
if(i != m_cache.end()) if(pages == NULL)
{ {
return i->second; limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
pages = new uint32[limit];
} }
uint32 tmp[16]; __aligned(uint32, 16) tmp[16];
memset(tmp, 0, sizeof(tmp)); ((GSVector4i*)tmp)[0] = GSVector4i::zero();
((GSVector4i*)tmp)[1] = GSVector4i::zero();
((GSVector4i*)tmp)[2] = GSVector4i::zero();
((GSVector4i*)tmp)[3] = GSVector4i::zero();
r = r.sra32(3);
bs.x >>= 3;
bs.y >>= 3;
uint32* RESTRICT p = pages;
for(int y = r.top; y < r.bottom; y += bs.y) for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = block.row[y >> 3]; uint32 base = block.row[y];
for(int x = r.left; x < r.right; x += bs.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 n = (base + block.col[x >> 3]) >> 5; uint32 n = (base + block.col[x]) >> 5;
if(n < MAX_PAGES) if(n < MAX_PAGES)
{ {
tmp[n >> 5] |= 1 << (n & 31); uint32& row = tmp[n >> 5];
} uint32 col = 1 << (n & 31);
}
}
list<uint32>* l = new list<uint32>(); if((row & col) == 0)
for(int i = 0; i < countof(tmp); i++)
{ {
uint32 p = tmp[i]; row |= col;
if(p == 0) continue; *p++ = n;
}
unsigned long j; }
while(_BitScanForward(&j, p))
{
p ^= 1 << j;
l->push_back((i << 5) + j);
} }
} }
m_cache[r_hash] = l; *p++ = EOP;
return l; ASSERT(p - pages <= limit);
return pages;
} }

View File

@ -30,8 +30,6 @@
class GSOffset : public GSAlignedClass<32> class GSOffset : public GSAlignedClass<32>
{ {
hash_map<uint64, list<uint32>*> m_cache;
public: public:
__aligned(struct, 32) Block __aligned(struct, 32) Block
{ {
@ -53,7 +51,9 @@ public:
GSOffset(uint32 bp, uint32 bw, uint32 psm); GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset(); virtual ~GSOffset();
list<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL); enum {EOP = 0xffffffff};
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
}; };
struct GSPixelOffset4 struct GSPixelOffset4
@ -63,6 +63,7 @@ struct GSPixelOffset4
GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...) GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...)
GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...) GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...)
uint32 hash; uint32 hash;
uint32 fbp, zbp, fpsm, zpsm, bw;
}; };
class GSLocalMemory : public GSBlock class GSLocalMemory : public GSBlock
@ -158,7 +159,7 @@ protected:
hash_map<uint32, GSOffset*> m_omap; hash_map<uint32, GSOffset*> m_omap;
hash_map<uint32, GSPixelOffset4*> m_po4map; hash_map<uint32, GSPixelOffset4*> m_po4map;
hash_map<uint32, list<GSVector2i>*> m_p2tmap; hash_map<uint64, vector<GSVector2i>*> m_p2tmap;
public: public:
GSLocalMemory(); GSLocalMemory();
@ -166,7 +167,7 @@ public:
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm); GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
list<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0); vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
// address // address

View File

@ -28,9 +28,8 @@ public:
{ {
Main, Main,
Sync, Sync,
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7,
WorkerSync0, WorkerSync1, WorkerSync2, WorkerSync3, WorkerSync4, WorkerSync5, WorkerSync6, WorkerSync7, WorkerSync8, WorkerSync9, WorkerSync10, WorkerSync11, WorkerSync12, WorkerSync13, WorkerSync14, WorkerSync15, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
WorkerSleep0, WorkerSleep1, WorkerSleep2, WorkerSleep3, WorkerSleep4, WorkerSleep5, WorkerSleep6, WorkerSleep7, WorkerSleep8, WorkerSleep9, WorkerSleep10, WorkerSleep11, WorkerSleep12, WorkerSleep13, WorkerSleep14, WorkerSleep15,
TimerLast, TimerLast,
}; };

View File

@ -35,6 +35,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
, m_id(id) , m_id(id)
, m_threads(threads) , m_threads(threads)
, m_perfmon(perfmon) , m_perfmon(perfmon)
, m_pixels(0)
{ {
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false); m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0; m_edge.count = 0;
@ -63,11 +64,15 @@ GSRasterizer::~GSRasterizer()
bool GSRasterizer::IsOneOfMyScanlines(int top) const bool GSRasterizer::IsOneOfMyScanlines(int top) const
{ {
ASSERT(top >= 0 && top < 2048);
return m_myscanline[top >> THREAD_HEIGHT] != 0; return m_myscanline[top >> THREAD_HEIGHT] != 0;
} }
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
{ {
ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048);
top = top >> THREAD_HEIGHT; top = top >> THREAD_HEIGHT;
bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT; bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT;
@ -98,26 +103,42 @@ int GSRasterizer::FindMyNextScanline(int top) const
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data) void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
{ {
Draw(data); Draw(data.get());
} }
void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data) int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels;
if(reset)
{
m_pixels = 0;
}
return pixels;
}
void GSRasterizer::Draw(GSRasterizerData* data)
{ {
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id); GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->count == 0) return; if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
m_ds->BeginDraw(data->param); m_ds->BeginDraw(data);
const GSVertexSW* vertices = data->vertices; const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertices_end = data->vertices + data->count; const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
const uint32* index = data->index;
const uint32* index_end = data->index + data->index_count;
uint32 tmp_index[] = {0, 1, 2};
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor)); bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
m_scissor = data->scissor; m_scissor = data->scissor;
m_fscissor = GSVector4(data->scissor); m_fscissor_x = GSVector4(data->scissor).xzxz();
m_fscissor_y = GSVector4(data->scissor).ywyw();
m_pixels = 0;
uint64 start = __rdtsc(); uint64 start = __rdtsc();
@ -127,33 +148,57 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
if(scissor_test) if(scissor_test)
{ {
DrawPoint<true>(vertices, data->count); DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
} }
else else
{ {
DrawPoint<false>(vertices, data->count); DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
} }
break; break;
case GS_LINE_CLASS: case GS_LINE_CLASS:
do {DrawLine(vertices); vertices += 2;} if(index != NULL)
while(vertices < vertices_end); {
do {DrawLine(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawLine(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
break; break;
case GS_TRIANGLE_CLASS: case GS_TRIANGLE_CLASS:
do {DrawTriangle(vertices); vertices += 3;} if(index != NULL)
while(vertices < vertices_end); {
do {DrawTriangle(vertex, index); index += 3;}
while(index < index_end);
}
else
{
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
while(vertex < vertex_end);
}
break; break;
case GS_SPRITE_CLASS: case GS_SPRITE_CLASS:
do {DrawSprite(vertices, data->solidrect); vertices += 2;} if(index != NULL)
while(vertices < vertices_end); {
do {DrawSprite(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawSprite(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
break; break;
@ -163,18 +208,19 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
uint64 ticks = __rdtsc() - start; uint64 ticks = __rdtsc() - start;
_InterlockedExchangeAdd(&data->ticks, ticks);
_InterlockedExchangeAdd(&data->pixels, m_pixels);
m_ds->EndDraw(data->frame, ticks, m_pixels); m_ds->EndDraw(data->frame, ticks, m_pixels);
} }
template<bool scissor_test> template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count) void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
{ {
for(; count > 0; count--, v++) if(index != NULL)
{ {
GSVector4i p(v->p); for(int i = 0; i < index_count; i++, index++)
{
const GSVertexSW& v = vertex[*index];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{ {
@ -182,17 +228,44 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{ {
m_pixels++; m_pixels++;
m_ds->SetupPrim(v, *v); m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, *v); m_ds->DrawScanline(1, p.x, p.y, v);
}
}
}
}
else
{
uint32 tmp_index[1] = {0};
for(int i = 0; i < vertex_count; i++, vertex++)
{
const GSVertexSW& v = vertex[0];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v);
}
} }
} }
} }
} }
void GSRasterizer::DrawLine(const GSVertexSW* v) void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
{ {
GSVertexSW dv = v[1] - v[0]; const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVertexSW dv = v1 - v0;
GSVector4 dp = dv.p.abs(); GSVector4 dp = dv.p.abs();
@ -200,10 +273,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(m_ds->HasEdge()) if(m_ds->HasEdge())
{ {
DrawEdge(v[0], v[1], dv, i, 0); DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v[0], v[1], dv, i, 1); DrawEdge(v0, v1, dv, i, 1);
Flush(v, GSVertexSW::zero(), true); Flush(vertex, index, GSVertexSW::zero(), true);
return; return;
} }
@ -216,23 +289,21 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
{ {
// shortcut for horizontal lines // shortcut for horizontal lines
GSVector4 mask = (v[0].p > v[1].p).xxxx(); GSVector4 mask = (v0.p > v1.p).xxxx();
GSVertexSW scan; GSVertexSW scan;
scan.p = v[0].p.blend32(v[1].p, mask); scan.p = v0.p.blend32(v1.p, mask);
scan.t = v[0].t.blend32(v[1].t, mask); scan.t = v0.t.blend32(v1.t, mask);
scan.c = v[0].c.blend32(v[1].c, mask); scan.c = v0.c.blend32(v1.c, mask);
GSVector4i p(scan.p); GSVector4i p(scan.p);
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y)) if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
{ {
GSVector4 scissor = m_fscissor.xzxz(); GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
GSVector4 l = lrf.max(m_fscissor_x);
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil(); GSVector4 r = lrf.min(m_fscissor_x);
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4i lr = GSVector4i(l.xxyy(r)); GSVector4i lr = GSVector4i(l.xxyy(r));
int left = lr.extract32<0>(); int left = lr.extract32<0>();
@ -248,7 +319,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
scan += dscan * (l - scan.p).xxxx(); scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(v, dscan); m_ds->SetupPrim(vertex, index, dscan);
m_ds->DrawScanline(pixels, left, p.y, scan); m_ds->DrawScanline(pixels, left, p.y, scan);
} }
@ -262,7 +333,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(steps > 0) if(steps > 0)
{ {
GSVertexSW edge = v[0]; GSVertexSW edge = v0;
GSVertexSW dedge = dv / GSVector4(dp.v[i]); GSVertexSW dedge = dv / GSVector4(dp.v[i]);
GSVertexSW* RESTRICT e = m_edge.buff; GSVertexSW* RESTRICT e = m_edge.buff;
@ -288,7 +359,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
m_edge.count = e - m_edge.buff; m_edge.count = e - m_edge.buff;
Flush(v, GSVertexSW::zero()); Flush(vertex, index, GSVertexSW::zero());
} }
} }
@ -304,42 +375,47 @@ static const uint8 s_ysort[8][4] =
{2, 1, 0, 0}, // y2 < y1 < y0 {2, 1, 0, 0}, // y2 < y1 < y0
}; };
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
{ {
GSVertexSW v[3];
GSVertexSW dv[3]; GSVertexSW dv[3];
GSVertexSW edge; GSVertexSW edge;
GSVertexSW dedge; GSVertexSW dedge;
GSVertexSW dscan; GSVertexSW dscan;
GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p); GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx(); GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
int mask = (y0011 > y1221).mask() & 7; int m1 = (y0011 > y1221).mask() & 7;
v[0] = vertices[s_ysort[mask][0]]; int i[3];
v[1] = vertices[s_ysort[mask][1]];
v[2] = vertices[s_ysort[mask][2]];
y0011 = v[0].p.yyyy(v[1].p); i[0] = index[s_ysort[m1][0]];
y1221 = v[1].p.yyyy(v[2].p).xzzx(); i[1] = index[s_ysort[m1][1]];
i[2] = index[s_ysort[m1][2]];
int i = (y0011 == y1221).mask() & 7; const GSVertexSW& v0 = vertex[i[0]];
const GSVertexSW& v1 = vertex[i[1]];
const GSVertexSW& v2 = vertex[i[2]];
y0011 = v0.p.yyyy(v1.p);
y1221 = v1.p.yyyy(v2.p).xzzx();
m1 = (y0011 == y1221).mask() & 7;
// if(i == 0) => y0 < y1 < y2 // if(m1 == 0) => y0 < y1 < y2
// if(i == 1) => y0 == y1 < y2 // if(m1 == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2 // if(m1 == 4) => y0 < y1 == y2
if(i == 7) return; // y0 == y1 == y2 if(m1 == 7) return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor.ywyw()); GSVector4 tbmax = tbf.max(m_fscissor_y);
GSVector4 tbmin = tbf.min(m_fscissor.ywyw()); GSVector4 tbmin = tbf.min(m_fscissor_y);
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b)
dv[0] = v[1] - v[0]; dv[0] = v1 - v0;
dv[1] = v[2] - v[0]; dv[1] = v2 - v0;
dv[2] = v[2] - v[1]; dv[2] = v2 - v1;
GSVector4 cross = dv[0].p * dv[1].p.yxwz(); GSVector4 cross = dv[0].p * dv[1].p.yxwz();
@ -347,11 +423,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value // the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
int j = cross.upl(cross == GSVector4::zero()).mask(); int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(j & 2) return; if(m2 & 2) return;
j &= 1; m2 &= 1;
cross = cross.rcpnr(); cross = cross.rcpnr();
@ -391,42 +467,42 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0 dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1 dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
if(i & 1) if(m1 & 1)
{ {
if(tb.y < tb.w) if(tb.y < tb.w)
{ {
edge = v[1 - j]; edge = vertex[i[1 - m2]];
edge.p = edge.p.insert<0, 1>(v[j].p); edge.p = edge.p.insert<0, 1>(vertex[i[m2]].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p); dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p); DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
} }
} }
else else
{ {
if(tb.x < tb.z) if(tb.x < tb.z)
{ {
edge = v[0]; edge = v0;
edge.p = edge.p.xxzw(); edge.p = edge.p.xxzw();
dedge.p = ddx[j].xyzw(dedge.p); dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p); DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
} }
if(tb.y < tb.w) if(tb.y < tb.w)
{ {
edge = v[1]; edge = v1;
edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p); edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p); dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p); DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
} }
} }
Flush(v, dscan); Flush(vertex, index, dscan);
if(m_ds->HasEdge()) if(m_ds->HasEdge())
{ {
@ -434,14 +510,14 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 b = dx < GSVector4::zero(); // dx < 0 GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
int i = a.mask(); int orientation = a.mask();
int j = ((a | b) ^ c).mask() ^ 2; // evil int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1); DrawEdge(v0, v1, dv[0], orientation & 1, side & 1);
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2); DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4); DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
Flush(v, GSVertexSW::zero(), true); Flush(vertex, index, GSVertexSW::zero(), true);
} }
} }
@ -452,7 +528,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 scissor = m_fscissor.xzxz(); GSVector4 scissor = m_fscissor_x;
top = FindMyNextScanline(top); top = FindMyNextScanline(top);
@ -493,18 +569,21 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
m_edge.count += e - &m_edge.buff[m_edge.count]; m_edge.count += e - &m_edge.buff[m_edge.count];
} }
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect) void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
{ {
const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero());
GSVertexSW v[2]; GSVertexSW v[2];
GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero()); v[0].p = v1.p.blend32(v0.p, mask);
v[0].t = v1.t.blend32(v0.t, mask);
v[0].c = v1.c;
v[0].p = vertices[1].p.blend32(vertices[0].p, mask); v[1].p = v0.p.blend32(v1.p, mask);
v[0].t = vertices[1].t.blend32(vertices[0].t, mask); v[1].t = v0.t.blend32(v1.t, mask);
v[0].c = vertices[1].c;
v[1].p = vertices[0].p.blend32(vertices[1].p, mask);
v[1].t = vertices[0].t.blend32(vertices[1].t, mask);
GSVector4i r(v[0].p.xyxy(v[1].p).ceil()); GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
@ -514,14 +593,31 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
GSVertexSW scan = v[0]; GSVertexSW scan = v[0];
if(solidrect) if(m_ds->IsSolidRect())
{ {
if(m_id == 0) if(m_threads == 1)
{ {
m_ds->DrawRect(r, scan); m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height(); m_pixels += r.width() * r.height();
} }
else
{
int top = FindMyNextScanline(r.top);
int bottom = r.bottom;
while(top < bottom)
{
r.top = top;
r.bottom = std::min<int>((top + (1 << THREAD_HEIGHT)) & ~((1 << THREAD_HEIGHT) - 1), bottom);
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
}
}
return; return;
} }
@ -543,7 +639,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v, dscan); m_ds->SetupPrim(vertex, index, dscan);
while(1) while(1)
{ {
@ -575,13 +671,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
if(orientation) if(orientation)
{ {
GSVector4 tbmax = lrtb.max(m_fscissor.yyyy()); GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b
GSVector4 tbmin = lrtb.min(m_fscissor.wwww()); GSVector4 tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb)
GSVector4i tb = GSVector4i(tbmax.zwzw(tbmin)); GSVector4 tbmin = tbf.min(m_fscissor_y); // min(t, st) min(t, sb) min(b, st) min(b, sb)
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(t, st) max(b, sb) min(t, st) min(b, sb)
int top, bottom; int top, bottom;
@ -589,27 +684,27 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if((dv.p >= GSVector4::zero()).mask() & 2) if((dv.p >= GSVector4::zero()).mask() & 2)
{ {
top = tb.extract32<0>(); top = tb.extract32<0>(); // max(t, st)
bottom = tb.extract32<3>(); bottom = tb.extract32<3>(); // min(b, sb)
if(top >= bottom) return; if(top >= bottom) return;
edge = v0; edge = v0;
dedge = dv / dv.p.yyyy(); dedge = dv / dv.p.yyyy();
edge += dedge * (tbmax.zzzz() - edge.p.yyyy()); edge += dedge * (tbmax.xxxx() - edge.p.yyyy());
} }
else else
{ {
top = tb.extract32<1>(); top = tb.extract32<1>(); // max(b, st)
bottom = tb.extract32<2>(); bottom = tb.extract32<2>(); // min(t, sb)
if(top >= bottom) return; if(top >= bottom) return;
edge = v1; edge = v1;
dedge = dv / dv.p.yyyy(); dedge = dv / dv.p.yyyy();
edge += dedge * (tbmax.wwww() - edge.p.yyyy()); edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
} }
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
@ -664,9 +759,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
} }
else else
{ {
GSVector4 lrmax = lrtb.max(m_fscissor.xxxx()); GSVector4 lrf = v0.p.xxxx(v1.p).ceil(); // l l r r
GSVector4 lrmin = lrtb.min(m_fscissor.zzzz()); GSVector4 lrmax = lrf.max(m_fscissor_x); // max(l, sl) max(l, sr) max(r, sl) max(r, sr)
GSVector4i lr = GSVector4i(lrmax.xyxy(lrmin)); GSVector4 lrmin = lrf.min(m_fscissor_x); // min(l, sl) min(l, sr) min(r, sl) min(r, sr)
GSVector4i lr = GSVector4i(lrmax.xzyw(lrmin)); // max(l, sl) max(r, sl) min(l, sr) min(r, sr)
int left, right; int left, right;
@ -674,8 +770,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if((dv.p >= GSVector4::zero()).mask() & 1) if((dv.p >= GSVector4::zero()).mask() & 1)
{ {
left = lr.extract32<0>(); left = lr.extract32<0>(); // max(l, sl)
right = lr.extract32<3>(); right = lr.extract32<3>(); // min(r, sr)
if(left >= right) return; if(left >= right) return;
@ -686,15 +782,15 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
} }
else else
{ {
left = lr.extract32<1>(); left = lr.extract32<1>(); // max(r, sl)
right = lr.extract32<2>(); right = lr.extract32<2>(); // min(l, sr)
if(left >= right) return; if(left >= right) return;
edge = v1; edge = v1;
dedge = dv / dv.p.xxxx(); dedge = dv / dv.p.xxxx();
edge += dedge * (lrmax.yyyy() - edge.p.xxxx()); edge += dedge * (lrmax.zzzz() - edge.p.xxxx());
} }
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
@ -760,7 +856,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
e->p.i16[2] = (int16)top; e->p.i16[2] = (int16)top;
} }
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge) void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge)
{ {
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline) // TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -768,7 +864,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
if(count > 0) if(count > 0)
{ {
m_ds->SetupPrim(vertices, dscan); m_ds->SetupPrim(vertex, index, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff; const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count; const GSVertexSW* RESTRICT ee = e + count;
@ -811,6 +907,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
GSRasterizerList::GSRasterizerList() GSRasterizerList::GSRasterizerList()
: GSJobQueue<shared_ptr<GSRasterizerData> >() : GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_sync_count(0) , m_sync_count(0)
, m_syncpoint_count(0)
{ {
} }
@ -847,26 +944,28 @@ void GSRasterizerList::Sync()
m_sync_count++; m_sync_count++;
} }
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item) int GSRasterizerList::GetPixels(bool reset)
{ {
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch? int pixels = 0;
{
for(size_t i = 0; i < m_workers.size(); i++) for(size_t i = 0; i < m_workers.size(); i++)
{ {
m_workers[i]->Wait(); pixels += m_workers[i]->GetPixels(reset);
} }
m_workers.front()->Process(item); return pixels;
}
return;
}
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(item->syncpoint) if(item->syncpoint)
{ {
for(size_t i = 0; i < m_workers.size(); i++) for(size_t i = 0; i < m_workers.size(); i++)
{ {
m_workers[i]->Wait(); m_workers[i]->Wait();
} }
m_syncpoint_count++;
} }
for(size_t i = 0; i < m_workers.size(); i++) for(size_t i = 0; i < m_workers.size(); i++)
@ -890,6 +989,11 @@ GSRasterizerList::GSWorker::~GSWorker()
delete m_r; delete m_r;
} }
int GSRasterizerList::GSWorker::GetPixels(bool reset)
{
return m_r->GetPixels(reset);
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item) void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{ {
GSVector4i r = item->bbox.rintersect(item->scissor); GSVector4i r = item->bbox.rintersect(item->scissor);
@ -902,5 +1006,5 @@ void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item) void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{ {
m_r->Draw(item); m_r->Draw(item.get());
} }

View File

@ -34,45 +34,38 @@ public:
GSVector4i scissor; GSVector4i scissor;
GSVector4i bbox; GSVector4i bbox;
GS_PRIM_CLASS primclass; GS_PRIM_CLASS primclass;
GSVertexSW* vertices; uint8* buff;
int count; GSVertexSW* vertex;
bool solidrect; int vertex_count;
uint32* index;
int index_count;
bool syncpoint; bool syncpoint;
uint64 frame; uint64 frame;
void* param;
// drawing stats
volatile long ticks;
volatile long pixels;
GSRasterizerData() GSRasterizerData()
: scissor(GSVector4i::zero()) : scissor(GSVector4i::zero())
, bbox(GSVector4i::zero()) , bbox(GSVector4i::zero())
, primclass(GS_INVALID_CLASS) , primclass(GS_INVALID_CLASS)
, vertices(NULL) , buff(NULL)
, count(0) , vertex(NULL)
, solidrect(false) , vertex_count(0)
, index(NULL)
, index_count(0)
, syncpoint(false) , syncpoint(false)
, frame(0) , frame(0)
, param(NULL)
, ticks(0)
, pixels(0)
{ {
} }
virtual ~GSRasterizerData() virtual ~GSRasterizerData()
{ {
if(vertices != NULL) _aligned_free(vertices); if(buff != NULL) _aligned_free(buff);
// derived class should free param and its members
} }
}; };
class IDrawScanline : public GSAlignedClass<32> class IDrawScanline : public GSAlignedClass<32>
{ {
public: public:
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
@ -86,19 +79,19 @@ public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {} IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
virtual ~IDrawScanline() {} virtual ~IDrawScanline() {}
virtual void BeginDraw(const void* param) = 0; virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0; virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
#ifdef ENABLE_JIT_RASTERIZER #ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);} __forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);} __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
#else #else
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0; virtual void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0; virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
@ -106,6 +99,7 @@ public:
#endif #endif
__forceinline bool HasEdge() const {return m_de != NULL;} __forceinline bool HasEdge() const {return m_de != NULL;}
__forceinline bool IsSolidRect() const {return m_dr != NULL;}
}; };
class IRasterizer : public GSAlignedClass<32> class IRasterizer : public GSAlignedClass<32>
@ -115,6 +109,7 @@ public:
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0; virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
virtual void Sync() = 0; virtual void Sync() = 0;
virtual int GetPixels(bool reset = true) = 0;
}; };
__aligned(class, 32) GSRasterizer : public IRasterizer __aligned(class, 32) GSRasterizer : public IRasterizer
@ -126,24 +121,25 @@ protected:
int m_threads; int m_threads;
uint8* m_myscanline; uint8* m_myscanline;
GSVector4i m_scissor; GSVector4i m_scissor;
GSVector4 m_fscissor; GSVector4 m_fscissor_x;
GSVector4 m_fscissor_y;
struct {GSVertexSW* buff; int count;} m_edge; struct {GSVertexSW* buff; int count;} m_edge;
int m_pixels; int m_pixels;
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count); typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test> template<bool scissor_test>
void DrawPoint(const GSVertexSW* v, int count); void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
void DrawLine(const GSVertexSW* v); void DrawLine(const GSVertexSW* vertex, const uint32* index);
void DrawTriangle(const GSVertexSW* v); void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
void DrawSprite(const GSVertexSW* v, bool solidrect); void DrawSprite(const GSVertexSW* vertex, const uint32* index);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0); __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan); __forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false); __forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
public: public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
@ -153,12 +149,13 @@ public:
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const; __forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const; __forceinline int FindMyNextScanline(int top) const;
void Draw(shared_ptr<GSRasterizerData> data); void Draw(GSRasterizerData* data);
// IRasterizer // IRasterizer
void Queue(shared_ptr<GSRasterizerData> data); void Queue(shared_ptr<GSRasterizerData> data);
void Sync() {} void Sync() {}
int GetPixels(bool reset);
}; };
class GSRasterizerList class GSRasterizerList
@ -174,6 +171,8 @@ protected:
GSWorker(GSRasterizer* r); GSWorker(GSRasterizer* r);
virtual ~GSWorker(); virtual ~GSWorker();
int GetPixels(bool reset);
// GSJobQueue // GSJobQueue
void Push(const shared_ptr<GSRasterizerData>& item); void Push(const shared_ptr<GSRasterizerData>& item);
@ -213,9 +212,11 @@ public:
} }
int m_sync_count; int m_sync_count;
int m_syncpoint_count;
// IRasterizer // IRasterizer
void Queue(shared_ptr<GSRasterizerData> data); void Queue(shared_ptr<GSRasterizerData> data);
void Sync(); void Sync();
int GetPixels(bool reset);
}; };

View File

@ -22,9 +22,8 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSRenderer.h" #include "GSRenderer.h"
GSRenderer::GSRenderer() GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
: GSState() : GSState(vt, vertex_stride)
, m_vt(this)
, m_dev(NULL) , m_dev(NULL)
, m_shader(0) , m_shader(0)
, m_shift_key(false) , m_shift_key(false)
@ -80,8 +79,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
void GSRenderer::ResetDevice() void GSRenderer::ResetDevice()
{ {
ResetPrim();
if(m_dev) m_dev->Reset(1, 1); if(m_dev) m_dev->Reset(1, 1);
} }
@ -350,8 +347,16 @@ void GSRenderer::VSync(int field)
if(fillrate > 0) if(fillrate > 0)
{ {
s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024));
int sum = 0;
for(int i = 0; i < 16; i++)
{
sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
} }
s += format(" | %d%% CPU", sum);
}
} }
else else
{ {
@ -528,7 +533,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
return; return;
case VK_F7: case VK_F7:
m_shader = (m_shader + 3 + step) % 3; m_shader = (m_shader + 3 + step) % 3;
printf("GSdx: Set shader %d (%s).\n", (int)m_shader); printf("GSdx: Set shader %d.\n", (int)m_shader);
return; return;
case VK_DELETE: case VK_DELETE:
m_aa1 = !m_aa1; m_aa1 = !m_aa1;
@ -602,308 +607,3 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
} }
#endif #endif
} }
// Computes into r a rectangle of texels, starting from the full texture
// (0, 0, 1 << TEX0.TW, 1 << TEX0.TH), narrowed first by the CLAMP register
// settings and then by the texture coordinate range stored in m_vt
// (m_vt.m_min.t / m_vt.m_max.t).
//
// r      [out] resulting rectangle, always intersected with the full texture rect
// TEX0   supplies the log2 texture width/height (TW, TH)
// CLAMP  supplies the wrap modes (WMS, WMT) and the MINU/MAXU/MINV/MAXV values
// linear when true, the coordinate range is widened by 0x8000 on each side
//        before being shifted down by 16 bits
void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{
int tw = TEX0.TW;
int th = TEX0.TH;
int w = 1 << tw;
int h = 1 << th;
// tr is the whole texture; vr is the working rectangle that gets narrowed below.
GSVector4i tr(0, 0, w, h);
int wms = CLAMP.WMS;
int wmt = CLAMP.WMT;
int minu = (int)CLAMP.MINU;
int minv = (int)CLAMP.MINV;
int maxu = (int)CLAMP.MAXU;
int maxv = (int)CLAMP.MAXV;
GSVector4i vr = tr;
// Pass 1, horizontal: apply the register-defined region only.
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// In this mode the rectangle starts at maxu and spans minu + 1 texels.
// NOTE(review): presumably MINU acts as a mask and MAXU as an offset here - confirm against the GS documentation.
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// Pass 1, vertical: same rules using MINV/MAXV.
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// Pass 2: narrow further using the traced coordinate range.
// NOTE(review): the test below skips this pass only when wms + wmt reaches 6, presumably
// when both modes are CLAMP_REGION_REPEAT - confirm the enum values.
if(wms + wmt < 6)
{
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if(linear)
{
st += GSVector4(-0x8000, 0x8000).xxyy();
}
// Drop the low 16 bits of the coordinates.
// NOTE(review): this looks like a 16.16 fixed-point to integer texel conversion - confirm the format of m_vt's t values.
GSVector4i uv = GSVector4i(st).sra32(16);
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
// u/v keep only the low tw/th bits of each coordinate; uu/vv keep the bits above them.
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// NOTE(review): mask presumably flags whether min and max fall in the same repeat tile per axis
// (tested with 0x000f for u and 0xf000 for v below) - confirm lane layout of upl32/uph32/mask().
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
uv = uv.rintersect(tr);
switch(wms)
{
case CLAMP_REPEAT:
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
}
// The result never extends outside the texture itself.
r = vr.rintersect(tr);
}
void GSRenderer::GetAlphaMinMax()
{
if(m_vt.m_alpha.valid)
{
return;
}
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if(PRIM->TME && context->TEX0.TCC)
{
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
case 0:
a.y = 0;
a.w = 0xff;
break;
case 1:
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
a.w = env.TEXA.TA0;
break;
case 2:
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
__assume(0);
}
switch(context->TEX0.TFX)
{
case TFX_MODULATE:
a.x = (a.x * a.y) >> 7;
a.z = (a.z * a.w) >> 7;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_DECAL:
a.x = a.y;
a.z = a.w;
break;
case TFX_HIGHLIGHT:
a.x = a.x + a.y;
a.z = a.z + a.w;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_HIGHLIGHT2:
a.x = a.y;
a.z = a.w;
break;
default:
__assume(0);
}
}
m_vt.m_alpha.min = a.x;
m_vt.m_alpha.max = a.z;
m_vt.m_alpha.valid = true;
}
// Tries to resolve the alpha test for the whole draw from the alpha range
// alone (see GetAlphaMinMax).
//
// Returns false when the range straddles the reference value, i.e. the test
// outcome differs between pixels and cannot be decided up front; fm and zm
// are left untouched in that case.
// Returns true when the outcome is uniform. If the test uniformly fails,
// fm and/or zm are updated according to TEST.AFAIL.
bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool pass;

	switch(ctx->TEST.ATST)
	{
	case ATST_NEVER:
		pass = false;
		break;

	case ATST_ALWAYS:
		pass = true;
		break;

	default:
		{
			GetAlphaMinMax();

			const int amin = m_vt.m_alpha.min;
			const int amax = m_vt.m_alpha.max;
			const int aref = ctx->TEST.AREF;

			// exact:   the whole range is the single value aref
			// outside: aref does not lie inside [amin, amax]
			const bool exact = amin == aref && amax == aref;
			const bool outside = amin > aref || amax < aref;

			switch(ctx->TEST.ATST)
			{
			case ATST_LESS:
				if(amax >= aref && amin < aref) return false;
				pass = amax < aref;
				break;
			case ATST_LEQUAL:
				if(amax > aref && amin <= aref) return false;
				pass = amax <= aref;
				break;
			case ATST_EQUAL:
				if(!exact && !outside) return false;
				pass = exact;
				break;
			case ATST_GEQUAL:
				if(amin < aref && amax >= aref) return false;
				pass = amin >= aref;
				break;
			case ATST_GREATER:
				if(amin <= aref && amax > aref) return false;
				pass = amin > aref;
				break;
			case ATST_NOTEQUAL:
				if(!exact && !outside) return false;
				pass = !exact;
				break;
			default:
				__assume(0);
			}
		}
		break;
	}

	if(pass) return true;

	// Uniform failure: translate the fail action into write masks.
	switch(ctx->TEST.AFAIL)
	{
	case AFAIL_KEEP:
		fm = zm = 0xffffffff;
		break;
	case AFAIL_FB_ONLY:
		zm = 0xffffffff;
		break;
	case AFAIL_ZB_ONLY:
		fm = 0xffffffff;
		break;
	case AFAIL_RGB_ONLY:
		fm |= 0xff000000;
		zm = 0xffffffff;
		break;
	default:
		__assume(0);
	}

	return true;
}
bool GSRenderer::IsOpaque()
{
if(PRIM->AA1)
{
return false;
}
if(!PRIM->ABE)
{
return true;
}
const GSDrawingContext* context = m_context;
int amin = 0, amax = 0xff;
if(context->ALPHA.A != context->ALPHA.B)
{
if(context->ALPHA.C == 0)
{
GetAlphaMinMax();
amin = m_vt.m_alpha.min;
amax = m_vt.m_alpha.max;
}
else if(context->ALPHA.C == 1)
{
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
{
amin = amax = 0x80;
}
}
else if(context->ALPHA.C == 2)
{
amin = amax = context->ALPHA.FIX;
}
}
return context->ALPHA.IsOpaque(amin, amax);
}

View File

@ -24,8 +24,6 @@
#include "GSdx.h" #include "GSdx.h"
#include "GSWnd.h" #include "GSWnd.h"
#include "GSState.h" #include "GSState.h"
#include "GSVertexTrace.h"
#include "GSVertexList.h"
#include "GSCapture.h" #include "GSCapture.h"
class GSRenderer : public GSState class GSRenderer : public GSState
@ -53,15 +51,6 @@ protected:
virtual GSTexture* GetOutput(int i) = 0; virtual GSTexture* GetOutput(int i) = 0;
GSVertexTrace m_vt;
// following functions need m_vt to be initialized
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public: public:
GSWnd m_wnd; GSWnd m_wnd;
GSDevice* m_dev; GSDevice* m_dev;
@ -71,10 +60,9 @@ public:
bool s_save; bool s_save;
bool s_savez; bool s_savez;
int s_saven; int s_saven;
GSCritSec s_lock;
public: public:
GSRenderer(); GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSRenderer(); virtual ~GSRenderer();
virtual bool CreateWnd(const string& title, int w, int h); virtual bool CreateWnd(const string& title, int w, int h);
@ -98,156 +86,3 @@ public:
char m_GStitleInfoBuffer[128]; char m_GStitleInfoBuffer[128];
}; };
// Renderer base parameterised on the vertex type. It owns a growable,
// 16-byte aligned array of drawing vertices (m_vertices, with m_count entries
// in use) and a short list of pending input vertices (m_vl) from which
// DrawingKick assembles primitives. Derived classes implement Draw().
template<class Vertex> class GSRendererT : public GSRenderer
{
protected:
// Drawing vertex buffer, allocated by GrowVertexBuffer(); NULL until first growth.
Vertex* m_vertices;
// Number of vertices currently stored in m_vertices.
int m_count;
// Capacity checked by DrawingKick; kept 100 below the real allocation size (see GrowVertexBuffer).
int m_maxcount;
// Pending vertices that have not yet formed a complete primitive.
GSVertexList<Vertex> m_vl;
// Drops all buffered vertices and pending list entries, then resets the base class.
void Reset()
{
m_count = 0;
m_vl.RemoveAll();
GSRenderer::Reset();
}
// Discards only the pending vertex list; vertices already in m_vertices are kept.
void ResetPrim()
{
m_vl.RemoveAll();
}
// Draws the buffered vertices (if any) and empties the buffer.
// Drawing is skipped when either the frame or the Z buffer format has fmt >= 3,
// and also while the device reports itself lost.
void FlushPrim()
{
if(m_count == 0) return;
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
{
// FIXME: berserk fpsm = 27 (8H)
if(!m_dev->IsLost())
{
// Refresh the vertex trace before Draw() runs.
m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM));
Draw();
}
// Counted even when the device was lost and nothing was drawn.
m_perfmon.Put(GSPerfMon::Draw, 1);
}
m_count = 0;
}
// Enlarges m_vertices to max(1.5 * m_maxcount, 10000) entries, preserving the first m_maxcount old entries.
// NOTE(review): the result of _aligned_malloc is not checked before use.
void GrowVertexBuffer()
{
int maxcount = std::max<int>(m_maxcount * 3 / 2, 10000);
Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 16);
if(m_vertices != NULL)
{
memcpy(vertices, m_vertices, sizeof(Vertex) * m_maxcount);
_aligned_free(m_vertices);
}
m_vertices = vertices;
// Report 100 fewer entries than were allocated.
// NOTE(review): presumably slack so that writes starting near the limit cannot overrun - confirm with callers.
m_maxcount = maxcount - 100;
}
// Returns a pointer to the drawing vertex. Can return NULL!
// Sets count to the number of vertices the primitive type needs. When m_vl holds at least that many,
// they are copied to m_vertices[m_count..] and m_vl is trimmed as the primitive type requires
// (lists and sprites clear it; strips and fans keep the vertices they share with the next primitive).
// NULL is returned when m_vl is too short, for GS_INVALID, or when skip is true.
// m_count is not advanced here.
template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
{
switch(prim)
{
case GS_POINTLIST: count = 1; break;
case GS_LINELIST: count = 2; break;
case GS_LINESTRIP: count = 2; break;
case GS_TRIANGLELIST: count = 3; break;
case GS_TRIANGLESTRIP: count = 3; break;
case GS_TRIANGLEFAN: count = 3; break;
case GS_SPRITE: count = 2; break;
case GS_INVALID: count = 1; break;
default: __assume(0);
}
if(m_vl.GetCount() < count)
{
return NULL;
}
if(m_count >= m_maxcount)
{
GrowVertexBuffer();
}
Vertex* v = &m_vertices[m_count];
switch(prim)
{
case GS_POINTLIST:
m_vl.GetAt(0, v[0]);
m_vl.RemoveAll();
break;
case GS_LINELIST:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.RemoveAll();
break;
case GS_LINESTRIP:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.RemoveAt(0, 1);
break;
case GS_TRIANGLELIST:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.GetAt(2, v[2]);
m_vl.RemoveAll();
break;
case GS_TRIANGLESTRIP:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.GetAt(2, v[2]);
m_vl.RemoveAt(0, 2);
break;
case GS_TRIANGLEFAN:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.GetAt(2, v[2]);
m_vl.RemoveAt(1, 1);
break;
case GS_SPRITE:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.RemoveAll();
break;
case GS_INVALID:
ASSERT(0);
m_vl.RemoveAll();
return NULL;
default:
__assume(0);
}
return !skip ? v : NULL;
}
// Implemented by the concrete renderer; called from FlushPrim() with m_vertices/m_count populated.
virtual void Draw() = 0;
public:
// Starts with no vertex buffer; the first DrawingKick that needs space allocates it.
GSRendererT()
: GSRenderer()
, m_vertices(NULL)
, m_count(0)
, m_maxcount(0)
{
}
// Releases the vertex buffer if one was allocated.
virtual ~GSRendererT()
{
if(m_vertices) _aligned_free(m_vertices);
}
};

View File

@ -0,0 +1,426 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSRendererCS.h"
// Builds the compute-shader renderer on top of GSRenderer with its own vertex trace
// object and GSVertex-sized vertices, forces native resolution and marks every video
// memory page as not yet uploaded.
GSRendererCS::GSRendererCS()
	: GSRenderer(new GSVertexTraceCS(this), sizeof(GSVertex))
{
	m_nativeres = true;

	InitConvertVertex(GSRendererCS);

	// clear the page validity bitmap word by word
	for(size_t i = 0; i < sizeof(m_vm_valid) / sizeof(m_vm_valid[0]); i++)
	{
		m_vm_valid[i] = 0;
	}
}
// Nothing to release explicitly: the D3D resources are held in CComPtr members.
GSRendererCS::~GSRendererCS()
{
}
// Initializes the base renderer with the given device and creates the D3D11 buffers this
// renderer works with: the GPU copy of video memory (m_vm) and its raw UAV (m_vm_uav),
// the dynamic vertex buffer (m_vb) and index buffer (m_ib), and a one-page staging buffer (m_pb).
//
// Returns false when the base class fails, when the device reports a feature level below
// D3D_FEATURE_LEVEL_10_0, or when any of the resources cannot be created.
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
{
if(!__super::CreateDevice(dev_unk))
return false;
D3D_FEATURE_LEVEL level;
((GSDeviceDX*)dev_unk)->GetFeatureLevel(level);
if(level < D3D_FEATURE_LEVEL_10_0)
return false;
HRESULT hr;
// NOTE(review): dev_unk is cast to GSDevice11 without a type check - presumably the caller
// only pairs this renderer with a D3D11 device; confirm.
GSDevice11* dev = (GSDevice11*)dev_unk;
D3D11_BUFFER_DESC bd;
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
// NOTE(review): srvd is declared but never used in this function.
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
// video memory (4MB)
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = 4 * 1024 * 1024;
bd.StructureByteStride = 4;
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
hr = (*dev)->CreateBuffer(&bd, NULL, &m_vm);
if(FAILED(hr)) return false;
// raw view over the whole buffer: 1024 * 1024 32-bit elements == 4MB
memset(&uavd, 0, sizeof(uavd));
uavd.Format = DXGI_FORMAT_R32_TYPELESS;
uavd.Buffer.FirstElement = 0;
uavd.Buffer.NumElements = 1024 * 1024;
uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);
if(FAILED(hr)) return false;
// vertex buffer
// fixed capacity of 10000 vertices; Draw() refuses larger batches
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(GSVertex) * 10000;
bd.StructureByteStride = sizeof(GSVertex);
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
hr = (*dev)->CreateBuffer(&bd, NULL, &m_vb);
if(FAILED(hr)) return false;
// index buffer
// fixed capacity of 30000 32-bit indices; Draw() refuses larger batches
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(uint32) * 10000 * 3;
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
hr = (*dev)->CreateBuffer(&bd, NULL, &m_ib);
if(FAILED(hr)) return false;
// one page, for copying between cpu<->gpu
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = PAGE_SIZE;
bd.Usage = D3D11_USAGE_STAGING;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb);
if(FAILED(hr)) return false;
return true;
}
// Not implemented yet: always returns NULL, whatever the value of i.
GSTexture* GSRendererCS::GetOutput(int i)
{
// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture
return NULL;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
{
// TODO: vertex format more fitting as the input for the compute shader
if(src_index != dst_index)
{
GSVertex v = ((GSVertex*)m_vertex.buff)[src_index];
((GSVertex*)m_vertex.buff)[dst_index] = v;
}
}
// Submits the currently batched vertices and indices to the compute shader.
//
// Steps: upload m_vertex.buff / m_index.buff into m_vb / m_ib, create shader resource
// views over them, create buffers holding the frame and z buffer offset tables, look up
// (or compile) the compute shader, bind everything, upload the video memory pages covered
// by the drawing rectangle through Write(), and dispatch.
//
// The draw is silently dropped when the batch exceeds the fixed buffer sizes, when a Map
// call fails or when the shader cannot be compiled.
void GSRendererCS::Draw()
{
HRESULT hr;
GSDevice11* dev = (GSDevice11*)m_dev;
ID3D11DeviceContext* ctx = *dev;
D3D11_BUFFER_DESC bd;
// NOTE(review): uavd is declared but never used in this function.
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
D3D11_MAPPED_SUBRESOURCE map;
CComPtr<ID3D11ShaderResourceView> vb_srv;
CComPtr<ID3D11ShaderResourceView> ib_srv;
// TODO: cache these in hash_maps
CComPtr<ID3D11Buffer> fbr, fbc, zbr, zbc;
CComPtr<ID3D11ShaderResourceView> fbr_srv, fbc_srv, zbr_srv, zbc_srv;
// TODO: grow m_vb, m_ib if needed
// m_vb holds 10000 vertices and m_ib 30000 indices (see CreateDevice); larger batches are dropped
if(m_vertex.next > 10000) return;
if(m_index.tail > 30000) return;
// TODO: fill/advance/discardwhenfull, as in GSDevice11::IASetVertexBuffer/IASetIndexBuffer
hr = ctx->Map(m_vb, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
if(FAILED(hr)) return;
memcpy(map.pData, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
ctx->Unmap(m_vb, 0);
//
hr = ctx->Map(m_ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
if(FAILED(hr)) return;
memcpy(map.pData, m_index.buff, sizeof(uint32) * m_index.tail);
ctx->Unmap(m_ib, 0);
// TODO: UpdateResource might be faster, based on my experience with the real vertex buffer, write-no-overwrite/discarded dynamic buffer + map is better
//
// views over exactly the uploaded range of each buffer
// NOTE(review): the HRESULT of the two CreateShaderResourceView calls below is stored but never checked.
memset(&srvd, 0, sizeof(srvd));
srvd.Format = DXGI_FORMAT_UNKNOWN;
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvd.Buffer.FirstElement = 0;
srvd.Buffer.NumElements = m_vertex.next;
hr = (*dev)->CreateShaderResourceView(m_vb, &srvd, &vb_srv); // TODO: have to create this dynamically in Draw() or pass the start/count in a const reg
memset(&srvd, 0, sizeof(srvd));
srvd.Format = DXGI_FORMAT_R32_UINT;
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvd.Buffer.FirstElement = 0;
srvd.Buffer.NumElements = m_index.tail;
hr = (*dev)->CreateShaderResourceView(m_ib, &srvd, &ib_srv); // TODO: have to create this dynamically in Draw() or pass the start/count in a const reg
// fzb offsets
// four immutable buffers of 4096 ints each: row and column offset tables of the frame and z buffers
// NOTE(review): the HRESULT of the four CreateBuffer calls below is stored but never checked.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(int) * 4096;
bd.StructureByteStride = sizeof(int);
bd.Usage = D3D11_USAGE_IMMUTABLE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
D3D11_SUBRESOURCE_DATA data;
memset(&data, 0, sizeof(data));
data.pSysMem = m_context->offset.fb->pixel.row;
hr = (*dev)->CreateBuffer(&bd, &data, &fbr);
data.pSysMem = m_context->offset.fb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
hr = (*dev)->CreateBuffer(&bd, &data, &fbc);
data.pSysMem = m_context->offset.zb->pixel.row;
hr = (*dev)->CreateBuffer(&bd, &data, &zbr);
data.pSysMem = m_context->offset.zb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
hr = (*dev)->CreateBuffer(&bd, &data, &zbc);
// TODO: D3D10_SHADER_MACRO (primclass, less frequently changing drawing attribs, etc.)
// compiled shaders are cached in m_cs, keyed by the selector (always 0 for now)
uint32 sel = 0; // TODO
hash_map<uint32, CComPtr<ID3D11ComputeShader> >::iterator i = m_cs.find(sel);
CComPtr<ID3D11ComputeShader> cs;
if(i == m_cs.end())
{
// hr = dev->CompileShader(IDR_CS_FX, "cs_main", NULL, &cs);
// NOTE(review): the shader is loaded from a hard-coded absolute path on the author's machine;
// the resource based call above is commented out.
hr = dev->CompileShader("E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\cs.fx", "cs_main", NULL, &cs);
if(FAILED(hr)) return;
m_cs[sel] = cs;
}
else
{
cs = i->second;
}
//
// NOTE(review): fbr_srv, fbc_srv, zbr_srv and zbc_srv are bound below but no view is ever
// created for them in this function (the fbr/fbc/zbr/zbc buffers above are otherwise unused),
// so slots 2-5 receive default-constructed pointers.
dev->CSSetShaderUAV(0, m_vm_uav);
dev->CSSetShaderSRV(0, vb_srv);
dev->CSSetShaderSRV(1, ib_srv);
dev->CSSetShaderSRV(2, fbr_srv);
dev->CSSetShaderSRV(3, fbc_srv);
dev->CSSetShaderSRV(4, zbr_srv);
dev->CSSetShaderSRV(5, zbc_srv);
dev->CSSetShader(cs);
// the drawing rectangle is a fixed 640x512 area for now, aligned outwards to 16x8 blocks
GSVector4i bbox = GSVector4i(0, 0, 640, 512); // TODO: vertex trace
GSVector4i r = bbox.ralign<Align_Outside>(GSVector2i(16, 8));
bool fb = true; // TODO: frame buffer used
bool zb = true; // TODO: z-buffer used
// make sure the pages the shader may touch are present in the GPU copy of video memory
if(fb) Write(m_context->offset.fb, r);
if(zb) Write(m_context->offset.zb, r);
// TODO: constant buffer (frequently changing drawing attribs)
// TODO: texture (implement texture cache)
// TODO: clut to a palette texture (should be texture1d, not simply buffer, it is random accessed)
// TODO: CSSetShaderSRV(6 7 8 ..., texture level 0 1 2 ...) or use Texture3D?
// TODO: invalidate texture cache
/*
CComPtr<ID3D11Query> q;
D3D11_QUERY_DESC qd;
memset(&qd, 0, sizeof(qd));
qd.Query = D3D11_QUERY_EVENT;
hr = (*dev)->CreateQuery(&qd, &q);
ctx->Begin(q);
*/
// debug trace of every dispatch
printf("[%lld] dispatch %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
__rdtsc(),
m_context->FRAME.Block(), m_context->FRAME.PSM,
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
PRIM->PRIM, m_vertex.next, m_index.tail);
// one thread group per 16x8 block of the aligned rectangle
GSVector4i rsize = r.rsize();
dev->Dispatch(rsize.z >> 4, rsize.w >> 3, 1); // TODO: pass upper-left corner offset (r.xy) in a const buffer
/*
ctx->End(q);
uint64 t0 = __rdtsc();
BOOL b;
while(S_OK != ctx->GetData(q, &b, sizeof(BOOL), 0)) {}
printf("%lld\n", __rdtsc() - t0);
*/
}
// Called for the destination of a transfer described by BITBLTBUF: every page of the
// destination rectangle that currently lives in the GPU copy is read back into local
// memory and marked as no longer valid on the GPU side.
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
	// TODO: fully overwritten pages are not needed to be read, only invalidated
	Read(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, true);

	// TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target

	// TODO: invalidate texture cache
}
// Called for the source of a transfer described by BITBLTBUF: every page of the source
// rectangle that currently lives in the GPU copy is read back into local memory. The
// pages stay valid on the GPU side. The clut argument is not used.
void GSRendererCS::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
	Read(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r, false);
}
// Uploads local memory pages to the GPU copy of video memory (m_vm).
//
// o - offset object describing the buffer whose pages are needed
// r - rectangle inside that buffer
//
// For every page returned by o->GetPages(r) that is not yet marked valid in m_vm_valid,
// the page is marked valid and its PAGE_SIZE bytes are copied from m_mem.m_vm8 into m_vm
// at the same byte offset. Pages already marked valid are skipped.
void GSRendererCS::Write(GSOffset* o, const GSVector4i& r)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	// For a buffer the box is expressed in bytes along left/right, and the other two
	// dimensions have to span one unit (top/front = 0, bottom/back = 1). Leaving bottom
	// and back at zero makes the box empty, which turns UpdateSubresource into a no-op.
	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	box.bottom = 1;
	box.back = 1;

	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		uint32 page = pages[i];

		// m_vm_valid is a bitmap: 32 pages per word
		uint32 row = page >> 5;
		uint32 col = 1u << (page & 31); // unsigned literal, bit 31 must not go through a signed shift

		if((m_vm_valid[row] & col) == 0)
		{
			m_vm_valid[row] |= col;

			box.left = page * PAGE_SIZE;
			box.right = box.left + PAGE_SIZE;

			ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + box.left, 0, 0);

			printf("[%lld] write %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
		}
	}

	// the page list is owned by the caller of GetPages
	delete [] pages;
}
// Copies pages from the GPU copy of video memory (m_vm) back into local memory.
//
// o          - offset object describing the buffer whose pages are needed
// r          - rectangle inside that buffer
// invalidate - when true, each page read back is also cleared in m_vm_valid, so the
//              next Write() covering it uploads the page again
//
// Only pages currently marked valid in m_vm_valid are transferred. Each one goes through
// the one-page staging buffer m_pb and is then copied to m_mem.m_vm8 at the same byte
// offset. If mapping the staging buffer fails the page is skipped.
void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	// For a buffer the box is expressed in bytes along left/right, and the other two
	// dimensions have to span one unit (top/front = 0, bottom/back = 1). Leaving bottom
	// and back at zero makes the box empty, which turns CopySubresourceRegion into a no-op.
	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	box.bottom = 1;
	box.back = 1;

	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		uint32 page = pages[i];

		// m_vm_valid is a bitmap: 32 pages per word
		uint32 row = page >> 5;
		uint32 col = 1u << (page & 31); // unsigned literal, bit 31 must not go through a signed shift

		if(m_vm_valid[row] & col)
		{
			// the bit is known to be set here, so the xor clears it
			if(invalidate) m_vm_valid[row] ^= col;

			box.left = page * PAGE_SIZE;
			box.right = box.left + PAGE_SIZE;

			ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);

			D3D11_MAPPED_SUBRESOURCE map;

			if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ_WRITE, 0, &map)))
			{
				memcpy(m_mem.m_vm8 + box.left, map.pData, PAGE_SIZE);

				ctx->Unmap(m_pb, 0);

				printf("[%lld] read %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
			}
		}
	}

	// the page list is owned by the caller of GetPages
	delete [] pages;
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSRenderer.h"
#include "GSDevice11.h"
// Experimental renderer that draws with a D3D11 compute shader into a GPU-side copy of
// video memory instead of going through render targets.
class GSRendererCS : public GSRenderer
{
// vertex trace used by this renderer; adds nothing to GSVertexTrace
class GSVertexTraceCS : public GSVertexTrace
{
public:
GSVertexTraceCS(const GSState* state) : GSVertexTrace(state) {}
};
// GPU copy of video memory and the unordered access view the shader writes through
CComPtr<ID3D11Buffer> m_vm;
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
// vertex and index buffers, refilled on every Draw()
CComPtr<ID3D11Buffer> m_vb;
CComPtr<ID3D11Buffer> m_ib;
// one-page staging buffer used by Read() to copy a page back to the cpu
CComPtr<ID3D11Buffer> m_pb;
// compiled compute shaders, keyed by selector
hash_map<uint32, CComPtr<ID3D11ComputeShader> > m_cs;
// one bit per video memory page (word = page >> 5, bit = page & 31);
// set once the page has been uploaded to m_vm by Write()
uint32 m_vm_valid[16];
// upload / read back the pages covered by rectangle r of the buffer described by o
void Write(GSOffset* o, const GSVector4i& r);
void Read(GSOffset* o, const GSVector4i& r, bool invalidate);
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
bool CreateDevice(GSDevice* dev);
GSTexture* GetOutput(int i);
void Draw();
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut);
public:
GSRendererCS();
virtual ~GSRendererCS();
};

View File

@ -21,3 +21,411 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSRendererDX.h" #include "GSRendererDX.h"
#include "GSDeviceDX.h"
// Forwards the vertex trace, vertex stride and texture cache to GSRendererHW, remembers
// the pixel center offset of the concrete device, leaves the topology unset (-1) and
// reads the renderer options from the application configuration.
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
	: GSRendererHW(vt, vertex_stride, tc)
	, m_pixelcenter(pixelcenter)
	, m_topology(-1)
{
	// an option is enabled by any non-zero configuration value
	m_logz = theApp.GetConfig("logz", 0) != 0;
	m_fba = theApp.GetConfig("fba", 1) != 0;

	//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);

	UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0) != 0;
}
// Nothing to release at this level.
GSRendererDX::~GSRendererDX()
{
}
// Draws the currently batched primitives with the Direct3D device.
//
// rt  - render target to draw into
// ds  - depth/stencil surface bound together with rt
// tex - source texture, or NULL when the primitives are untextured
//
// The function translates the current GS drawing state (m_env, m_context, PRIM) into the
// device's output-merger, vertex, geometry and pixel shader selectors and constant buffers,
// binds the resources, and then issues up to two passes: the regular pass and, when the
// alpha test requires it, a second pass for the pixels that fail the test. Each pass is
// followed by an extra "negative" draw when color clamping is disabled.
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
// destination alpha test is only done when enabled and the frame format is not PSMCT24
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture* rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
// with a stencil buffer: hand SetupDATE a quad covering the bounding box of the batch
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
// without a stencil buffer: work on a full copy of the render target, bound to the pixel shader below
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
if(!IsOpaque())
{
// note: && binds tighter than ||, so AA1 only enables blending for line primitives
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
// per-channel write mask: a channel is written unless its FBMSK byte is 0xff
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt->m_max.p.z > 0xffffff)
{
ASSERT(m_vt->m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt->m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt->m_max.p.z > 0xffff)
{
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt->m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
// scale and offset applied to the vertex positions; rtsize << 4 accounts for 4 fractional bits
// NOTE(review): presumably GS coordinates are 12.4 fixed point - confirm.
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt->m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
// stencil path tests in the output merger, the fallback path tests in the pixel shader
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
// the alpha reference shares a constant with the fog color (its .a component);
// LESS and GREATER get the reference adjusted by one
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
// filter mode 2 decides per batch from the vertex trace, otherwise the configured value is used
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
// w, h: size of the actual texture; tw, th: size declared in TEX0 (powers of two)
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
// no texture bound
// NOTE(review): tfx = 4 looks like a dedicated "untextured" shader mode - confirm against the shader source.
ps_sel.tfx = 4;
}
// rs
// scissor rectangle scaled to the render target and clipped to its size
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawIndexedPrimitive();
// same condition as the colclip setup above: draw again with the negative blend / colclip 2 variant
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
// replace the alpha test by the one looked up in this table, so that this pass handles
// the pixels the first pass rejected
// NOTE(review): the table appears to map each ATST_* value to its logical inverse - confirm against the enum.
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// AFAIL selects what may still be written for pixels that failed the alpha test
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
// skip the pass entirely when nothing would be written
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawIndexedPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
}
dev->EndScene();
// rtcopy is NULL unless the non-stencil DATE path created it above
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}

View File

@ -23,8 +23,7 @@
#include "GSRendererHW.h" #include "GSRendererHW.h"
template<class Vertex> class GSRendererDX : public GSRendererHW
class GSRendererDX : public GSRendererHW<Vertex>
{ {
GSVector2 m_pixelcenter; GSVector2 m_pixelcenter;
bool m_logz; bool m_logz;
@ -35,413 +34,11 @@ class GSRendererDX : public GSRendererHW<Vertex>
protected: protected:
int m_topology; int m_topology;
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
virtual void UpdateFBA(GSTexture* rt) {} virtual void UpdateFBA(GSTexture* rt) {}
public: public:
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)) GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
: GSRendererHW<Vertex>(tc) virtual ~GSRendererDX();
, m_pixelcenter(pixelcenter)
, m_topology(-1)
{
m_logz = !!theApp.GetConfig("logz", 0);
m_fba = !!theApp.GetConfig("fba", 1);
//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0);
}
virtual ~GSRendererDX()
{
}
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture *rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt.m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt.m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt.m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
ps_sel.tfx = 4;
}
// rs
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertices, m_count, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
}
dev->EndScene();
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}
}; };

View File

@ -25,9 +25,9 @@
#include "resource.h" #include "resource.h"
GSRendererDX11::GSRendererDX11() GSRendererDX11::GSRendererDX11()
: GSRendererDX<GSVertexHW11>(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) : GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
{ {
InitVertexKick(GSRendererDX11); InitConvertVertex(GSRendererDX11);
} }
bool GSRendererDX11::CreateDevice(GSDevice* dev) bool GSRendererDX11::CreateDevice(GSDevice* dev)
@ -39,201 +39,42 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
} }
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::VertexKick(bool skip) void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
{ {
GSVertexHW11& dst = m_vl.AddTail(); GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
dst = *(GSVertexHW11*)&m_v; GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
#ifdef ENABLE_UPSCALE_HACKS
if(tme && fst) if(tme && fst)
{ {
//GSVector4::storel(&dst.ST, m_v.GetUV()); // TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
int Udiff = 0; v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
int Vdiff = 0;
int Uadjust = 0;
int Vadjust = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
} }
if(Vdiff != 0) ((GSVector4i*)d)[0] = v0;
{ ((GSVector4i*)d)[1] = v1;
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.ST.S = (float)m_v.UV.U - Uadjust;
dst.ST.T = (float)m_v.UV.V - Vadjust;
}
else if(tme)
{
// Wip :p
//dst.XYZ.X += 5;
//dst.XYZ.Y += 5;
}
#else
if(tme && fst)
{
GSVector4::storel(&dst.ST, m_v.GetUV());
}
#endif
int count = 0;
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
{
GSVector4i scissor = m_context->scissor.dx10;
GSVector4i pmin, pmax;
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2;
switch(prim)
{
case GS_POINTLIST:
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
pmin = v0;
pmax = v0;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
pmin = v0.min_u16(v1).upl16();
pmax = v0.max_u16(v1).upl16();
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
v2 = GSVector4i::load((int)v[2].p.xy);
pmin = v0.min_u16(v1).min_u16(v2).upl16();
pmax = v0.max_u16(v1).max_u16(v2).upl16();
break;
}
#else
switch(prim)
{
case GS_POINTLIST:
pmin.x = v[0].p.x;
pmin.y = v[0].p.y;
pmax.x = v[0].p.x;
pmax.y = v[0].p.y;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
break;
}
#endif
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 0xff)
{
return;
}
m_count += count;
}
} }
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{ {
switch(m_vt.m_primclass) switch(m_vt->m_primclass)
{ {
case GS_POINT_CLASS: case GS_POINT_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break; break;
case GS_LINE_CLASS: case GS_LINE_CLASS:
case GS_SPRITE_CLASS: case GS_SPRITE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
break; break;
case GS_TRIANGLE_CLASS: case GS_TRIANGLE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
break; break;
default: default:
__assume(0); __assume(0);
} }
__super::Draw(rt, ds, tex); __super::DrawPrims(rt, ds, tex);
} }

View File

@ -25,16 +25,21 @@
#include "GSVertexHW.h" #include "GSVertexHW.h"
#include "GSTextureCache11.h" #include "GSTextureCache11.h"
class GSRendererDX11 : public GSRendererDX<GSVertexHW11> class GSRendererDX11 : public GSRendererDX
{ {
protected: protected:
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
public: public:
GSRendererDX11(); GSRendererDX11();
virtual ~GSRendererDX11() {} virtual ~GSRendererDX11() {}
bool CreateDevice(GSDevice* dev); bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
}; };

View File

@ -25,9 +25,9 @@
#include "resource.h" #include "resource.h"
GSRendererDX9::GSRendererDX9() GSRendererDX9::GSRendererDX9()
: GSRendererDX<GSVertexHW9>(new GSTextureCache9(this)) : GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
{ {
InitVertexKick(GSRendererDX9); InitConvertVertex(GSRendererDX9);
} }
bool GSRendererDX9::CreateDevice(GSDevice* dev) bool GSRendererDX9::CreateDevice(GSDevice* dev)
@ -58,210 +58,150 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
} }
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::VertexKick(bool skip) void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{ {
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(tme && !fst) if(tme && !fst)
{ {
p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
} }
else else
{ {
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
} }
GSVertexHW9& dst = m_vl.AddTail(); GSVector4 t = GSVector4::zero();
dst.p = p;
int Uadjust = 0;
int Vadjust = 0;
if(tme) if(tme)
{ {
if(fst) if(fst)
{ {
dst.t = m_v.GetUV(); t = GSVector4(GSVector4i::load(s->UV).upl16());
#ifdef ENABLE_UPSCALE_HACKS
int Udiff = 0;
int Vdiff = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.t.x -= (float) Uadjust;
dst.t.y -= (float) Vadjust;
#endif
} }
else else
{ {
dst.t = GSVector4::loadl(&m_v.ST); t = GSVector4::loadl(&s->ST);
} }
} }
dst._c0() = m_v.RGBAQ.u32[0]; t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
dst._c1() = m_v.FOG.u32[1];
// d->p = p;
d->t = t;
// BaseDrawingKick can never return NULL here because the DrawingKick function
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
// condition where this function would return NULL).
int count = 0;
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
{
GSVector4 scissor = m_context->scissor.dx9;
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 3)
{
return;
}
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();}
break;
case GS_SPRITE:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
v[0].p.z = v[1].p.z;
v[0].p.w = v[1].p.w;
v[0]._c1() = v[1]._c1();
v[2] = v[1];
v[3] = v[1];
v[1].p.y = v[0].p.y;
v[1].t.y = v[0].t.y;
v[2].p.x = v[0].p.x;
v[2].t.x = v[0].t.x;
v[4] = v[1];
v[5] = v[2];
count += 4;
break;
}
m_count += count;
}
} }
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{ {
switch(m_vt.m_primclass) switch(m_vt->m_primclass)
{ {
case GS_POINT_CLASS: case GS_POINT_CLASS:
m_topology = D3DPT_POINTLIST; m_topology = D3DPT_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break; break;
case GS_LINE_CLASS: case GS_LINE_CLASS:
m_topology = D3DPT_LINELIST; m_topology = D3DPT_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 2)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 1];
m_index.buff[i + 1] = tmp;
}
}
break; break;
case GS_TRIANGLE_CLASS: case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST; m_topology = D3DPT_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 3)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 2];
m_index.buff[i + 2] = tmp;
}
}
break; break;
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
// each sprite converted to quad needs twice the space
while(m_vertex.tail * 2 > m_vertex.maxcount)
{
GrowVertexBuffer();
}
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.next >= 2)
{
size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertexHW9 v0 = s[0];
GSVertexHW9 v1 = s[1];
v0.p = v0.p.xyzw(v1.p); // z, q
v0.t = v0.t.xyzw(v1.t); // c, f
q[0] = v0;
q[3] = v1;
// swap x, s
GSVector4 p = v0.p.insert<0, 0>(v1.p);
GSVector4 t = v0.t.insert<0, 0>(v1.t);
v1.p = v1.p.insert<0, 0>(v0.p);
v1.t = v1.t.insert<0, 0>(v0.t);
v0.p = p;
v0.t = t;
q[1] = v0;
q[2] = v1;
index[0] = i + 0;
index[1] = i + 1;
index[2] = i + 2;
index[3] = i + 1;
index[4] = i + 2;
index[5] = i + 3;
}
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
m_index.tail = count * 3;
}
break;
default: default:
__assume(0); __assume(0);
} }
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO (*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
__super::Draw(rt, ds, tex); __super::DrawPrims(rt, ds, tex);
} }
void GSRendererDX9::UpdateFBA(GSTexture* rt) void GSRendererDX9::UpdateFBA(GSTexture* rt)
@ -280,7 +220,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx(); GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] = GSVertexPT1 vertices[] =

View File

@ -25,7 +25,7 @@
#include "GSVertexHW.h" #include "GSVertexHW.h"
#include "GSTextureCache9.h" #include "GSTextureCache9.h"
class GSRendererDX9 : public GSRendererDX<GSVertexHW9> class GSRendererDX9 : public GSRendererDX
{ {
protected: protected:
struct struct
@ -34,14 +34,20 @@ protected:
Direct3DBlendState9 bs; Direct3DBlendState9 bs;
} m_fba; } m_fba;
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt); void UpdateFBA(GSTexture* rt);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
public: public:
GSRendererDX9(); GSRendererDX9();
virtual ~GSRendererDX9() {} virtual ~GSRendererDX9() {}
bool CreateDevice(GSDevice* dev); bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
}; };

View File

@ -21,3 +21,912 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSRendererHW.h" #include "GSRendererHW.h"
// Constructs the hardware renderer base.
//
// vt / vertex_stride are forwarded to GSRenderer; tc is stored in m_tc (and is
// deleted by the destructor). The target size defaults to 1024x1024 and is then
// overridden from the "resx"/"resy"/"upscale_multiplier" settings unless the
// renderer runs at native resolution.
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
	: GSRenderer(vt, vertex_stride)
	, m_tc(tc)
	, m_width(1024)
	, m_height(1024)
	, m_skip(0)
	, m_reset(false)
	, m_upscale_multiplier(1)
{
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
	m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);

	if(m_nativeres)
	{
		// native resolution: scaling is disabled regardless of the setting

		m_upscale_multiplier = 1;

		return;
	}

	m_width = theApp.GetConfig("resx", m_width);
	m_height = theApp.GetConfig("resy", m_height);
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);

	if(m_upscale_multiplier > 6)
	{
		// out of range => fall back to 1 and keep the resx/resy based size ("normal upscale math")

		m_upscale_multiplier = 1;
	}
	else if(m_upscale_multiplier > 1)
	{
		// integer scaling of a fixed 640x512 base
		// (512 wide / 448 high are also common, but they are not always detected right)

		m_width = 640 * m_upscale_multiplier;
		m_height = 512 * m_upscale_multiplier;
	}
}
// Releases the texture cache that was handed to the constructor.
GSRendererHW::~GSRendererHW()
{
	delete m_tc;
}
// Forwards the CRC to the base renderer, re-selects the per-game hack callbacks
// for the detected game (m_game) and applies per-title size overrides.
void GSRendererHW::SetGameCRC(uint32 crc, int options)
{
	GSRenderer::SetGameCRC(crc, options);

	m_hacks.SetGameCRC(m_game);

	const bool jackie_chan = (m_game.title == CRC::JackieChanAdv);

	if(jackie_chan)
	{
		// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem

		m_width = 1280;
	}
}
bool GSRendererHW::CanUpscale()
{
if(m_hacks.m_cu && !(this->*m_hacks.m_cu)())
{
return false;
}
return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
}
// Accessor for the upscale multiplier chosen in the constructor.
int GSRendererHW::GetUpscaleMultiplier()
{
	return m_upscale_multiplier;
}
// Requests a texture cache flush and resets the base renderer.
//
// The cache is not emptied here: the flush is only flagged (m_reset) and is
// carried out by VSync().
void GSRendererHW::Reset()
{
	// TODO: GSreset can come from the main thread too => crash
	// m_tc->RemoveAll();

	m_reset = true;

	GSRenderer::Reset();
}
// Per-frame housekeeping: runs the base VSync, ages the texture cache and the
// device pool, clears the skip counter and performs a flush that Reset() deferred.
void GSRendererHW::VSync(int field)
{
	GSRenderer::VSync(field);

	m_tc->IncAge();

	m_dev->AgePool();

	m_skip = 0;

	if(!m_reset)
	{
		return;
	}

	// deferred flush requested by Reset()

	m_tc->RemoveAll();

	m_reset = false;
}
// Empties the texture cache first, then lets the base class reset the device.
void GSRendererHW::ResetDevice()
{
	m_tc->RemoveAll();

	GSRenderer::ResetDevice();
}
// Returns the texture of the cached target that backs display circuit i,
// or NULL when the texture cache has no such target.
//
// When dumping is enabled (s_dump) the texture may also be saved to disk and
// the dump counter s_n is advanced.
GSTexture* GSRendererHW::GetOutput(int i)
{
	const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

	// describe the display frame buffer as a texture for the cache lookup

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = DISPFB.Block();
	TEX0.TBW = DISPFB.FBW;
	TEX0.PSM = DISPFB.PSM;

	// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

	GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height);

	if(target == NULL)
	{
		return NULL;
	}

	GSTexture* output = target->m_texture;

	if(s_dump)
	{
		if(s_save && s_n >= s_saven)
		{
			output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
		}

		s_n++;
	}

	return output;
}
// Forwards an invalidation of rectangle r in the BITBLTBUF destination buffer
// (DBP/DBW/DPSM) to the texture cache.
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
	// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);

	m_tc->InvalidateVideoMem(
		m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM),
		r);
}
// Forwards an invalidation of rectangle r in the BITBLTBUF source buffer
// (SBP/SBW/SPSM) to the texture cache. Requests flagged as clut are ignored.
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
	// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);

	if(!clut) // FIXME: clut requests are dropped
	{
		m_tc->InvalidateLocalMem(
			m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM),
			r);
	}
}
// Draws the currently queued primitives.
//
// Steps, in order:
//  1. bail out if the device is lost or the frame is flagged as bad (CRC hacks)
//  2. look up the render target and depth target in the texture cache
//  3. look up the source texture when texturing is enabled (PRIM->TME)
//  4. run the per-game "on input" hack (m_hacks.m_oi); it can cancel the draw
//  5. temporarily patch TEST/FRAME/ZBUF, call DrawPrims(), restore the registers
//  6. mark the touched rectangle valid on the targets and invalidate the cache
//  7. run the per-game "on output" hack (m_hacks.m_oo)
// Optional debug dumps (s_dump) are interleaved and advance s_n.
void GSRendererHW::Draw()
{
if(m_dev->IsLost()) return;
#ifndef DISABLE_CRC_HACKS
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// render target: described by the FRAME register
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
// depth target: base pointer and format come from ZBUF, the buffer width is taken from FRAME
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
if(!rt || !ds)
{
ASSERT(0);
return;
}
// source texture, only needed when texture mapping is enabled
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
if(!tex) return;
}
// debug dump of the inputs (texture, palette, rt and z before drawing)
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
// per-game "on input" hack; a false return value cancels this draw
// NOTE: it runs before the register backup below, so register changes made by the hack are not undone here
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
// back up the registers that get patched for the duration of DrawPrims
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
// fm: frame buffer write mask, zm: all ones when z is masked or the z test is disabled
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
// TryAlphaTest receives fm/zm by reference (presumably adjusting them); on success the alpha test is forced to always pass
if(GSRenderer::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
DrawPrims(rt->m_texture, ds->m_texture, tex);
//
// restore the original register values
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
// r: bounding box of the drawn vertices (from the vertex trace), clipped to the scissor
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
// anything other than a fully set mask means the buffer may have been written
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
// per-game "on output" hack
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
// debug dump of the outputs (rt and z after drawing)
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
// hw texture cache disabled: hand the drawn rectangle of the render target to the cache's Read()
m_tc->Read(rt, r);
#endif
}
// hacks
// Registers every per-game hack callback.
//
// Three lists are filled: "on input" (OI, called before a draw), "on output"
// (OO, called after a draw) and "can upscale" (CU). Each entry pairs a game
// title and region with a GSRendererHW member function; the maps are
// constructed from the lists, and the active callbacks start out as NULL.
GSRendererHW::Hacks::Hacks()
	: m_oi_map(m_oi_list)
	, m_oo_map(m_oo_list)
	, m_cu_map(m_cu_list)
	, m_oi(NULL)
	, m_oo(NULL)
	, m_cu(NULL)
{
	typedef HackEntry<OI_Ptr> OIEntry;
	typedef HackEntry<OO_Ptr> OOEntry;
	typedef HackEntry<CU_Ptr> CUEntry;

	// on input

	m_oi_list.push_back(OIEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
	m_oi_list.push_back(OIEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
	m_oi_list.push_back(OIEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
	m_oi_list.push_back(OIEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
	m_oi_list.push_back(OIEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
	m_oi_list.push_back(OIEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
	m_oi_list.push_back(OIEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
	m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
	m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); // shares the handler of the first game
	m_oi_list.push_back(OIEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
	m_oi_list.push_back(OIEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
	m_oi_list.push_back(OIEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
	m_oi_list.push_back(OIEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
	m_oi_list.push_back(OIEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
	m_oi_list.push_back(OIEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
	m_oi_list.push_back(OIEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
	m_oi_list.push_back(OIEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));

	// on output

	m_oo_list.push_back(OOEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
	m_oo_list.push_back(OOEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));

	// can upscale

	m_cu_list.push_back(CUEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
	m_cu_list.push_back(CUEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
	m_cu_list.push_back(CUEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
// Selects the active hack callbacks for the given game.
//
// The lookup key packs the region into the top byte and the title into the
// lower bits. Games flagged with CRC::PointListPalette get OI_PointListPalette
// as their "on input" hack (no other OI hack is expected for them).
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
{
	const uint32 key = (uint32)((game.region << 24) | game.title);

	m_oi = m_oi_map[key];
	m_oo = m_oo_map[key];
	m_cu = m_cu_map[key];

	if(!(game.flags & CRC::PointListPalette))
	{
		return;
	}

	ASSERT(m_oi == NULL);

	m_oi = &GSRendererHW::OI_PointListPalette;
}
// "On input" hack for FFXII video playback.
//
// The game streams video as point lists (columns of pixels) and then copies it
// to the screen with one textured horizontal line per row. This hack
//  - detects the line pass (448 or 512 lines) and remembers the line count,
//  - captures the colors of the following point lists into a CPU-side buffer
//    and cancels those draws (returns false),
//  - on the next line pass uploads the buffer into a new 512x512 texture and
//    replaces the lines with two triangles.
// State is kept in function-local statics (video, lines).
//
// Returns false when the draw must be skipped, true otherwise.
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	static uint32* video = NULL;
	static size_t lines = 0;

	if(lines == 0)
	{
		// waiting for the first line pass to learn the video height

		if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
		{
			lines = m_vertex.next / 2;
		}
	}
	else
	{
		if(m_vt->m_primclass == GS_POINT_CLASS)
		{
			if(m_vertex.next >= 16 * 512)
			{
				// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

				if(!video) video = new uint32[512 * 512];

				int ox = m_context->XYOFFSET.OFX;
				int oy = m_context->XYOFFSET.OFY;

				const uint8* RESTRICT v = m_vertex.buff;

				// visit exactly m_vertex.next vertices (the previous "i >= 0" form
				// read one vertex past the end of the queued data)

				for(int i = (int)m_vertex.next; i > 0; i--, v += m_vertex.stride)
				{
					int x = (GetPosX(v) - ox) >> 4;
					int y = (GetPosY(v) - oy) >> 4;

					// rows are 448 pixels apart (256 + 128 + 64); drop anything that
					// would land outside the 448x512 area instead of corrupting memory

					if(x < 0 || x >= 448 || y < 0 || y >= 512) continue;

					video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
				}

				return false;
			}
			else
			{
				lines = 0;
			}
		}
		else if(m_vt->m_primclass == GS_LINE_CLASS)
		{
			// video stays NULL until a point pass has been captured; without the
			// check Update() below would be handed a NULL source buffer

			if(video != NULL && m_vertex.next == lines * 2)
			{
				// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
				// but we use the stored video data to create a new texture, and replace the lines with two triangles

				m_dev->Recycle(t->m_texture);

				t->m_texture = m_dev->CreateTexture(512, 512);

				t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

				// keep the first two vertices and move the last two next to them => 4 corner vertices

				size_t stride = m_vertex.stride;

				memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
				memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);

				m_index.buff[0] = 0;
				m_index.buff[1] = 1;
				m_index.buff[2] = 2;
				m_index.buff[3] = 1;
				m_index.buff[4] = 2;
				m_index.buff[5] = 3;

				m_vertex.head = m_vertex.tail = m_vertex.next = 4;
				m_index.tail = 6;

				m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
			}
			else
			{
				lines = 0;
			}
		}
	}

	return true;
}
// "On input" hack for FFX: clears the depth buffer when the draw matches the
// random battle transition. Never cancels the draw.
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 zbp = m_context->ZBUF.Block();
	const uint32 tbp = m_context->TEX0.TBP0;

	const bool frame_match = (fbp == 0x00d00 || fbp == 0x00000);
	const bool tex_match = PRIM->TME && tbp == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S;

	if(frame_match && zbp == 0x02100 && tex_match)
	{
		// random battle transition (z buffer written directly, clear it now)

		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// "On input" hack for Metal Slug 6: missing red channel fix
// (looks alright in pcsx2 r5000+).
//
// For every queued vertex whose color has red == 0 while green and blue are
// non-zero, red is replaced with the rounded average of green and blue. The
// vertex trace is refreshed afterwards. Never cancels the draw.
bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	uint8* RESTRICT v = m_vertex.buff;

	// visit exactly m_vertex.next vertices (the previous "i >= 0" form touched
	// one vertex past the end and could write to it through SetColor)

	for(int i = (int)m_vertex.next; i > 0; i--, v += m_vertex.stride)
	{
		uint32 c = GetColor(v);

		uint32 r = (c >> 0) & 0xff;
		uint32 g = (c >> 8) & 0xff;
		uint32 b = (c >> 16) & 0xff;

		if(r == 0 && g != 0 && b != 0)
		{
			// keep the upper 24 bits, put (g + b) / 2 rounded up into the red byte

			SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
		}
	}

	m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);

	return true;
}
// "On input" hack for God of War 2: the game clears the z buffer by drawing to
// it as a PSMZ24 frame buffer. The matching depth target is looked up and
// cleared, and the draw itself is cancelled.
bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fbw = m_context->FRAME.FBW;
	const uint32 fpsm = m_context->FRAME.PSM;

	// ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280

	const bool known_address = (fbp == 0x00f00 || fbp == 0x00100 || fbp == 0x01280);

	if(!known_address || fpsm != PSM_PSMZ24)
	{
		return true;
	}

	// z buffer clear

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = fbp;
	TEX0.TBW = fbw;
	TEX0.PSM = fpsm;

	GSTextureCache::Target* zbuf = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

	if(zbuf != NULL)
	{
		m_dev->ClearDepth(zbuf->m_texture, 0);
	}

	return false;
}
// "On input" hack for The Simpsons Game: replaces the game's z buffer clear
// with a full depth clear and cancels the draw.
bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	// the frame buffer width is not part of the detection (unused local removed)

	uint32 FBP = m_context->FRAME.Block();
	uint32 FPSM = m_context->FRAME.PSM;

	if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc
	{
		// instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
		// it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
		// how? by using a render target that overlaps with the lower half of the z buffer...

		// TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully

		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
// "On input" hack for Rozen Maiden: Gebet Garden.
//
// The game clears its frame buffer through the z buffer and its z buffer
// through the frame buffer. Both untextured draws are recognised by their
// buffer addresses, turned into a direct clear of the matching cached target
// and cancelled.
bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(PRIM->TME)
	{
		return true;
	}

	const uint32 fbp = m_context->FRAME.Block();
	const uint32 zbp = m_context->ZBUF.Block();

	if(fbp == 0x008c0 && zbp == 0x01a40)
	{
		// frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = zbp;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->FRAME.PSM;

		GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

		if(target != NULL)
		{
			m_dev->ClearRenderTarget(target->m_texture, 0);
		}

		return false;
	}

	if(fbp == 0x00000 && zbp == 0x01180)
	{
		// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = fbp;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->ZBUF.PSM;

		GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

		if(target != NULL)
		{
			m_dev->ClearDepth(target->m_texture, 0);
		}

		return false;
	}

	return true;
}
// "On input" hack for Spider-Man: Web of Shadows: the game's clear only covers
// the top half of the screen, so the whole depth buffer is cleared as well.
// Never cancels the draw.
bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	const bool known_address = (fbp == 0x025a0 || fbp == 0x02800); //0x2800 pal, 0x25a0 ntsc

	if(known_address && fpsm == PSM_PSMCT32)
	{
		//only top half of the screen clears

		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// "On input" hack for Ty the Tasmanian Tiger: replaces the game's half height
// buffer clear with a full depth clear and cancels the draw.
bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	// the frame buffer width is not part of the detection (unused local removed)

	uint32 FBP = m_context->FRAME.Block();
	uint32 FPSM = m_context->FRAME.PSM;

	if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc
	{
		//half height buffer clear

		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
// "On input" hack for Digimon Rumble Arena 2: on the game's untextured half
// height buffer clear the whole depth buffer is cleared too.
// Never cancels the draw.
bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		return true;
	}

	const bool known_address = (fbp == 0x02300 || fbp == 0x03fc0);

	if(known_address && fpsm == PSM_PSMCT32)
	{
		//half height buffer clear

		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// "On input" hack for Black Hawk Down: replaces the game's half height buffer
// clear with a full depth clear and cancels the draw.
bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(fbp != 0x02000 || fpsm != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear

	m_dev->ClearDepth(ds, 0);

	return false;
}
// "On input" hack for Star Wars: The Force Unleashed.
//
// Untextured draws to the PSMCT24 buffer at 0x2bc0 are replaced by a depth
// clear and cancelled. Textured draws to the PSMCT32 buffers at 0x0 / 0x1180
// whose vertices all have z == 0 also trigger a depth clear, but still run.
bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		const bool known_address = (fbp == 0x0 || fbp == 0x01180);
		const bool flat_zero_z = (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0);

		if(known_address && fpsm == PSM_PSMCT32 && flat_zero_z)
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(fpsm == PSM_PSMCT24 && fbp == 0x2bc0)
	{
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
// OI hack named for XmenOriginsWolverine.
//
// When the frame buffer is at block 0 with the 16-bit PSMCT16 format, the
// depth texture `ds` is cleared to 0. `rt` and `t` are not used.
//
// Always returns true.
bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	const bool matches = fbp == 0x0 && fpsm == PSM_PSMCT16;

	if(matches)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// OI hack named for CallofDutyFinalFronts.
//
// A draw whose frame buffer is at block 0x02300 with the PSMZ24 format is
// handled by clearing the depth texture `ds` to 0 and returning false.
// Anything else returns true without side effects. `rt` and `t` are not used.
bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(fbp != 0x02300 || fpsm != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
// OI hack named for SpyroNewBeginning.
//
// Untextured draw: if the frame buffer is block 0x02800 or 0x02bc0 in 24-bit
// colour format, clear the depth texture `ds` to 0 and return false.
//
// Textured draw: if the frame buffer is block 0x0 or 0x01180 in 32-bit colour
// format and the traced vertex z range is exactly [0, 0], clear `ds` to 0;
// the function still returns true in this case.
//
// `rt` and `t` are not used.
bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(!PRIM->TME)
	{
		const bool clear_fbp = fbp == 0x02800 || fbp == 0x02bc0; //0x2800 pal, 0x2bc0 ntsc

		if(fpsm != PSM_PSMCT24 || !clear_fbp)
		{
			return true;
		}

		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	// textured draw from here on
	const bool known_fbp = fbp == 0x0 || fbp == 0x01180;

	if(known_fbp && fpsm == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
	{
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// OI hack named for SpyroEternalNight.
//
// Untextured draw: if the frame buffer is block 0x2bc0 in 24-bit colour
// format, clear the depth texture `ds` to 0 and return false.
//
// Textured draw: if the frame buffer is block 0x0 or 0x01180 in 32-bit colour
// format and the traced vertex z range is exactly [0, 0], clear `ds` to 0;
// the function still returns true in this case.
//
// `rt` and `t` are not used.
bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	if(!PRIM->TME)
	{
		if(fpsm != PSM_PSMCT24 || fbp != 0x2bc0)
		{
			return true;
		}

		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	// textured draw from here on
	const bool known_fbp = fbp == 0x0 || fbp == 0x01180;

	if(known_fbp && fpsm == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
	{
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// OI hack named for TalesOfLegendia.
//
// When the frame buffer is block 0x01c00 in 32-bit colour format, alpha
// testing is disabled (TEST.ATE == 0) and all traced vertices share one z
// value, the depth test of the current context is forced to ZTST_ALWAYS.
// Note that this writes to m_context->TEST directly.
// `rt`, `ds` and `t` are not used.
//
// Always returns true.
bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fpsm = m_context->FRAME.PSM;

	const bool target_matches = fpsm == PSM_PSMCT32 && fbp == 0x01c00;

	if(target_matches && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
	{
		m_context->TEST.ZTST = ZTST_ALWAYS;

		//m_dev->ClearDepth(ds, 0);
	}

	return true;
}
// OI hack that turns a small untextured point list into a direct write to
// local memory.
//
// It applies only to untextured point-class draws whose frame buffer block is
// at or above 0x03f40 and a multiple of 0x20. The vertex buffer must then
// hold exactly 16 or 256 vertices:
//   -  16 vertices are written as an 8 x 2 pixel grid  (x = i & 7,  y = i >> 3)
//   - 256 vertices are written as a 16 x 16 pixel grid (x = i & 15, y = i >> 4)
//
// For each vertex the alpha byte is rescaled (values >= 0x80 become 0xff,
// smaller values are doubled), the adjusted colour is stored back into the
// vertex and written with WritePixel32 at the frame buffer address. The CLUT
// is invalidated afterwards and false is returned.
//
// Any other vertex count trips ASSERT(0) and returns true, as do draws that do
// not match the conditions above. `rt`, `ds` and `t` are not used.
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(m_vt->m_primclass != GS_POINT_CLASS || PRIM->TME)
	{
		return true;
	}

	const uint32 fbp = m_context->FRAME.Block();
	const uint32 fbw = m_context->FRAME.FBW;

	if(fbp < 0x03f40 || (fbp & 0x1f) != 0)
	{
		return true;
	}

	// number of entries and log2 of the row width of the destination grid
	int count;
	int shift;

	if(m_vertex.next == 16)
	{
		count = 16;
		shift = 3;
	}
	else if(m_vertex.next == 256)
	{
		count = 256;
		shift = 4;
	}
	else
	{
		ASSERT(0);

		return true;
	}

	const int mask = (1 << shift) - 1;

	uint8* RESTRICT vertex = m_vertex.buff;

	for(int i = 0; i < count; i++, vertex += m_vertex.stride)
	{
		const uint32 src = GetColor(vertex);
		const uint32 a = src >> 24;

		// a << 25 is (a * 2) placed in the top byte; a < 0x80 here so it cannot overflow
		const uint32 alpha = a >= 0x80 ? 0xff000000 : (a << 25);
		const uint32 dst = alpha | (src & 0x00ffffff);

		SetColor(vertex, dst);

		m_mem.WritePixel32(i & mask, i >> shift, dst, fbp, fbw);
	}

	m_mem.m_clut.Invalidate();

	return false;
}
void GSRendererHW::OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void GSRendererHW::OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool GSRendererHW::CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool GSRendererHW::CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool GSRendererHW::CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}

View File

@ -26,34 +26,13 @@
#include "GSCrc.h" #include "GSCrc.h"
#include "GSFunctionMap.h" #include "GSFunctionMap.h"
class GSRendererHW : public GSRenderer
template<class Vertex>
class GSRendererHW : public GSRendererT<Vertex>
{ {
protected:
using GSRendererT<Vertex>::m_vt;
using GSRendererT<Vertex>::m_count;
using GSRendererT<Vertex>::m_env;
using GSRendererT<Vertex>::m_context;
using GSRendererT<Vertex>::m_vertices;
using GSRendererT<Vertex>::m_dev;
using GSRendererT<Vertex>::PRIM;
using GSRendererT<Vertex>::m_mem;
using GSRendererT<Vertex>::m_regs;
using GSRendererT<Vertex>::m_perfmon;
using GSRendererT<Vertex>::m_game;
using GSRendererT<Vertex>::s_dump;
using GSRendererT<Vertex>::s_save;
using GSRendererT<Vertex>::s_saven;
using GSRendererT<Vertex>::s_savez;
using GSRendererT<Vertex>::s_n;
private: private:
int m_width; int m_width;
int m_height; int m_height;
int m_skip; int m_skip;
bool m_reset; bool m_reset;
bool m_nativeres;
int m_upscale_multiplier; int m_upscale_multiplier;
int m_userhacks_skipdraw; int m_userhacks_skipdraw;
@ -63,505 +42,29 @@ private:
typedef void (GSRendererHW::*OO_Ptr)(); typedef void (GSRendererHW::*OO_Ptr)();
typedef bool (GSRendererHW::*CU_Ptr)(); typedef bool (GSRendererHW::*CU_Ptr)();
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
static uint32* video = NULL; bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
static int lines = 0; bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
if(lines == 0) bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
if(m_vt.m_primclass == GS_LINE_CLASS && (m_count == 448 * 2 || m_count == 512 * 2)) bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
lines = m_count / 2; bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
} bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
} bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
else bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
if(m_vt.m_primclass == GS_POINT_CLASS) bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
if(m_count >= 16 * 512) bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
{ void OO_DBZBT2();
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 void OO_MajokkoALaMode2();
if(!video) video = new uint32[512 * 512]; bool CU_DBZBT2();
bool CU_MajokkoALaMode2();
int ox = m_context->XYOFFSET.OFX; bool CU_TalesOfAbyss();
int oy = m_context->XYOFFSET.OFY;
for(int i = 0; i < m_count; i++)
{
int x = ((int)m_vertices[i].p.x - ox) >> 4;
int y = ((int)m_vertices[i].p.y - oy) >> 4;
// video[y * 448 + x] = m_vertices[i].c0;
video[(y << 8) + (y << 7) + (y << 6) + x] = m_vertices[i]._c0();
}
return false;
}
else
{
lines = 0;
}
}
else if(m_vt.m_primclass == GS_LINE_CLASS)
{
if(m_count == lines * 2)
{
// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
// but we use the stored video data to create a new texture, and replace the lines with two triangles
m_dev->Recycle(t->m_texture);
t->m_texture = m_dev->CreateTexture(512, 512);
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
m_vertices[0] = m_vertices[0];
m_vertices[1] = m_vertices[1];
m_vertices[2] = m_vertices[m_count - 2];
m_vertices[3] = m_vertices[1];
m_vertices[4] = m_vertices[2];
m_vertices[5] = m_vertices[m_count - 1];
m_count = 6;
m_vt.Update(m_vertices, m_count, GS_TRIANGLE_CLASS);
}
else
{
lines = 0;
}
}
}
return true;
}
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 ZBP = m_context->ZBUF.Block();
uint32 TBP = m_context->TEX0.TBP0;
if((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S)
{
// random battle transition (z buffer written directly, clear it now)
m_dev->ClearDepth(ds, 0);
}
return true;
}
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
// missing red channel fix
for(int i = 0, j = m_count; i < j; i++)
{
if(m_vertices[i]._r() == 0 && m_vertices[i]._g() != 0 && m_vertices[i]._b() != 0)
{
m_vertices[i]._r() = (m_vertices[i]._g() + m_vertices[i]._b()) / 2;
}
}
m_vt.Update(m_vertices, m_count, m_vt.m_primclass);
return true;
}
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FBW = m_context->FRAME.FBW;
uint32 FPSM = m_context->FRAME.PSM;
if((FBP == 0x00f00 || FBP == 0x00100 || FBP == 0x01280) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
{
// z buffer clear
GIFRegTEX0 TEX0;
TEX0.TBP0 = FBP;
TEX0.TBW = FBW;
TEX0.PSM = FPSM;
if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true))
{
m_dev->ClearDepth(ds->m_texture, 0);
}
return false;
}
return true;
}
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FBW = m_context->FRAME.FBW;
uint32 FPSM = m_context->FRAME.PSM;
if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc
{
// instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
// it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
// how? by using a render target that overlaps with the lower half of the z buffer...
m_dev->ClearDepth(ds, 0);
return false;
}
return true;
}
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
if(!PRIM->TME)
{
uint32 FBP = m_context->FRAME.Block();
uint32 ZBP = m_context->ZBUF.Block();
if(FBP == 0x008c0 && ZBP == 0x01a40)
{
// frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer
GIFRegTEX0 TEX0;
TEX0.TBP0 = ZBP;
TEX0.TBW = m_context->FRAME.FBW;
TEX0.PSM = m_context->FRAME.PSM;
if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true))
{
m_dev->ClearRenderTarget(rt->m_texture, 0);
}
return false;
}
else if(FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180)
{
// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)
GIFRegTEX0 TEX0;
TEX0.TBP0 = FBP;
TEX0.TBW = m_context->FRAME.FBW;
TEX0.PSM = m_context->ZBUF.PSM;
if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true))
{
m_dev->ClearDepth(ds->m_texture, 0);
}
return false;
}
}
return true;
}
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if((FBP == 0x025a0 || FBP == 0x02800) && FPSM == PSM_PSMCT32) //0x2800 pal, 0x25a0 ntsc
{
//only top half of the screen clears
m_dev->ClearDepth(ds, 0);
}
return true;
}
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FBW = m_context->FRAME.FBW;
uint32 FPSM = m_context->FRAME.PSM;
if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
return false;
}
return true;
}
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(!PRIM->TME)
{
if((FBP == 0x02300 || FBP == 0x03fc0) && FPSM == PSM_PSMCT32)
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
}
}
return true;
}
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(FBP == 0x02000 && FPSM == PSM_PSMZ24)
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
return false;
}
return true;
}
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(!PRIM->TME)
{
if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0)
{
m_dev->ClearDepth(ds, 0);
return false;
}
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_max.p.z == 0))
{
m_dev->ClearDepth(ds, 0);
}
}
return true;
}
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(FBP == 0x0 && FPSM == PSM_PSMCT16)
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
}
return true;
}
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(FBP == 0x02300 && FPSM == PSM_PSMZ24)
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
return false;
}
return true;
}
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(!PRIM->TME)
{
if(FPSM == PSM_PSMCT24 && (FBP == 0x02800 || FBP == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
return false;
}
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0))
{
m_dev->ClearDepth(ds, 0);
}
}
return true;
}
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if(!PRIM->TME)
{
if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0)
{
//half height buffer clear
m_dev->ClearDepth(ds, 0);
return false;
}
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0))
{
m_dev->ClearDepth(ds, 0);
}
}
return true;
}
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_max.p.z == m_vt.m_min.p.z)
{
m_context->TEST.ZTST = ZTST_ALWAYS;
//m_dev->ClearDepth(ds, 0);
}
return true;
}
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FBW = m_context->FRAME.FBW;
if(FBP >= 0x03f40 && (FBP & 0x1f) == 0)
{
if(m_count == 16)
{
for(int i = 0; i < 16; i++)
{
uint8 a = m_vertices[i]._a();
m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2;
m_mem.WritePixel32(i & 7, i >> 3, m_vertices[i]._c0(), FBP, FBW);
}
m_mem.m_clut.Invalidate();
return false;
}
else if(m_count == 256)
{
for(int i = 0; i < 256; i++)
{
uint8 a = m_vertices[i]._a();
m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2;
m_mem.WritePixel32(i & 15, i >> 4, m_vertices[i]._c0(), FBP, FBW);
}
m_mem.m_clut.Invalidate();
return false;
}
else
{
ASSERT(0);
}
}
}
return true;
}
void OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}
class Hacks class Hacks
{ {
@ -617,383 +120,37 @@ private:
OO_Ptr m_oo; OO_Ptr m_oo;
CU_Ptr m_cu; CU_Ptr m_cu;
Hacks() Hacks();
: m_oi_map(m_oi_list)
, m_oo_map(m_oo_list)
, m_cu_map(m_cu_list)
, m_oi(NULL)
, m_oo(NULL)
, m_cu(NULL)
{
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2)); void SetGameCRC(const CRC::Game& game);
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
void SetGame(const CRC::Game& game)
{
uint32 hash = (uint32)((game.region << 24) | game.title);
m_oi = m_oi_map[hash];
m_oo = m_oo_map[hash];
m_cu = m_cu_map[hash];
if(game.flags & CRC::PointListPalette)
{
ASSERT(m_oi == NULL);
m_oi = &GSRendererHW::OI_PointListPalette;
}
}
} m_hacks; } m_hacks;
virtual int GetPosX(const void* vertex) const = 0;
virtual int GetPosY(const void* vertex) const = 0;
virtual uint32 GetColor(const void* vertex) const = 0;
virtual void SetColor(void* vertex, uint32 c) const = 0;
#pragma endregion #pragma endregion
protected: protected:
GSTextureCache* m_tc; GSTextureCache* m_tc;
void Reset() virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
{
// TODO: GSreset can come from the main thread too => crash
// m_tc->RemoveAll();
m_reset = true;
GSRendererT<Vertex>::Reset();
}
void VSync(int field)
{
GSRendererT<Vertex>::VSync(field);
m_tc->IncAge();
m_dev->AgePool();
m_skip = 0;
if(m_reset)
{
m_tc->RemoveAll();
m_reset = false;
}
}
void ResetDevice()
{
m_tc->RemoveAll();
GSRendererT<Vertex>::ResetDevice();
}
GSTexture* GetOutput(int i)
{
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
GIFRegTEX0 TEX0;
TEX0.TBP0 = DISPFB.Block();
TEX0.TBW = DISPFB.FBW;
TEX0.PSM = DISPFB.PSM;
// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);
GSTexture* t = NULL;
if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height))
{
t = rt->m_texture;
if(s_dump)
{
if(s_save && s_n >= s_saven)
{
t->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
}
s_n++;
}
}
return t;
}
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false)
{
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
if(clut) return; // FIXME
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
}
void Draw()
{
#ifndef DISABLE_CRC_HACKS
if(GSRendererT<Vertex>::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
if(!rt || !ds)
{
ASSERT(0);
return;
}
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
if(!tex) return;
}
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
if(GSRendererT<Vertex>::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
Draw(rt->m_texture, ds->m_texture, tex);
//
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
m_tc->Read(rt, r);
#endif
}
virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
bool CanUpscale()
{
if(m_hacks.m_cu && !(this->*m_hacks.m_cu)())
{
return false;
}
return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
}
int GetUpscaleMultiplier()
{
return m_upscale_multiplier;
}
public: public:
GSRendererHW(GSTextureCache* tc) GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
: GSRendererT<Vertex>() virtual ~GSRendererHW();
, m_tc(tc)
, m_width(1024)
, m_height(1024)
, m_skip(0)
, m_reset(false)
, m_upscale_multiplier(1)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
if(!m_nativeres) void SetGameCRC(uint32 crc, int options);
{ bool CanUpscale();
m_width = theApp.GetConfig("resx", m_width); int GetUpscaleMultiplier();
m_height = theApp.GetConfig("resy", m_height);
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier); void Reset();
void VSync(int field);
if(m_upscale_multiplier > 6) void ResetDevice();
{ GSTexture* GetOutput(int i);
m_upscale_multiplier = 1; // use the normal upscale math void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
} void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
else if(m_upscale_multiplier > 1) void Draw();
{
m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
}
}
else m_upscale_multiplier = 1;
}
virtual ~GSRendererHW()
{
delete m_tc;
}
void SetGameCRC(uint32 crc, int options)
{
GSRendererT<Vertex>::SetGameCRC(crc, options);
m_hacks.SetGame(m_game);
if(m_game.title == CRC::JackieChanAdv)
{
m_width = 1280; // TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem
}
}
}; };

View File

@ -23,9 +23,20 @@
#include "GSRenderer.h" #include "GSRenderer.h"
class GSRendererNull : public GSRendererT<GSVertexNull> class GSRendererNull : public GSRenderer
{ {
class GSVertexTraceNull : public GSVertexTrace
{
public:
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {}
};
protected: protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index)
{
}
void Draw() void Draw()
{ {
} }
@ -37,12 +48,8 @@ protected:
public: public:
GSRendererNull() GSRendererNull()
: GSRendererT<GSVertexNull>() : GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
{
InitVertexKick(GSRendererNull);
}
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
{ {
InitConvertVertex(GSRendererNull);
} }
}; };

View File

@ -25,9 +25,12 @@
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads) GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL) : GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
, m_fzb(NULL)
{ {
InitVertexKick(GSRendererSW); InitConvertVertex(GSRendererSW);
m_nativeres = true; // ignore ini, sw is always native
m_tc = new GSTextureCacheSW(this); m_tc = new GSTextureCacheSW(this);
@ -62,46 +65,32 @@ void GSRendererSW::Reset()
m_reset = true; m_reset = true;
GSRendererT<GSVertexSW>::Reset(); GSRenderer::Reset();
} }
void GSRendererSW::VSync(int field) void GSRendererSW::VSync(int field)
{ {
Sync(0); // IncAge might delete a cached texture in use Sync(0); // IncAge might delete a cached texture in use
/* /*
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n", int draw[8], sum = 0;
for(int i = 0; i < countof(draw); i++)
{
draw[i] = m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
sum += draw[i];
}
printf("CPU %d Sync %d W %d %d %d %d %d %d %d %d (%d)\n",
m_perfmon.CPU(GSPerfMon::Main), m_perfmon.CPU(GSPerfMon::Main),
m_perfmon.CPU(GSPerfMon::Sync), m_perfmon.CPU(GSPerfMon::Sync),
m_perfmon.CPU(GSPerfMon::WorkerSync0), draw[0], draw[1], draw[2], draw[3], draw[4], draw[5], draw[6], draw[7], sum);
m_perfmon.CPU(GSPerfMon::WorkerSleep0),
m_perfmon.CPU(GSPerfMon::WorkerDraw0),
m_perfmon.CPU(GSPerfMon::WorkerSync1),
m_perfmon.CPU(GSPerfMon::WorkerSleep1),
m_perfmon.CPU(GSPerfMon::WorkerDraw1),
m_perfmon.CPU(GSPerfMon::WorkerSync2),
m_perfmon.CPU(GSPerfMon::WorkerSleep2),
m_perfmon.CPU(GSPerfMon::WorkerDraw2),
m_perfmon.CPU(GSPerfMon::WorkerSync3),
m_perfmon.CPU(GSPerfMon::WorkerSleep3),
m_perfmon.CPU(GSPerfMon::WorkerDraw3),
m_perfmon.CPU(GSPerfMon::WorkerSync4),
m_perfmon.CPU(GSPerfMon::WorkerSleep4),
m_perfmon.CPU(GSPerfMon::WorkerDraw4),
m_perfmon.CPU(GSPerfMon::WorkerSync5),
m_perfmon.CPU(GSPerfMon::WorkerSleep5),
m_perfmon.CPU(GSPerfMon::WorkerDraw5),
m_perfmon.CPU(GSPerfMon::WorkerSync6),
m_perfmon.CPU(GSPerfMon::WorkerSleep6),
m_perfmon.CPU(GSPerfMon::WorkerDraw6),
m_perfmon.CPU(GSPerfMon::WorkerSync7),
m_perfmon.CPU(GSPerfMon::WorkerSleep7),
m_perfmon.CPU(GSPerfMon::WorkerDraw7));
// //
printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0; printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0;
printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0;
*/ */
GSRenderer::VSync(field);
GSRendererT<GSVertexSW>::VSync(field);
m_tc->IncAge(); m_tc->IncAge();
@ -162,91 +151,193 @@ GSTexture* GSRendererSW::GetOutput(int i)
return m_texture[i]; return m_texture[i];
} }
void GSRendererSW::Draw() template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{ {
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass); GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
GSVector4i scissor = GSVector4i(m_context->scissor.in); ASSERT(d->_pad.u32[0] != 0x12345678);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour uint32 z = s->XYZ.Z;
GSVector4i r = bbox.rintersect(scissor); GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r); GSVector4 p, t, c;
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
GSRasterizerData2* data2 = new GSRasterizerData2(this, fb_pages, zb_pages); p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
shared_ptr<GSRasterizerData> data(data2); if(tme)
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
if(!GetScanlineGlobalData(*gd))
{ {
return; if(fst)
{
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
}
} }
data->scissor = scissor; c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
data->bbox = bbox;
data->primclass = m_vt.m_primclass; d->p = p;
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later? d->c = c;
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here d->t = t;
data->count = m_count;
data->solidrect = gd->sel.IsSolidRect(); #ifdef _DEBUG
data->frame = m_perfmon.GetFrame(); d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
#endif
if(prim == GS_SPRITE)
{
d->t.u32[3] = z;
}
}
#define LOG 0
FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
void GSRendererSW::Draw()
{
SharedData* sd = new SharedData(this);
shared_ptr<GSRasterizerData> data(sd);
sd->primclass = m_vt->m_primclass;
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
sd->vertex = (GSVertexSW*)sd->buff;
sd->vertex_count = m_vertex.next;
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
sd->index_count = m_index.tail;
memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
}
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
// TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too
if(!GetScanlineGlobalData(sd)) return;
// //
if(gd->sel.fwrite) const GSDrawingContext* context = m_context;
GSScanlineGlobalData& gd = sd->global;
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
sd->scissor = scissor;
sd->bbox = bbox;
sd->frame = m_perfmon.GetFrame();
//
uint32* fb_pages = NULL;
uint32* zb_pages = NULL;
GSVector4i r = bbox.rintersect(scissor);
if(gd.sel.fwrite)
{ {
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm); fb_pages = context->offset.fb->GetPages(r);
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
} }
if(gd->sel.zwrite) if(gd.sel.zwrite)
{ {
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm); zb_pages = context->offset.zb->GetPages(r);
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
} }
// set data->syncpoint // set data->syncpoint
if(m_fzb != m_context->offset.fzb) if(m_fzb != context->offset.fzb)
{ {
m_fzb = m_context->offset.fzb; // hmm, what if "r" gets bigger next time and slips through unchecked, need to trace that too
data->syncpoint = true; sd->syncpoint = true; // TODO
if(!sd->syncpoint)
{
if(fb_pages == NULL)
{
fb_pages = context->offset.fb->GetPages(r);
} }
// - chross-check frame and z-buffer pages, they cannot overlap with eachother and with previous batches in queue if(CheckTargetPages<0xffffffff>(fb_pages))
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300) {
sd->syncpoint = true;
if(!data->syncpoint) if(LOG) fprintf(s_fp, "syncpoint 0\n");
{
if(gd->sel.fwrite)
{
for(list<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer
{
data->syncpoint = true;
break;
}
}
} }
} }
if(!data->syncpoint) if(!sd->syncpoint)
{ {
if(gd->sel.zwrite) if(zb_pages == NULL)
{ {
for(list<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++) zb_pages = context->offset.zb->GetPages(r);
{ }
if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer
{
data->syncpoint = true;
break; if(CheckTargetPages<0xffffffff>(zb_pages))
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 1\n");
}
}
if(!sd->syncpoint)
{
if(LOG) fprintf(s_fp, "no syncpoint *\n");
}
m_fzb = context->offset.fzb;
}
else
{
// cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue,
// m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output
// is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
if(!sd->syncpoint)
{
if(gd.sel.fwrite)
{
if(CheckTargetPages<0xffff0000>(fb_pages)) // already used as a z-buffer
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 2\n");
}
}
}
if(!sd->syncpoint)
{
if(gd.sel.zwrite)
{
if(CheckTargetPages<0x0000ffff>(zb_pages)) // already used as a frame buffer
{
sd->syncpoint = true;
if(LOG) fprintf(s_fp, "syncpoint 3\n");
} }
} }
} }
@ -254,7 +345,7 @@ void GSRendererSW::Draw()
// //
data2->UseTargetPages(); sd->UseTargetPages(fb_pages, zb_pages);
// //
@ -313,21 +404,15 @@ void GSRendererSW::Draw()
} }
else else
{ {
if(LOG) fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
m_context->FRAME.Block(), m_context->FRAME.PSM,
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
PRIM->PRIM, sd->vertex_count, sd->index_count);
m_rl->Queue(data); m_rl->Queue(data);
} }
int prims = 0;
switch(data->primclass)
{
case GS_POINT_CLASS: prims = data->count; break;
case GS_LINE_CLASS: prims = data->count / 2; break;
case GS_TRIANGLE_CLASS: prims = data->count / 3; break;
case GS_SPRITE_CLASS: prims = data->count / 2; break;
}
m_perfmon.Put(GSPerfMon::Prim, prims);
/* /*
if(0)//stats.ticks > 5000000) if(0)//stats.ticks > 5000000)
{ {
@ -346,45 +431,59 @@ void GSRendererSW::Sync(int reason)
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
uint64 t = __rdtsc();
m_rl->Sync(); m_rl->Sync();
// NOTE: m_fzb_pages is refcounted, zeroing is done automatically s_n++;
memset(m_tex_pages, 0, sizeof(m_tex_pages)); t = __rdtsc() - t;
if(LOG) fprintf(s_fp, "sync n=%d r=%d t=%lld p=%d %c\n", s_n, reason, t, m_rl->GetPixels(), t > 10000000 ? '*' : ' ');
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
} }
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{ {
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
list<uint32>* pages = o->GetPages(r); uint32* RESTRICT p = m_tmp_pages;
m_tc->InvalidatePages(pages, o->psm); o->GetPages(r, p);
// check if the changing pages either used as a texture or a target // check if the changing pages either used as a texture or a target
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++) for(; *p != GSOffset::EOP; p++)
{ {
uint32 page = *i; uint32 page = *p;
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31)))) //while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause();
if(m_fzb_pages[page] | m_tex_pages[page])
{ {
Sync(5); Sync(5);
break; break;
} }
} }
m_tc->InvalidatePages(m_tmp_pages, o->psm); // if texture update runs on a thread and Sync(5) happens then this must come later
} }
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{ {
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
list<uint32>* pages = o->GetPages(r); uint32* RESTRICT p = m_tmp_pages;
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++) o->GetPages(r, p);
for(; *p != GSOffset::EOP; p++)
{ {
if(m_fzb_pages[*i]) //while(m_fzb_pages[*p]) _mm_pause();
if(m_fzb_pages[*p])
{ {
Sync(6); Sync(6);
@ -393,52 +492,84 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
} }
} }
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset) void GSRendererSW::UsePages(const uint32* pages, int type)
{ {
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++) if(type < 2)
{ {
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX); for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset); _InterlockedIncrement16((short*)&m_fzb_pages[*p] + type);
} }
}
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
{
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
} }
} else
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
{
for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
{ {
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D) for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
//while(m_fzb_pages[*p]) _mm_pause();
if(m_fzb_pages[*p]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
{ {
Sync(7); Sync(7);
return; break;
}
} }
} for(const uint32* p = pages; *p != GSOffset::EOP; p++)
for(size_t i = 0; i < countof(t->m_pages.bm); i++)
{ {
m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used ASSERT(m_tex_pages[*p] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_tex_pages[*p]); // remember which texture pages are used
} }
}
}
void GSRendererSW::ReleasePages(const uint32* pages, int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] > 0);
_InterlockedDecrement16((short*)&m_tex_pages[*p]);
}
}
}
// Returns true as soon as one page of the EOP-terminated list has a non-zero
// m_fzb_pages entry under "mask", false if none has.
// Draw() instantiates this with 0xffff0000 ("already used as a z-buffer"),
// 0x0000ffff ("already used as a frame buffer") and 0xffffffff (either).
template<uint32 mask> bool GSRendererSW::CheckTargetPages(const uint32* pages)
{
// walk the list until the GSOffset::EOP terminator
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
// mask is a template argument; for 0xffffffff the whole entry is tested without the AND
if(mask != 0xffffffff ? (m_fzb_pages[*p] & mask) : m_fzb_pages[*p])
{
return true;
}
}
return false;
} }
#include "GSTextureSW.h" #include "GSTextureSW.h"
bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{ {
GSScanlineGlobalData& gd = data->global;
const GSDrawingEnvironment& env = m_env; const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context; const GSDrawingContext* context = m_context;
const GS_PRIM_CLASS primclass = m_vt.m_primclass; const GS_PRIM_CLASS primclass = m_vt->m_primclass;
gd.vm = m_mem.m_vm8; gd.vm = m_mem.m_vm8;
@ -456,7 +587,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.atst = ATST_ALWAYS; gd.sel.atst = ATST_ALWAYS;
gd.sel.tfx = TFX_NONE; gd.sel.tfx = TFX_NONE;
gd.sel.ababcd = 255; gd.sel.ababcd = 255;
gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0; gd.sel.prim = primclass;
uint32 fm = context->FRAME.FBMSK; uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
@ -500,7 +631,12 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
bool zwrite = zm != 0xffffffff; bool zwrite = zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS; bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
/*
printf("%05x %d %05x %d %05x %d %dx%d\n",
fwrite || ftest ? m_context->FRAME.Block() : 0xfffff, m_context->FRAME.PSM,
zwrite || ztest ? m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH);
*/
if(!fwrite && !zwrite) return false; if(!fwrite && !zwrite) return false;
gd.sel.fwrite = fwrite; gd.sel.fwrite = fwrite;
@ -510,7 +646,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{ {
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
{ {
gd.sel.iip = PRIM->IIP; gd.sel.iip = PRIM->IIP;
} }
@ -520,7 +656,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.tfx = context->TEX0.TFX; gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC; gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST; gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear(); gd.sel.ltf = m_vt->IsLinear();
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{ {
@ -534,7 +670,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.wms = context->CLAMP.WMS; gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT; gd.sel.wmt = context->CLAMP.WMT;
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
{ {
// modulate does not do anything when vertex color is 0x80 // modulate does not do anything when vertex color is 0x80
@ -545,7 +681,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(t == NULL) {ASSERT(0); return false;} if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t); data->UseSourcePages(t, 0);
GSVector4i r; GSVector4i r;
@ -553,7 +689,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(!t->Update(r)) {ASSERT(0); return false;} if(!t->Update(r)) {ASSERT(0); return false;}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0) if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
{ {
uint64 frame = m_perfmon.GetFrame(); uint64 frame = m_perfmon.GetFrame();
@ -570,7 +706,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.tex[0] = t->m_buff; gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3; gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0) if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
{ {
// TEX1.MMIN // TEX1.MMIN
// 000 p // 000 p
@ -580,13 +716,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// 100 l round // 100 l round
// 101 l tri // 101 l tri
if(m_vt.m_lod.x > 0) if(m_vt->m_lod.x > 0)
{ {
gd.sel.ltf = context->TEX1.MMIN >> 2; gd.sel.ltf = context->TEX1.MMIN >> 2;
} }
else else
{ {
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0 // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
} }
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
@ -595,9 +731,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16); int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
int k = context->TEX1.K << 12; int k = context->TEX1.K << 12;
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
{ {
k = (int)m_vt.m_lod.x << 16; // set lod to max level k = (int)m_vt->m_lod.x << 16; // set lod to max level
gd.sel.lcm = 1; // lod is constant gd.sel.lcm = 1; // lod is constant
gd.sel.mmin = 1; // tri-linear is meaningless gd.sel.mmin = 1; // tri-linear is meaningless
@ -611,7 +747,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(gd.sel.fst) if(gd.sel.fst)
{ {
ASSERT(gd.sel.lcm == 1); ASSERT(gd.sel.lcm == 1);
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
gd.sel.lcm = 1; gd.sel.lcm = 1;
} }
@ -640,8 +776,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
GIFRegTEX0 MIP_TEX0 = context->TEX0; GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP; GIFRegCLAMP MIP_CLAMP = context->CLAMP;
GSVector4 tmin = m_vt.m_min.t; GSVector4 tmin = m_vt->m_min.t;
GSVector4 tmax = m_vt.m_max.t; GSVector4 tmax = m_vt->m_max.t;
static int s_counter = 0; static int s_counter = 0;
@ -691,14 +827,14 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
MIP_CLAMP.MAXU >>= 1; MIP_CLAMP.MAXU >>= 1;
MIP_CLAMP.MAXV >>= 1; MIP_CLAMP.MAXV >>= 1;
m_vt.m_min.t *= 0.5f; m_vt->m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f; m_vt->m_max.t *= 0.5f;
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;} if(t == NULL) {ASSERT(0); return false;}
UseSourcePages(t); data->UseSourcePages(t, i);
GSVector4i r; GSVector4i r;
@ -734,8 +870,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
s_counter++; s_counter++;
m_vt.m_min.t = tmin; m_vt->m_min.t = tmin;
m_vt.m_max.t = tmax; m_vt->m_max.t = tmax;
} }
else else
{ {
@ -743,17 +879,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{ {
// skip per pixel division if q is constant // skip per pixel division if q is constant
GSVertexSW* v = m_vertices; GSVertexSW* RESTRICT v = data->vertex;
if(m_vt.m_eq.q) if(m_vt->m_eq.q)
{ {
gd.sel.fst = 1; gd.sel.fst = 1;
if(v[0].t.z != 1.0f) const GSVector4& t = v[data->index[0]].t;
{
GSVector4 w = v[0].t.zzzz().rcpnr();
for(int i = 0, j = m_count; i < j; i++) if(t.z != 1.0f)
{
GSVector4 w = t.zzzz().rcpnr();
for(int i = 0, j = data->vertex_count; i < j; i++)
{ {
GSVector4 t = v[i].t; GSVector4 t = v[i].t;
@ -765,7 +903,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{ {
gd.sel.fst = 1; gd.sel.fst = 1;
for(int i = 0, j = m_count; i < j; i += 2) for(int i = 0, j = data->vertex_count; i < j; i += 2)
{ {
GSVector4 t0 = v[i + 0].t; GSVector4 t0 = v[i + 0].t;
GSVector4 t1 = v[i + 1].t; GSVector4 t1 = v[i + 1].t;
@ -786,9 +924,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
GSVector4 half(0x8000, 0x8000); GSVector4 half(0x8000, 0x8000);
GSVertexSW* v = m_vertices; GSVertexSW* RESTRICT v = data->vertex;
for(int i = 0, j = m_count; i < j; i++) for(int i = 0, j = data->vertex_count; i < j; i++)
{ {
GSVector4 t = v[i].t; GSVector4 t = v[i].t;
@ -920,7 +1058,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{ {
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt; gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS; gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000; gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
} }
gd.fm = GSVector4i(fm); gd.fm = GSVector4i(fm);
@ -950,149 +1088,73 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
return true; return true;
} }
template<uint32 prim, uint32 tme, uint32 fst> GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
void GSRendererSW::VertexKick(bool skip) : m_parent(parent)
, m_fb_pages(NULL)
, m_zb_pages(NULL)
, m_using_pages(false)
{ {
const GSDrawingContext* context = m_context; m_tex_pages[0] = NULL;
GSVertexSW& dst = m_vl.AddTail(); global.sel.key = 0;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET; global.clut = NULL;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later global.dimx = NULL;
}
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; GSRendererSW::SharedData::~SharedData()
if(tme)
{
GSVector4 t;
if(fst)
{
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
}
dst.t = t;
}
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
if(prim == GS_SPRITE)
{
dst.t.u32[3] = m_v.XYZ.Z;
}
int count = 0;
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(prim);
if(!m_dump)
{ {
GSVector4 pmin, pmax; if(m_using_pages)
switch(primclass)
{ {
case GS_POINT_CLASS: if(global.sel.fwrite)
pmin = v[0].p; {
pmax = v[0].p; m_parent->ReleasePages(m_fb_pages, 0);
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLE_CLASS:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
} }
GSVector4 scissor = context->scissor.ex; if(global.sel.zwrite)
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(primclass)
{ {
case GS_TRIANGLE_CLASS: m_parent->ReleasePages(m_zb_pages, 1);
case GS_SPRITE_CLASS: }
test |= pmin.ceil() == pmax.ceil();
break;
} }
switch(primclass) delete m_fb_pages;
delete m_zb_pages;
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
{ {
case GS_TRIANGLE_CLASS: m_parent->ReleasePages(m_tex_pages[i], 2);
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
} }
if(test.mask() & 3) if(global.clut) _aligned_free(global.clut);
{ if(global.dimx) _aligned_free(global.dimx);
return;
}
} }
switch(primclass)
void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
{
if(m_using_pages) return;
m_fb_pages = fb_pages;
m_zb_pages = zb_pages;
if(global.sel.fwrite)
{ {
case GS_POINT_CLASS: m_parent->UsePages(fb_pages, 0);
break;
case GS_LINE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLE_CLASS:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE_CLASS:
break;
} }
if(m_count < 30 && m_count >= 3) if(global.sel.zwrite)
{ {
int tl = 0; m_parent->UsePages(zb_pages, 1);
int br = 0;
if(primclass == GS_TRIANGLE_CLASS && GSVertexSW::IsQuad(&m_vertices[m_count - 3], tl, br))
{
m_count -= 3;
if(m_count > 0)
{
tl += m_count;
br += m_count;
Flush();
} }
if(tl != 0) m_vertices[0] = m_vertices[tl]; m_using_pages = true;
if(br != 1) m_vertices[1] = m_vertices[br]; }
m_vertices[0].t.u32[3] = m_v.XYZ.Z; void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
m_vertices[1].t.u32[3] = m_v.XYZ.Z; {
ASSERT(m_tex_pages[level] == NULL);
m_count = 2;
m_tex_pages[level] = t->m_pages.n;
uint32 tmp = PRIM->PRIM; m_tex_pages[level + 1] = NULL;
PRIM->PRIM = GS_SPRITE;
m_parent->UsePages(t->m_pages.n, 2);
Flush();
PRIM->PRIM = tmp;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
}
}
m_count += count;
// Flush();
}
} }

View File

@ -25,83 +25,22 @@
#include "GSTextureCacheSW.h" #include "GSTextureCacheSW.h"
#include "GSDrawScanline.h" #include "GSDrawScanline.h"
class GSRendererSW : public GSRendererT<GSVertexSW> class GSRendererSW : public GSRenderer
{ {
class GSRasterizerData2 : public GSRasterizerData class SharedData : public GSDrawScanline::SharedData
{ {
GSRendererSW* m_parent; GSRendererSW* m_parent;
const list<uint32>* m_fb_pages; const uint32* m_fb_pages;
const list<uint32>* m_zb_pages; const uint32* m_zb_pages;
const uint32* m_tex_pages[7 + 1]; // NULL terminated
bool m_using_pages; bool m_using_pages;
public: public:
GSRasterizerData2(GSRendererSW* parent, const list<uint32>* fb_pages, const list<uint32>* zb_pages) SharedData(GSRendererSW* parent);
: m_parent(parent) virtual ~SharedData();
, m_fb_pages(fb_pages)
, m_zb_pages(zb_pages)
, m_using_pages(false)
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
gd->sel.key = 0; void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
gd->clut = NULL;
gd->dimx = NULL;
param = gd;
}
virtual ~GSRasterizerData2()
{
ReleaseTargetPages();
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
if(gd->dimx) _aligned_free(gd->dimx);
_aligned_free(gd);
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
}
void UseTargetPages()
{
if(m_using_pages) {ASSERT(0); return;}
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
{
m_parent->UseTargetPages(m_fb_pages, 0);
}
if(gd->sel.zwrite)
{
m_parent->UseTargetPages(m_zb_pages, 1);
}
m_using_pages = true;
}
void ReleaseTargetPages()
{
if(!m_using_pages) {ASSERT(0); return;}
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
{
m_parent->ReleaseTargetPages(m_fb_pages, 0);
}
if(gd->sel.zwrite)
{
m_parent->ReleaseTargetPages(m_zb_pages, 1);
}
m_using_pages = false;
}
}; };
protected: protected:
@ -112,7 +51,8 @@ protected:
bool m_reset; bool m_reset;
GSPixelOffset4* m_fzb; GSPixelOffset4* m_fzb;
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint32 m_tex_pages[16]; uint16 m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset(); void Reset();
void VSync(int field); void VSync(int field);
@ -124,16 +64,16 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UseTargetPages(const list<uint32>* pages, int offset); void UsePages(const uint32* pages, int type);
void ReleaseTargetPages(const list<uint32>* pages, int offset); void ReleasePages(const uint32* pages, int type);
void UseSourcePages(const GSTextureCacheSW::Texture* t); template<uint32 mask> bool CheckTargetPages(const uint32* pages);
bool GetScanlineGlobalData(GSScanlineGlobalData& gd); bool GetScanlineGlobalData(SharedData* data);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
public: public:
GSRendererSW(int threads); GSRendererSW(int threads);
virtual ~GSRendererSW(); virtual ~GSRendererSW();
template<uint32 prim, uint32 tme, uint32 fst>
void VertexKick(bool skip);
}; };

View File

@ -61,12 +61,12 @@ union GSScanlineSelector
uint32 colclamp:1; // 43 uint32 colclamp:1; // 43
uint32 fba:1; // 44 uint32 fba:1; // 44
uint32 dthe:1; // 45 uint32 dthe:1; // 45
uint32 sprite:1; // 46 uint32 prim:2; // 46
uint32 edge:1; // 47
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) uint32 edge:1; // 48
uint32 lcm:1; // 49 uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 mmin:2; // 50 uint32 lcm:1; // 50
uint32 mmin:2; // 51
}; };
struct struct
@ -92,7 +92,7 @@ union GSScanlineSelector
bool IsSolidRect() const bool IsSolidRect() const
{ {
return sprite return prim == GS_SPRITE_CLASS
&& iip == 0 && iip == 0
&& tfx == TFX_NONE && tfx == TFX_NONE
&& abe == 0 && abe == 0

View File

@ -317,9 +317,9 @@ void GSSettingsDlg::UpdateControls()
EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw); EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw && GSDevice9::GetMaxDepth(m_lastValidMsaa) < 32); EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw && GSDevice9::GetMaxDepth(m_lastValidMsaa) < 32);
EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw); EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); //EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let uers set software params regardless of renderer used
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw); //EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw); //EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_MSAAEDIT), hw); EnableWindow(GetDlgItem(m_hWnd, IDC_MSAAEDIT), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_MSAA), hw); EnableWindow(GetDlgItem(m_hWnd, IDC_MSAA), hw);

View File

@ -27,10 +27,17 @@
using namespace Xbyak; using namespace Xbyak;
// Byte offsets, relative to esp, of the values the generated code reads from the stack
// (see the esp-relative loads of _vertex, _index and _dscan in this generator).
// NOTE(review): _args = 0 presumably is the return address slot of a 32-bit call frame,
// with the three 4-byte arguments following it - confirm against the caller's convention.
static const int _args = 0;
static const int _vertex = _args + 4; // base pointer added to index * sizeof(GSVertexSW)
static const int _index = _args + 8; // pointer to the uint32 index array
static const int _dscan = _args + 12; // loaded into edx by Generate()
void GSSetupPrimCodeGenerator::Generate() void GSSetupPrimCodeGenerator::Generate()
{ {
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++) for(int i = 0; i < 5; i++)
{ {
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]); vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -53,7 +60,7 @@ void GSSetupPrimCodeGenerator::Depth()
return; return;
} }
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// GSVector4 p = dscan.p; // GSVector4 p = dscan.p;
@ -107,7 +114,12 @@ void GSSetupPrimCodeGenerator::Depth()
} }
else else
{ {
// GSVector4 p = vertices[0].p; // GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -312,7 +324,25 @@ void GSSetupPrimCodeGenerator::Color()
} }
else else
{ {
// GSVector4i c = GSVector4i(vertices[0].c); // GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -27,10 +27,17 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
void GSSetupPrimCodeGenerator::Generate() void GSSetupPrimCodeGenerator::Generate()
{ {
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++) for(int i = 0; i < 5; i++)
{ {
movaps(Xmm(3 + i), ptr[&m_shift[i]]); movaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -53,7 +60,7 @@ void GSSetupPrimCodeGenerator::Depth()
return; return;
} }
if(!m_sel.sprite) if(m_sel.prim != GS_SPRITE_CLASS)
{ {
// GSVector4 p = dscan.p; // GSVector4 p = dscan.p;
@ -112,7 +119,12 @@ void GSSetupPrimCodeGenerator::Depth()
} }
else else
{ {
// GSVector4 p = vertices[0].p; // GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -327,7 +339,25 @@ void GSSetupPrimCodeGenerator::Color()
} }
else else
{ {
// GSVector4i c = GSVector4i(vertices[0].c); // GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@
#include "GSDrawingContext.h" #include "GSDrawingContext.h"
#include "GSDrawingEnvironment.h" #include "GSDrawingEnvironment.h"
#include "GSVertex.h" #include "GSVertex.h"
#include "GSVertexList.h" #include "GSVertexTrace.h"
#include "GSUtil.h" #include "GSUtil.h"
#include "GSPerfMon.h" #include "GSPerfMon.h"
#include "GSVector.h" #include "GSVector.h"
@ -42,13 +42,14 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r); typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r);
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4];
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r); template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r); template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
@ -56,8 +57,9 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r); typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r);
GIFRegHandler m_fpGIFRegHandlers[256]; GIFRegHandler m_fpGIFRegHandlers[256];
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
void ApplyTEX0(int i, GIFRegTEX0& TEX0); template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
void ApplyPRIM(const GIFRegPRIM& PRIM); void ApplyPRIM(const GIFRegPRIM& PRIM);
void GIFRegHandlerNull(const GIFReg* RESTRICT r); void GIFRegHandlerNull(const GIFReg* RESTRICT r);
@ -65,13 +67,11 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r); void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
void GIFRegHandlerST(const GIFReg* RESTRICT r); void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r); void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r); template<uint32 prim, uint32 adc> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r); template<uint32 prim, uint32 adc> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r); template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r); template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
void GIFRegHandlerFOG(const GIFReg* RESTRICT r); void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r);
void GIFRegHandlerNOP(const GIFReg* RESTRICT r); void GIFRegHandlerNOP(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r); template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r); template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r);
@ -126,49 +126,68 @@ class GSState : public GSAlignedClass<32>
} m_tr; } m_tr;
void FlushWrite();
protected: protected:
bool IsBadFrame(int& skip, int UserHacks_SkipDraw); bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
typedef void (GSState::*VertexKickPtr)(bool skip); GSVertex m_v;
float m_q;
GSVector4 m_scissor;
uint32 m_ofxy;
VertexKickPtr m_vk[8][2][2]; struct
VertexKickPtr m_vkf;
#define InitVertexKick3(T, P, N, M) \
m_vk[P][N][M] = (VertexKickPtr)(void (T::*)(bool))&T::VertexKick<P, N, M>;
#define InitVertexKick2(T, P) \
InitVertexKick3(T, P, 0, 0) \
InitVertexKick3(T, P, 0, 1) \
InitVertexKick3(T, P, 1, 0) \
InitVertexKick3(T, P, 1, 1) \
#define InitVertexKick(T) \
InitVertexKick2(T, GS_POINTLIST) \
InitVertexKick2(T, GS_LINELIST) \
InitVertexKick2(T, GS_LINESTRIP) \
InitVertexKick2(T, GS_TRIANGLELIST) \
InitVertexKick2(T, GS_TRIANGLESTRIP) \
InitVertexKick2(T, GS_TRIANGLEFAN) \
InitVertexKick2(T, GS_SPRITE) \
InitVertexKick2(T, GS_INVALID) \
void UpdateVertexKick()
{ {
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST]; uint8* buff;
} size_t stride;
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
GSVector4 xy[4];
size_t xy_tail;
uint8* tmp;
} m_vertex;
void VertexKickNull(bool skip) struct
{ {
ASSERT(0); uint32* buff;
} size_t tail;
} m_index;
void VertexKick(bool skip) typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
{
(this->*m_vkf)(skip); ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
}
#define InitConvertVertex2(T, P) \
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
#define InitConvertVertex(T) \
InitConvertVertex2(T, GS_POINTLIST) \
InitConvertVertex2(T, GS_LINELIST) \
InitConvertVertex2(T, GS_LINESTRIP) \
InitConvertVertex2(T, GS_TRIANGLELIST) \
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
InitConvertVertex2(T, GS_TRIANGLEFAN) \
InitConvertVertex2(T, GS_SPRITE) \
InitConvertVertex2(T, GS_INVALID) \
void UpdateContext();
void UpdateScissor();
virtual void UpdateVertexKick();
void GrowVertexBuffer();
template<uint32 prim>
void VertexKick(uint32 skip);
// following functions need m_vt to be initialized
GSVertexTrace* m_vt;
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public: public:
GIFPath m_path[4]; GIFPath m_path[4];
@ -177,10 +196,6 @@ public:
GSLocalMemory m_mem; GSLocalMemory m_mem;
GSDrawingEnvironment m_env; GSDrawingEnvironment m_env;
GSDrawingContext* m_context; GSDrawingContext* m_context;
GSVertex m_v;
float m_q;
uint32 m_vprim;
GSPerfMon m_perfmon; GSPerfMon m_perfmon;
uint32 m_crc; uint32 m_crc;
int m_options; int m_options;
@ -188,9 +203,10 @@ public:
bool m_framelimit; bool m_framelimit;
CRC::Game m_game; CRC::Game m_game;
GSDump m_dump; GSDump m_dump;
bool m_nativeres;
public: public:
GSState(); GSState(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSState(); virtual ~GSState();
void ResetHandlers(); void ResetHandlers();
@ -205,8 +221,9 @@ public:
virtual void Reset(); virtual void Reset();
virtual void Flush(); virtual void Flush();
virtual void FlushPrim() = 0; virtual void FlushPrim();
virtual void ResetPrim() = 0; virtual void FlushWrite();
virtual void Draw() = 0;
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {} virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {} virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}

View File

@ -319,11 +319,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
GSVector4i r; GSVector4i r;
list<uint32>* pages = o->GetPages(rect, &r); uint32* pages = (uint32*)m_temp;
o->GetPages(rect, pages, &r);
bool found = false; bool found = false;
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++) for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{ {
uint32 page = *p; uint32 page = *p;
@ -337,22 +339,24 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM)) if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{ {
uint32* RESTRICT valid = s->m_valid;
bool b = bp == s->m_TEX0.TBP0; bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target) if(!s->m_target)
{ {
if(s->m_repeating) if(s->m_repeating)
{ {
list<GSVector2i>& l = s->m_p2t[page]; vector<GSVector2i>& l = s->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++) for(vector<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{ {
s->m_valid[k->x] &= k->y; valid[k->x] &= k->y;
} }
} }
else else
{ {
s->m_valid[page] = 0; valid[page] = 0;
} }
s->m_complete = false; s->m_complete = false;

View File

@ -75,7 +75,7 @@ public:
bool m_target; bool m_target;
bool m_complete; bool m_complete;
bool m_repeating; bool m_repeating;
list<GSVector2i>* m_p2t; vector<GSVector2i>* m_p2t;
public: public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp); Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp);

View File

@ -74,18 +74,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
m_textures.insert(t); m_textures.insert(t);
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++) for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
{ {
m_map[*i].push_front(t); m_map[*p].push_front(t);
} }
} }
return t; return t;
} }
void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm) void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
{ {
for(list<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++) for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{ {
uint32 page = *p; uint32 page = *p;
@ -95,20 +95,22 @@ void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
{ {
Texture* t = *i; Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) if(GSUtil::HasSharedBits(psm, t->m_sharedbits))
{ {
uint32* RESTRICT valid = t->m_valid;
if(t->m_repeating) if(t->m_repeating)
{ {
list<GSVector2i>& l = t->m_p2t[page]; vector<GSVector2i>& l = t->m_p2t[page];
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++) for(vector<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{ {
t->m_valid[j->x] &= j->y; valid[j->x] &= j->y;
} }
} }
else else
{ {
t->m_valid[page] = 0; valid[page] = 0;
} }
t->m_complete = false; t->m_complete = false;
@ -179,16 +181,17 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
memset(m_valid, 0, sizeof(m_valid)); memset(m_valid, 0, sizeof(m_valid));
memset(m_pages.bm, 0, sizeof(m_pages.bm)); memset(m_pages.bm, 0, sizeof(m_pages.bm));
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++) for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++)
{ {
uint32 page = *i; uint32 page = *p;
m_pages.bm[page >> 5] |= 1 << (page & 31); m_pages.bm[page >> 5] |= 1 << (page & 31);
m_pages.n.push_back(page);
} }
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
@ -201,6 +204,8 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
GSTextureCacheSW::Texture::~Texture() GSTextureCacheSW::Texture::~Texture()
{ {
delete [] m_pages.n;
if(m_buff) if(m_buff)
{ {
_aligned_free(m_buff); _aligned_free(m_buff);
@ -267,22 +272,29 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
uint8* dst = (uint8*)m_buff + pitch * r.top; uint8* dst = (uint8*)m_buff + pitch * r.top;
int block_pitch = pitch * bs.y;
r = r.srl32(3);
bs.x >>= 3;
bs.y >>= 3;
shift += 3;
if(m_repeating) if(m_repeating)
{ {
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{ {
uint32 base = o->block.row[y >> 3]; uint32 base = o->block.row[y];
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{ {
uint32 block = base + o->block.col[x >> 3]; uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {
uint32 addr = i >> 3; uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
if((m_valid[row] & col) == 0) if((m_valid[row] & col) == 0)
{ {
@ -298,13 +310,13 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
} }
else else
{ {
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{ {
uint32 base = o->block.row[y >> 3]; uint32 base = o->block.row[y];
for(int x = r.left; x < r.right; x += bs.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + o->block.col[x >> 3]; uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {

View File

@ -38,9 +38,10 @@ public:
uint32 m_age; uint32 m_age;
bool m_complete; bool m_complete;
bool m_repeating; bool m_repeating;
list<GSVector2i>* m_p2t; vector<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES]; uint32 m_valid[MAX_PAGES];
struct {uint32 bm[16]; list<uint32> n;} m_pages; struct {uint32 bm[16]; const uint32* n;} m_pages;
const uint32* RESTRICT m_sharedbits;
// m_valid // m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page // fast mode: each uint32 bits map to the 32 blocks of that page
@ -64,7 +65,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0); Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidatePages(const list<uint32>* pages, uint32 psm); void InvalidatePages(const uint32* pages, uint32 psm);
void RemoveAll(); void RemoveAll();
void RemoveAt(Texture* t); void RemoveAt(Texture* t);

View File

@ -82,9 +82,10 @@ bool GSDevice11::CreateTextureFX()
return true; return true;
} }
void GSDevice11::SetupIA(const void* vertices, int count, int prim) void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{ {
IASetVertexBuffer(vertices, sizeof(GSVertexHW11), count); IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim); IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
} }

View File

@ -61,9 +61,10 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
return t; return t;
} }
void GSDevice9::SetupIA(const void* vertices, int count, int prim) void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{ {
IASetVertexBuffer(vertices, sizeof(GSVertexHW9), count); IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim); IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
} }

View File

@ -275,10 +275,7 @@ protected:
if(m_exit) {m_cv.lock.Unlock(); return;} if(m_exit) {m_cv.lock.Unlock(); return;}
} }
{ T& item = m_queue.front();
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
T item = m_queue.front();
m_cv.lock.Unlock(); m_cv.lock.Unlock();
@ -287,7 +284,6 @@ protected:
m_cv.lock.Lock(); m_cv.lock.Lock();
m_queue.pop(); m_queue.pop();
}
if(m_queue.empty()) if(m_queue.empty())
{ {
@ -312,10 +308,7 @@ protected:
m_ev.lock.Lock(); m_ev.lock.Lock();
} }
{ T& item = m_queue.front();
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
T item = m_queue.front();
m_ev.lock.Unlock(); m_ev.lock.Unlock();
@ -324,11 +317,9 @@ protected:
m_ev.lock.Lock(); m_ev.lock.Lock();
m_queue.pop(); m_queue.pop();
}
_InterlockedDecrement(&m_ev.count); _InterlockedDecrement(&m_ev.count);
} }
} }
} }

View File

@ -91,6 +91,7 @@ static class GSUtilMaps
{ {
public: public:
uint8 PrimClassField[8]; uint8 PrimClassField[8];
uint8 VertexCountField[8];
uint32 CompatibleBitsField[64][2]; uint32 CompatibleBitsField[64][2];
uint32 SharedBitsField[64][2]; uint32 SharedBitsField[64][2];
@ -105,6 +106,15 @@ public:
PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS; PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS;
PrimClassField[GS_INVALID] = GS_INVALID_CLASS; PrimClassField[GS_INVALID] = GS_INVALID_CLASS;
VertexCountField[GS_POINTLIST] = 1;
VertexCountField[GS_LINELIST] = 2;
VertexCountField[GS_LINESTRIP] = 2;
VertexCountField[GS_TRIANGLELIST] = 3;
VertexCountField[GS_TRIANGLESTRIP] = 3;
VertexCountField[GS_TRIANGLEFAN] = 3;
VertexCountField[GS_SPRITE] = 2;
VertexCountField[GS_INVALID] = 1;
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
for(int i = 0; i < 64; i++) for(int i = 0; i < 64; i++)
@ -146,6 +156,21 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim]; return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
} }
// Returns the number of vertices that make up one primitive of type `prim`,
// looked up in s_maps.VertexCountField (1 for point list and invalid,
// 2 for line list/strip and sprite, 3 for triangle list/strip/fan).
// `prim` indexes an 8-entry table directly and is not range-checked here.
int GSUtil::GetVertexCount(uint32 prim)
{
return s_maps.VertexCountField[prim];
}
// Returns a pointer to the SharedBitsField row for destination format `dpsm`
// (two uint32 words, i.e. one bit per source format 0..63). The pointer can be
// kept and passed to HasSharedBits(spsm, ptr) to avoid repeating the row lookup.
// `dpsm` indexes a 64-row table directly and is not range-checked here.
const uint32* GSUtil::HasSharedBitsPtr(uint32 dpsm)
{
return s_maps.SharedBitsField[dpsm];
}
// Variant of HasSharedBits(spsm, dpsm) that takes a pre-fetched table row
// (as returned by HasSharedBitsPtr) instead of the destination format.
// `spsm >> 5` selects the word and `spsm & 0x1f` the bit inside it; the result
// is true when that bit is CLEAR in the row.
bool GSUtil::HasSharedBits(uint32 spsm, const uint32* RESTRICT ptr)
{
return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
}
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm) bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
{ {
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0; return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
@ -321,7 +346,7 @@ static bool DXUTDelayLoadDXGI()
bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level) bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level)
{ {
HRESULT hr; HRESULT hr = S_OK;
level = (D3D_FEATURE_LEVEL)0; level = (D3D_FEATURE_LEVEL)0;

View File

@ -29,7 +29,10 @@ public:
static const char* GetLibName(); static const char* GetLibName();
static GS_PRIM_CLASS GetPrimClass(uint32 prim); static GS_PRIM_CLASS GetPrimClass(uint32 prim);
static int GetVertexCount(uint32 prim);
static const uint32* HasSharedBitsPtr(uint32 dpsm);
static bool HasSharedBits(uint32 spsm, const uint32* ptr);
static bool HasSharedBits(uint32 spsm, uint32 dpsm); static bool HasSharedBits(uint32 spsm, uint32 dpsm);
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm); static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);
static bool HasCompatibleBits(uint32 spsm, uint32 dpsm); static bool HasCompatibleBits(uint32 spsm, uint32 dpsm);

View File

@ -1900,7 +1900,7 @@ public:
d = f.uph64(d); d = f.uph64(d);
} }
__forceinline static bool compare16(const void* dst, const void* src, int size) __forceinline static bool compare16(const void* dst, const void* src, size_t size)
{ {
ASSERT((size & 15) == 0); ASSERT((size & 15) == 0);
@ -1909,7 +1909,7 @@ public:
GSVector4i* s = (GSVector4i*)src; GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst; GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i++) for(size_t i = 0; i < size; i++)
{ {
if(!d[i].eq(s[i])) if(!d[i].eq(s[i]))
{ {
@ -1920,7 +1920,7 @@ public:
return true; return true;
} }
__forceinline static bool compare64(const void* dst, const void* src, int size) __forceinline static bool compare64(const void* dst, const void* src, size_t size)
{ {
ASSERT((size & 63) == 0); ASSERT((size & 63) == 0);
@ -1929,7 +1929,7 @@ public:
GSVector4i* s = (GSVector4i*)src; GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst; GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i += 4) for(size_t i = 0; i < size; i += 4)
{ {
GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]); GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]);
GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]); GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]);
@ -1948,7 +1948,7 @@ public:
return true; return true;
} }
__forceinline static bool update(const void* dst, const void* src, int size) __forceinline static bool update(const void* dst, const void* src, size_t size)
{ {
ASSERT((size & 15) == 0); ASSERT((size & 15) == 0);
@ -1959,7 +1959,7 @@ public:
GSVector4i v = GSVector4i::xffffffff(); GSVector4i v = GSVector4i::xffffffff();
for(int i = 0; i < size; i++) for(size_t i = 0; i < size; i++)
{ {
v &= d[i] == s[i]; v &= d[i] == s[i];

View File

@ -30,15 +30,20 @@
__aligned(struct, 32) GSVertex __aligned(struct, 32) GSVertex
{ {
union
{
struct
{
GIFRegST ST; GIFRegST ST;
GIFRegRGBAQ RGBAQ; GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ; GIFRegXYZ XYZ;
GIFRegFOG FOG; uint32 UV, FOG;
GIFRegUV UV; };
GSVertex() {memset(this, 0, sizeof(*this));} __m128i m[2];
};
GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.u32[0]).upl16());} void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
}; };
struct GSVertexP struct GSVertexP
@ -58,9 +63,4 @@ struct GSVertexPT2
GSVector2 t[2]; GSVector2 t[2];
}; };
struct GSVertexNull
{
GSVector4 p;
};
#pragma pack(pop) #pragma pack(pop)

View File

@ -35,16 +35,6 @@ __aligned(struct, 32) GSVertexHW9
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;} // t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;} GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
float& _q() {return p.w;}
uint8& _r() {return t.u8[8];}
uint8& _g() {return t.u8[9];}
uint8& _b() {return t.u8[10];}
uint8& _a() {return t.u8[11];}
uint32& _c0() {return t.u32[2];}
uint32& _c1() {return t.u32[3];}
}; };
__aligned(union, 32) GSVertexHW11 __aligned(union, 32) GSVertexHW11
@ -86,16 +76,6 @@ __aligned(union, 32) GSVertexHW11
return *this; return *this;
} }
float& _q() {return q;}
uint8& _r() {return r;}
uint8& _g() {return g;}
uint8& _b() {return b;}
uint8& _a() {return a;}
uint32& _c0() {return c0;}
uint32& _c1() {return c1;}
}; };
#pragma pack(pop) #pragma pack(pop)

View File

@ -23,9 +23,9 @@
#include "GSVector.h" #include "GSVector.h"
__aligned(struct, 16) GSVertexSW __aligned(struct, 32) GSVertexSW
{ {
GSVector4 p, t, c; GSVector4 p, t, c, _pad;
__forceinline GSVertexSW() {} __forceinline GSVertexSW() {}
__forceinline GSVertexSW(const GSVertexSW& v) {*this = v;} __forceinline GSVertexSW(const GSVertexSW& v) {*this = v;}

View File

@ -28,30 +28,17 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
GSVertexTrace::GSVertexTrace(const GSState* state) GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state) : m_state(state)
, m_map_sw("VertexTraceSW", NULL)
, m_map_hw9("VertexTraceHW9", NULL)
, m_map_hw11("VertexTraceHW11", NULL)
{ {
} }
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass) void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{ {
m_primclass = primclass; m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4); m_alpha.valid = false;
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC)) if(m_state->PRIM->TME)
{ {
hash |= 1 << 5;
}
return hash;
}
void GSVertexTrace::UpdateLOD()
{
if(!m_state->PRIM->TME) return;
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1; const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
m_filter.mmag = TEX1.IsMagLinear(); m_filter.mmag = TEX1.IsMagLinear();
@ -92,22 +79,45 @@ void GSVertexTrace::UpdateLOD()
{ {
m_filter.linear = m_filter.mmag | m_filter.mmin; m_filter.linear = m_filter.mmag | m_filter.mmin;
} }
}
} }
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass) uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
{ {
m_map_sw[Hash(primclass)](count, v, m_min, m_max); m_primclass = primclass;
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
m_alpha.valid = false; if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
{
hash |= 1 << 5;
}
UpdateLOD(); return hash;
} }
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass) GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceSW", NULL)
{ {
m_map_hw9[Hash(primclass)](count, v, m_min, m_max); }
// Software-renderer override of GSVertexTrace::Update.
// Picks the trace routine from m_map using the key built by Hash(primclass)
// and calls it with the vertex/index buffers so it fills m_min and m_max,
// then hands over to the base-class Update for the shared post-processing.
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
// Hash() also records primclass in m_primclass as a side effect.
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
GSVertexTrace::Update(vertex, index, count, primclass);
}
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW9", NULL)
{
}
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context; const GSDrawingContext* context = m_state->m_context;
@ -132,16 +142,18 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
m_max.t *= s; m_max.t *= s;
} }
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); GSVertexTrace::Update(vertex, index, count, primclass);
m_alpha.valid = false;
UpdateLOD();
} }
void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass) GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW11", NULL)
{ {
m_map_hw11[Hash(primclass)](count, v, m_min, m_max); }
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context; const GSDrawingContext* context = m_state->m_context;
@ -166,10 +178,6 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
m_max.t *= s; m_max.t *= s;
} }
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); GSVertexTrace::Update(vertex, index, count, primclass);
m_alpha.valid = false;
UpdateLOD();
} }

View File

@ -29,42 +29,18 @@
class GSState; class GSState;
__aligned(class, 32) GSVertexTrace __aligned(class, 32) GSVertexTrace : public GSAlignedClass<32>
{ {
public: public:
struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000 struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000
struct VertexAlpha {int min, max; bool valid;}; struct VertexAlpha {int min, max; bool valid;};
private: protected:
typedef void (*VertexTracePtr)(int count, const void* v, Vertex& min, Vertex& max);
class CGSW : public GSCodeGenerator
{
public:
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW9 : public GSCodeGenerator
{
public:
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW11 : public GSCodeGenerator
{
public:
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
const GSState* m_state; const GSState* m_state;
uint32 Hash(GS_PRIM_CLASS primclass); uint32 Hash(GS_PRIM_CLASS primclass);
void UpdateLOD(); typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
static const GSVector4 s_minmax; static const GSVector4 s_minmax;
@ -73,10 +49,7 @@ public:
Vertex m_min; Vertex m_min;
Vertex m_max; Vertex m_max;
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
// source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
VertexAlpha m_alpha;
union union
{ {
@ -92,12 +65,59 @@ public:
GSVector2 m_lod; // x = min, y = max GSVector2 m_lod; // x = min, y = max
public:
GSVertexTrace(const GSState* state); GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {}
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass); virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {}
bool IsLinear() const {return m_filter.linear;} bool IsLinear() const {return m_filter.linear;}
}; };
// GSVertexTrace subclass that supplies its own code generator and function map.
// NOTE(review): judging by the name and by the GSVertexSW offsets used in
// GSVertexTraceSW::CG::CG, this is the variant for the software renderer's
// vertex layout - confirm against the callers.
// NOTE(review): __aligned(class, 32) is a project macro; presumably it declares
// the class with 32-byte alignment - confirm against its definition.
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
{
// Code generator for this vertex layout. The constructor receives an opaque
// parameter pointer, a 32-bit key, and the output buffer (code, maxsize)
// that it forwards to GSCodeGenerator.
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Function map keyed by uint32 that yields VertexTracePtr entries built by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
// Takes the same const GSState* argument as the GSVertexTrace constructor.
GSVertexTraceSW(const GSState* state);
// Declared with the same signature as the virtual GSVertexTrace::Update.
// vertex/index: vertex buffer and index array; count and primclass as in the base.
// NOTE(review): count is presumably the number of indices - confirm in the .cpp.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// GSVertexTrace subclass that supplies its own code generator and function map.
// NOTE(review): judging by the name and by the GSVertexHW9 offsets used in
// GSVertexTraceDX9::CG::CG, this is the variant for the Direct3D 9 renderer's
// vertex layout - confirm against the callers.
// NOTE(review): __aligned(class, 32) is a project macro; presumably it declares
// the class with 32-byte alignment - confirm against its definition.
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
{
// Code generator for this vertex layout. The constructor receives an opaque
// parameter pointer, a 32-bit key, and the output buffer (code, maxsize)
// that it forwards to GSCodeGenerator.
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Function map keyed by uint32 that yields VertexTracePtr entries built by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
// Takes the same const GSState* argument as the GSVertexTrace constructor.
GSVertexTraceDX9(const GSState* state);
// Declared with the same signature as the virtual GSVertexTrace::Update.
// vertex/index: vertex buffer and index array; count and primclass as in the base.
// NOTE(review): count is presumably the number of indices - confirm in the .cpp.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// GSVertexTrace subclass that supplies its own code generator and function map.
// NOTE(review): judging by the name and by the sizeof(GSVertexHW11) scaling
// noted in GSVertexTraceDX11::CG::CG, this is the variant for the Direct3D 11
// renderer's vertex layout - confirm against the callers.
// NOTE(review): __aligned(class, 32) is a project macro; presumably it declares
// the class with 32-byte alignment - confirm against its definition.
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
{
// Code generator for this vertex layout. The constructor receives an opaque
// parameter pointer, a 32-bit key, and the output buffer (code, maxsize)
// that it forwards to GSCodeGenerator.
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// Function map keyed by uint32 that yields VertexTracePtr entries built by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
// Takes the same const GSState* argument as the GSVertexTrace constructor.
GSVertexTraceDX11(const GSState* state);
// Declared with the same signature as the virtual GSVertexTrace::Update.
// vertex/index: vertex buffer and index array; count and primclass as in the base.
// NOTE(review): count is presumably the number of indices - confirm in the .cpp.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};

View File

@ -26,13 +26,14 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0; static const int _args = 4;
static const int _count = _args + 4; // rcx static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx static const int _vertex = _args + 8; // rdx
static const int _min = _args + 12; // r8 static const int _index = _args + 12; // r8
static const int _max = _args + 16; // r9 static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break; break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -83,7 +86,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -92,18 +96,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS) if(tme && !fst && primclass == GS_SPRITE_CLASS)
{ {
vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
} }
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
{ {
// min.c = min.c.minv(v[i + j].c); // min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c); // max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
vminps(xmm2, xmm0); vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0); vmaxps(xmm3, xmm0);
@ -112,7 +122,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p); // min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
vminps(xmm4, xmm0); vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0); vmaxps(xmm5, xmm0);
@ -122,7 +132,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t); // min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t); // max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst) if(!fst)
{ {
@ -140,7 +150,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
} }
} }
add(edx, n * sizeof(GSVertexSW)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -170,10 +180,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -189,17 +201,17 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
case GS_POINT_CLASS: case GS_POINT_CLASS:
n = 1; n = 1;
break; break;
case GS_SPRITE_CLASS:
case GS_LINE_CLASS: case GS_LINE_CLASS:
n = 2; n = 2;
break; break;
case GS_TRIANGLE_CLASS: case GS_TRIANGLE_CLASS:
n = 3; n = 3;
break; break;
case GS_SPRITE_CLASS:
n = 6;
break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -226,7 +238,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -235,16 +248,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS) if(tme && !fst && primclass == GS_SPRITE_CLASS)
{ {
vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
} }
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p); // min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vminps(xmm4, xmm0); vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0); vmaxps(xmm5, xmm0);
@ -256,7 +275,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme) if(color && (iip || j == n - 1) || tme)
{ {
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
} }
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
@ -287,7 +306,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
} }
} }
add(edx, n * sizeof(GSVertexHW9)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -330,10 +349,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -358,6 +379,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break; break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -384,7 +407,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -393,9 +417,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme) if(color && (iip || j == n - 1) || tme)
{ {
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]); vmovaps(xmm0, ptr[edx + eax]);
} }
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
@ -424,7 +451,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm7, xmm0); vmaxps(xmm7, xmm0);
} }
vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]); vmovdqa(xmm0, ptr[edx + eax + 16]);
vpmovzxwd(xmm1, xmm0); vpmovzxwd(xmm1, xmm0);
vpsrld(xmm0, 1); vpsrld(xmm0, 1);
@ -435,7 +462,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm5, xmm1); vmaxps(xmm5, xmm1);
} }
add(edx, n * sizeof(GSVertexHW11)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -478,6 +505,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }

View File

@ -26,13 +26,14 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0; static const int _args = 4;
static const int _count = _args + 4; // rcx static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx static const int _vertex = _args + 8; // rdx
static const int _min = _args + 12; // r8 static const int _index = _args + 12; // r8
static const int _max = _args + 16; // r9 static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break; break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -86,7 +89,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -95,18 +99,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS) if(tme && !fst && primclass == GS_SPRITE_CLASS)
{ {
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
} }
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
{ {
// min.c = min.c.minv(v[i + j].c); // min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c); // max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
minps(xmm2, xmm0); minps(xmm2, xmm0);
maxps(xmm3, xmm0); maxps(xmm3, xmm0);
@ -115,7 +125,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p); // min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
minps(xmm4, xmm0); minps(xmm4, xmm0);
maxps(xmm5, xmm0); maxps(xmm5, xmm0);
@ -125,7 +135,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t); // min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t); // max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst) if(!fst)
{ {
@ -144,7 +154,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
} }
} }
add(edx, n * sizeof(GSVertexSW)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -174,10 +184,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -204,6 +216,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
break; break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -233,7 +247,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -242,16 +257,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS) if(tme && !fst && primclass == GS_SPRITE_CLASS)
{ {
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
} }
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p); // min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
minps(xmm4, xmm0); minps(xmm4, xmm0);
maxps(xmm5, xmm0); maxps(xmm5, xmm0);
@ -264,7 +285,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme) if(color && (iip || j == n - 1) || tme)
{ {
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
} }
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
@ -295,7 +316,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
} }
} }
add(edx, n * sizeof(GSVertexHW9)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -351,10 +372,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
{ {
uint32 primclass = (key >> 0) & 3; uint32 primclass = (key >> 0) & 3;
@ -379,6 +402,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break; break;
} }
push(ebx);
// min.p = FLT_MAX; // min.p = FLT_MAX;
// max.p = -FLT_MAX; // max.p = -FLT_MAX;
@ -408,7 +433,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) { // for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]); mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]); mov(ecx, dword[esp + _count]);
align(16); align(16);
@ -417,9 +443,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++) for(int j = 0; j < n; j++)
{ {
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme) if(color && (iip || j == n - 1) || tme)
{ {
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]); movaps(xmm0, ptr[edx + eax]);
} }
if(color && (iip || j == n - 1)) if(color && (iip || j == n - 1))
@ -448,7 +477,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm7, xmm0); maxps(xmm7, xmm0);
} }
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]); movdqa(xmm0, ptr[edx + eax + 16]);
if(m_cpu.has(util::Cpu::tSSE41)) if(m_cpu.has(util::Cpu::tSSE41))
{ {
@ -469,7 +498,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm5, xmm1); maxps(xmm5, xmm1);
} }
add(edx, n * sizeof(GSVertexHW11)); add(ebx, n * sizeof(uint32));
sub(ecx, n); sub(ecx, n);
jg("loop"); jg("loop");
@ -525,6 +554,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
} }
pop(ebx);
ret(); ret();
} }

Some files were not shown because too many files have changed in this diff Show More