Merge pull request #1276 from vlj/d3d12

D3d12: Code cleaning
This commit is contained in:
B1ackDaemon 2015-10-30 22:12:52 +02:00
commit b5cf7fba2e
29 changed files with 2189 additions and 2329 deletions

View File

@ -233,12 +233,14 @@ namespace fmt
for (std::size_t buf_size = fixed_buf.size();;)
{
#ifndef _MSC_VER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-security"
#endif
const std::size_t len = std::snprintf(buf_addr, buf_size, fmt, do_unveil(args)...);
#ifndef _MSC_VER
#pragma GCC diagnostic pop
#endif
if (len > INT_MAX)
{
throw std::runtime_error("std::snprintf() failed");

View File

@ -78,14 +78,14 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\D3D12\D3D12.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12Buffer.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12Utils.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12CommonDecompiler.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12Formats.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12FragmentProgramDecompiler.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12GSRender.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12MemoryHelpers.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12PipelineState.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12RenderTargetSets.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12Texture.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12VertexProgramDecompiler.h" />
<ClInclude Include="Emu\RSX\D3D12\d3dx12.h" />
<ClInclude Include="stdafx_d3d12.h" />
@ -93,8 +93,10 @@
<ItemGroup>
<ClCompile Include="Emu\RSX\D3D12\D3D12Buffer.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12CommonDecompiler.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12Formats.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12FragmentProgramDecompiler.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12GSRender.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12MemoryHelpers.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12Overlay.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12PipelineState.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12RenderTargetSets.cpp" />

View File

@ -8,12 +8,6 @@
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\D3D12\D3D12.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12Buffer.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12CommonDecompiler.h">
<Filter>Source Files</Filter>
</ClInclude>
@ -29,9 +23,6 @@
<ClInclude Include="Emu\RSX\D3D12\D3D12RenderTargetSets.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12Texture.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12VertexProgramDecompiler.h">
<Filter>Source Files</Filter>
</ClInclude>
@ -41,6 +32,15 @@
<ClInclude Include="stdafx_d3d12.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12Utils.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12Formats.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\D3D12\D3D12MemoryHelpers.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Emu\RSX\D3D12\D3D12Buffer.cpp">
@ -76,5 +76,11 @@
<ClCompile Include="stdafx_d3d12.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\D3D12\D3D12Formats.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\D3D12\D3D12MemoryHelpers.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -5,57 +5,7 @@
#define MIN2(x, y) ((x) < (y)) ? (x) : (y)
#define MAX2(x, y) ((x) > (y)) ? (x) : (y)
inline
bool overlaps(const std::pair<size_t, size_t> &range1, const std::pair<size_t, size_t> &range2)
{
return !(range1.second < range2.first || range2.second < range1.first);
}
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset)
{
std::vector<VertexBufferFormat> Result;
for (size_t i = 0; i < rsx::limits::vertex_count; ++i)
{
const rsx::data_array_format_info &vertexData = vertex_array_desc[i];
if (!vertexData.size) continue;
u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + i];
u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31);
size_t elementCount = ((vertexData.array) ? vertex_data_size[i] : vertex_data[i].size()) / (vertexData.size * rsx::get_vertex_type_size(vertexData.type));
// If there is a single element, stride is 0, use the size of element instead
size_t stride = vertexData.stride;
size_t elementSize = rsx::get_vertex_type_size(vertexData.type);
size_t start = addr + base_offset;
size_t end = start + elementSize * vertexData.size + (elementCount - 1) * stride - 1;
std::pair<size_t, size_t> range = std::make_pair(start, end);
assert(start < end);
bool isMerged = false;
for (VertexBufferFormat &vbf : Result)
{
if (overlaps(vbf.range, range) && vbf.stride == stride)
{
// Extend buffer if necessary
vbf.range.first = MIN2(vbf.range.first, range.first);
vbf.range.second = MAX2(vbf.range.second, range.second);
vbf.elementCount = MAX2(vbf.elementCount, elementCount);
vbf.attributeId.push_back(i);
isMerged = true;
break;
}
}
if (isMerged)
continue;
VertexBufferFormat newRange = { range, std::vector<size_t>{ i }, elementCount, stride };
Result.emplace_back(newRange);
}
return Result;
}
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) noexcept
{
assert(vertex_array_desc.array);
@ -109,8 +59,10 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_
}
}
namespace
{
template<typename IndexType>
void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept
{
for (u32 i = 0; i < indexCount; ++i)
{
@ -124,7 +76,7 @@ void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_res
}
template<typename IndexType>
void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept
{
for (unsigned i = 0; i < indexCount - 2; i++)
{
@ -154,7 +106,7 @@ void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is
}
template<typename IndexType>
void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept
{
for (unsigned i = 0; i < indexCount / 4; i++)
{
@ -193,9 +145,10 @@ void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primi
}
}
}
}
// Only handle quads and triangle fan now
bool isNativePrimitiveMode(unsigned m_draw_mode)
bool is_primitive_native(unsigned m_draw_mode) noexcept
{
switch (m_draw_mode)
{
@ -215,10 +168,10 @@ bool isNativePrimitiveMode(unsigned m_draw_mode)
}
}
size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count)
size_t get_index_count(unsigned m_draw_mode, unsigned initial_index_count) noexcept
{
// Index count
if (isNativePrimitiveMode(m_draw_mode))
if (is_primitive_native(m_draw_mode))
return initial_index_count;
switch (m_draw_mode)
@ -232,7 +185,17 @@ size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count)
}
}
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count)
size_t get_index_type_size(u32 type) noexcept
{
switch (type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: return 2;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: return 4;
default: return 0;
}
}
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count) noexcept
{
unsigned short *typedDst = (unsigned short *)(dst);
switch (draw_mode)
@ -261,7 +224,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst,
}
}
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index)
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) noexcept
{
u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_INDEX_ARRAY_ADDRESS], rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] & 0xf);
u32 type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4;

View File

@ -12,36 +12,34 @@ struct VertexBufferFormat
size_t stride;
};
/*
* Detect buffer containing interleaved vertex attribute.
* This minimizes memory upload size.
*/
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset);
/*
/**
* Write count vertex attributes from index array buffer starting at first, using vertex_array_desc
*/
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc);
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) noexcept;
/*
* If primitive mode is not supported and need to be emulated (using an index buffer) returns false.
*/
bool isNativePrimitiveMode(unsigned m_draw_mode);
bool is_primitive_native(unsigned m_draw_mode) noexcept;
/*
/**
* Returns a fixed index count for emulated primitive, otherwise returns initial_index_count
*/
size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count);
size_t get_index_count(unsigned m_draw_mode, unsigned initial_index_count) noexcept;
/*
/**
* Returns index type size in byte
*/
size_t get_index_type_size(u32 type) noexcept;
/**
* Write count indexes starting at first to dst buffer.
* Returns min/max index found during the process.
* The function expands index buffer for non native primitive type.
*/
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index);
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) noexcept;
/*
/**
* Write index data needed to emulate non indexed non native primitive mode.
*/
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count);
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept;

View File

@ -6,11 +6,12 @@
#define MAX2(a, b) ((a) > (b)) ? (a) : (b)
namespace
{
/**
* Write data, assume src pixels are packed but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -42,7 +43,7 @@ writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heigh
/**
* Write data, assume src pixels are swizzled and but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -84,7 +85,7 @@ writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heig
/**
* Write data, assume compressed (DXTCn) format
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -117,7 +118,7 @@ writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blo
/**
* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -158,7 +159,7 @@ write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t h
/**
* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -196,7 +197,7 @@ write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t he
/**
* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
std::vector<MipmapLevelInfo>
write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
@ -231,255 +232,117 @@ write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t
return Result;
}
size_t getPlacedTextureStorageSpace(const rsx::texture &texture, size_t rowPitchAlignement)
/**
* A texture is stored as an array of blocks, where a block is a pixel for standard texture
* but is a structure containing several pixels for compressed format
*/
size_t get_texture_block_size(u32 format) noexcept
{
size_t w = texture.width(), h = texture.height();
size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
switch (format)
{
case CELL_GCM_TEXTURE_B8: return 1;
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5: return 2;
case CELL_GCM_TEXTURE_A8R8G8B8: return 4;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return 8;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return 16;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 16;
case CELL_GCM_TEXTURE_G8B8: return 2;
case CELL_GCM_TEXTURE_R6G5B5:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return 4;
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_X16: return 2;
case CELL_GCM_TEXTURE_Y16_X16: return 4;
case CELL_GCM_TEXTURE_R5G5B5A1: return 2;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return 8;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return 16;
case CELL_GCM_TEXTURE_X32_FLOAT: return 4;
case CELL_GCM_TEXTURE_D1R5G5B5: return 2;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 4;
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
break;
case CELL_GCM_TEXTURE_B8:
blockSizeInByte = 1;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_A1R5G5B5:
blockSizeInByte = 2;
blockHeightInPixel = 1, blockWidthInPixel = 1;
break;
case CELL_GCM_TEXTURE_A4R4G4B4:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R5G6B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_A8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
blockSizeInByte = 8;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_G8B8:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R6G5B5:
// Not native
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_X16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_Y16_X16:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R5G5B5A1:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
blockSizeInByte = 8;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
blockSizeInByte = 16;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_X32_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_D1R5G5B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_D8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
break;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
break;
return 0;
}
}
size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
size_t get_texture_block_edge(u32 format) noexcept
{
switch (format)
{
case CELL_GCM_TEXTURE_B8:
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_A8R8G8B8: return 1;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4;
case CELL_GCM_TEXTURE_G8B8:
case CELL_GCM_TEXTURE_R6G5B5:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_Y16_X16:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
case CELL_GCM_TEXTURE_X32_FLOAT:
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_D8R8G8B8: return 1;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 2;
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
return 0;
}
}
}
size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) noexcept
{
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
size_t blockEdge = get_texture_block_edge(format);
size_t blockSizeInByte = get_texture_block_size(format);
size_t heightInBlocks = (h + blockEdge - 1) / blockEdge;
size_t widthInBlocks = (w + blockEdge - 1) / blockEdge;
size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement);
return rowPitch * heightInBlocks * 2; // * 2 for mipmap levels
}
std::vector<MipmapLevelInfo> uploadPlacedTexture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData)
std::vector<MipmapLevelInfo> upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) noexcept
{
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
switch (format)
{
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
break;
case CELL_GCM_TEXTURE_B8:
blockSizeInByte = 1;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_A1R5G5B5:
blockSizeInByte = 2;
blockHeightInPixel = 1, blockWidthInPixel = 1;
break;
case CELL_GCM_TEXTURE_A4R4G4B4:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R5G6B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_A8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
blockSizeInByte = 8;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
break;
case CELL_GCM_TEXTURE_G8B8:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R6G5B5:
// Not native
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_X16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_Y16_X16:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_R5G5B5A1:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
blockSizeInByte = 8;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
blockSizeInByte = 16;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_X32_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_D1R5G5B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_D8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
break;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
break;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
break;
}
size_t blockSizeInByte = get_texture_block_size(format);
size_t blockEdge = get_texture_block_edge(format);
size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
size_t heightInBlocks = (h + blockEdge - 1) / blockEdge;
size_t widthInBlocks = (w + blockEdge - 1) / blockEdge;
std::vector<MipmapLevelInfo> mipInfos;
@ -505,8 +368,76 @@ std::vector<MipmapLevelInfo> uploadPlacedTexture(const rsx::texture &texture, si
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.mipmap());
return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockEdge, heightInBlocks, blockEdge, blockSizeInByte, texture.mipmap());
default:
return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.mipmap());
}
}
size_t get_texture_size(const rsx::texture &texture) noexcept
{
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
// TODO: Take mipmaps into account
switch (format)
{
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
return 0;
case CELL_GCM_TEXTURE_B8:
return w * h;
case CELL_GCM_TEXTURE_A1R5G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_A4R4G4B4:
return w * h * 2;
case CELL_GCM_TEXTURE_R5G6B5:
return w * h * 2;
case CELL_GCM_TEXTURE_A8R8G8B8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
return w * h / 6;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
return w * h / 4;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return w * h / 4;
case CELL_GCM_TEXTURE_G8B8:
return w * h * 2;
case CELL_GCM_TEXTURE_R6G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_DEPTH24_D8:
return w * h * 4;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_DEPTH16:
return w * h * 2;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return w * h * 2;
case CELL_GCM_TEXTURE_X16:
return w * h * 2;
case CELL_GCM_TEXTURE_Y16_X16:
return w * h * 4;
case CELL_GCM_TEXTURE_R5G5B5A1:
return w * h * 2;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return w * h * 8;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return w * h * 16;
case CELL_GCM_TEXTURE_X32_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_D1R5G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_D8R8G8B8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return w * h * 4;
}
}

View File

@ -14,11 +14,16 @@ struct MipmapLevelInfo
* Get size to store texture in a linear fashion.
* Storage is assumed to use a rowPitchAlignement boundary for every row of texture.
*/
size_t getPlacedTextureStorageSpace(const rsx::texture &texture, size_t rowPitchAlignement);
size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) noexcept;
/**
* Write texture data to textureData.
* Data are not packed, they are stored per rows using rowPitchAlignement.
* Similarly, offset for every mipmaplevel is aligned to rowPitchAlignement boundary.
*/
std::vector<MipmapLevelInfo> uploadPlacedTexture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData);
std::vector<MipmapLevelInfo> upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) noexcept;
/**
* Get number of bytes occupied by texture in RSX mem
*/
size_t get_texture_size(const rsx::texture &texture) noexcept;

View File

@ -1,281 +0,0 @@
#pragma once
#include <d3d12.h>
#include <cassert>
#include <wrl/client.h>
#include "Utilities/Log.h"
#include "Emu/Memory/vm.h"
#include "Emu/RSX/GCM.h"
using namespace Microsoft::WRL;
#define SAFE_RELEASE(x) if (x) x->Release();
// From DX12 D3D11On12 Sample (MIT Licensed)
inline void ThrowIfFailed(HRESULT hr)
{
if (FAILED(hr))
{
throw;
}
}
/**
* Send data to dst pointer without polluting cache.
* Usefull to write to mapped memory from upload heap.
*/
inline
void streamToBuffer(void* dst, void* src, size_t sizeInBytes)
{
#pragma omp parallel for
for (int i = 0; i < sizeInBytes / 16; i++)
{
const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16));
_mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr);
}
}
/**
* copy src to dst pointer without polluting cache.
* Usefull to write to mapped memory from upload heap.
*/
inline
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
{
// Assume 64 bytes cache line
int offset = 0;
bool isAligned = !((size_t)src & 15);
#pragma omp parallel for
for (offset = 0; offset < sizeInBytes - 64; offset += 64)
{
char *line = (char*)src + offset;
char *dstline = (char*)dst + offset;
// prefetch next line
_mm_prefetch(line + 16, _MM_HINT_NTA);
__m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line);
_mm_stream_si128((__m128i*)dstline, srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16));
_mm_stream_si128((__m128i*)(dstline + 16), srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32));
_mm_stream_si128((__m128i*)(dstline + 32), srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48));
_mm_stream_si128((__m128i*)(dstline + 48), srcPtr);
}
memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset);
}
/**
* Convert GCM blend operator code to D3D12 one
*/
inline D3D12_BLEND_OP getBlendOp(u16 op)
{
switch (op)
{
case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD;
case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN;
case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX;
default:
case CELL_GCM_FUNC_ADD_SIGNED:
case CELL_GCM_FUNC_REVERSE_ADD_SIGNED:
case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED:
LOG_WARNING(RSX, "Unsupported Blend Op %d", op);
return D3D12_BLEND_OP();
}
}
/**
* Convert GCM blend factor code to D3D12 one
*/
inline D3D12_BLEND getBlendFactor(u16 factor)
{
switch (factor)
{
case CELL_GCM_ZERO: return D3D12_BLEND_ZERO;
case CELL_GCM_ONE: return D3D12_BLEND_ONE;
case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
default:
case CELL_GCM_CONSTANT_COLOR:
case CELL_GCM_ONE_MINUS_CONSTANT_COLOR:
case CELL_GCM_CONSTANT_ALPHA:
case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA:
LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor);
return D3D12_BLEND();
}
}
/**
* Convert GCM blend factor code to D3D12 one for alpha component
*/
inline D3D12_BLEND getBlendFactorAlpha(u16 factor)
{
switch (factor)
{
case CELL_GCM_ZERO: return D3D12_BLEND_ZERO;
case CELL_GCM_ONE: return D3D12_BLEND_ONE;
case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_INV_DEST_ALPHA;
default:
case CELL_GCM_CONSTANT_COLOR:
case CELL_GCM_ONE_MINUS_CONSTANT_COLOR:
case CELL_GCM_CONSTANT_ALPHA:
case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA:
LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor);
return D3D12_BLEND();
}
}
/**
* Convert GCM logic op code to D3D12 one
*/
inline D3D12_LOGIC_OP getLogicOp(u32 op)
{
switch (op)
{
default:
LOG_WARNING(RSX, "Unsupported Logic Op %d", op);
return D3D12_LOGIC_OP();
case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR;
case CELL_GCM_AND: return D3D12_LOGIC_OP_AND;
case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY;
case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP;
case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR;
case CELL_GCM_OR: return D3D12_LOGIC_OP_OR;
case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR;
case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV;
case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT;
case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND;
}
}
/**
* Convert GCM stencil op code to D3D12 one
*/
inline D3D12_STENCIL_OP getStencilOp(u32 op)
{
switch (op)
{
case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP;
case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO;
case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE;
case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR;
case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR;
default:
case CELL_GCM_INCR_WRAP:
case CELL_GCM_DECR_WRAP:
LOG_WARNING(RSX, "Unsupported Stencil Op %d", op);
return D3D12_STENCIL_OP();
}
}
/**
* Convert GCM comparison function code to D3D12 one.
*/
inline D3D12_COMPARISON_FUNC getCompareFunc(u32 op)
{
switch (op)
{
case CELL_GCM_ZERO:
case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER;
case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS;
case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL;
case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL;
case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER;
case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL;
case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS;
default:
LOG_WARNING(RSX, "Unsupported Compare Function %d", op);
return D3D12_COMPARISON_FUNC();
}
}
/**
* Convert GCM texture format to an equivalent one supported by D3D12.
* Destination format may require a byte swap or data conversion.
*/
inline DXGI_FORMAT getTextureDXGIFormat(int format)
{
switch (format)
{
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
return DXGI_FORMAT();
case CELL_GCM_TEXTURE_B8:
return DXGI_FORMAT_R8_UNORM;
case CELL_GCM_TEXTURE_A1R5G5B5:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_A4R4G4B4:
return DXGI_FORMAT_B4G4R4A4_UNORM;
case CELL_GCM_TEXTURE_R5G6B5:
return DXGI_FORMAT_B5G6R5_UNORM;
case CELL_GCM_TEXTURE_A8R8G8B8:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
return DXGI_FORMAT_BC1_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
return DXGI_FORMAT_BC2_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return DXGI_FORMAT_BC3_UNORM;
case CELL_GCM_TEXTURE_G8B8:
return DXGI_FORMAT_G8R8_G8B8_UNORM;
case CELL_GCM_TEXTURE_R6G5B5:
// Not native
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_DEPTH24_D8:
return DXGI_FORMAT_R32_UINT;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return DXGI_FORMAT_R32_FLOAT;
case CELL_GCM_TEXTURE_DEPTH16:
return DXGI_FORMAT_R16_UNORM;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return DXGI_FORMAT_R16_FLOAT;
case CELL_GCM_TEXTURE_X16:
return DXGI_FORMAT_R16_UNORM;
case CELL_GCM_TEXTURE_Y16_X16:
return DXGI_FORMAT_R16G16_UNORM;
case CELL_GCM_TEXTURE_R5G5B5A1:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return DXGI_FORMAT_R16G16B16A16_FLOAT;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return DXGI_FORMAT_R32G32B32A32_FLOAT;
case CELL_GCM_TEXTURE_X32_FLOAT:
return DXGI_FORMAT_R32_FLOAT;
case CELL_GCM_TEXTURE_D1R5G5B5:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_D8R8G8B8:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
return DXGI_FORMAT_G8R8_G8B8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return DXGI_FORMAT_R8G8_B8G8_UNORM;
}
}

View File

@ -1,115 +1,31 @@
#include "stdafx_d3d12.h"
#ifdef _WIN32
#include "D3D12Buffer.h"
#include "Utilities/Log.h"
#include "D3D12GSRender.h"
#include "d3dx12.h"
#include "../Common/BufferUtils.h"
#include "D3D12Formats.h"
const int g_vertexCount = 32;
// Where are these type defined ???
static
DXGI_FORMAT getFormat(u8 type, u8 size)
namespace
{
/*static const u32 gl_types[] =
{
GL_SHORT,
GL_FLOAT,
GL_HALF_FLOAT,
GL_UNSIGNED_BYTE,
GL_SHORT,
GL_FLOAT, // Needs conversion
GL_UNSIGNED_BYTE,
};
static const bool gl_normalized[] =
{
GL_TRUE,
GL_FALSE,
GL_FALSE,
GL_TRUE,
GL_FALSE,
GL_TRUE,
GL_FALSE,
};*/
static const DXGI_FORMAT typeX1[] =
{
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R16_FLOAT,
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R8_UINT
};
static const DXGI_FORMAT typeX2[] =
{
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_R16G16_FLOAT,
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_R8G8_UINT
};
static const DXGI_FORMAT typeX3[] =
{
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R16G16B16A16_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UINT
};
static const DXGI_FORMAT typeX4[] =
{
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
DXGI_FORMAT_R16G16B16A16_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R32G32B32A32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UINT
};
switch (size)
{
case 1:
return typeX1[type];
case 2:
return typeX2[type];
case 3:
return typeX3[type];
case 4:
return typeX4[type];
default:
LOG_ERROR(RSX, "Wrong size for vertex attrib : %d", size);
return DXGI_FORMAT();
}
}
// D3D12GS member handling buffers
/**
*
*/
static
D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, DataHeap<ID3D12Resource, 65536> &vertexIndexHeap)
D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, data_heap<ID3D12Resource, 65536> &vertex_index_heap)
{
size_t subBufferSize = vertex_data.size();
assert(vertexIndexHeap.canAlloc(subBufferSize));
size_t heapOffset = vertexIndexHeap.alloc(subBufferSize);
size_t buffer_size = vertex_data.size();
assert(vertex_index_heap.can_alloc(buffer_size));
size_t heap_offset = vertex_index_heap.alloc(buffer_size);
void *buffer;
ThrowIfFailed(vertexIndexHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
ThrowIfFailed(vertex_index_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *bufferMap = (char*)buffer + heap_offset;
memcpy(bufferMap, vertex_data.data(), vertex_data.size());
vertexIndexHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
vertex_index_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
return vertex_index_heap.m_heap->GetGPUVirtualAddress() + heap_offset;
}
}
void D3D12GSRender::load_vertex_data(u32 first, u32 count)
@ -122,7 +38,7 @@ void D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u3
{
m_vertex_buffer_views.clear();
m_IASet.clear();
size_t inputSlot = 0;
size_t input_slot = 0;
size_t vertex_count = 0;
@ -140,33 +56,35 @@ void D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u3
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = element_size * vertex_count;
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
size_t buffer_size = element_size * vertex_count;
assert(m_vertexIndexData.can_alloc(buffer_size));
size_t heap_offset = m_vertexIndexData.alloc(buffer_size);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
for (const auto &range : vertex_ranges)
{
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
bufferMap = (char*)bufferMap + range.second * element_size;
write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info);
mapped_buffer = (char*)mapped_buffer + range.second * element_size;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
m_timers.m_bufferUploadSize += subBufferSize;
m_timers.m_bufferUploadSize += buffer_size;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
@ -188,27 +106,28 @@ void D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u3
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = data.size();
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
size_t buffer_size = data.size();
assert(m_vertexIndexData.can_alloc(buffer_size));
size_t heap_offset = m_vertexIndexData.alloc(buffer_size);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
memcpy(bufferMap, data.data(), data.size());
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
memcpy(mapped_buffer, data.data(), data.size());
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
m_vertex_buffer_views.push_back(vertex_buffer_view);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
@ -221,9 +140,9 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
m_renderingInfo.m_indexed = true;
}
void D3D12GSRender::setScaleOffset(size_t descriptorIndex)
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{
float scaleOffsetMat[16] =
float scale_offset_matrix[16] =
{
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, -1.0f, 0.0f, 0.0f,
@ -235,52 +154,53 @@ void D3D12GSRender::setScaleOffset(size_t descriptorIndex)
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
// Scale
scaleOffsetMat[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
scaleOffsetMat[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
scaleOffsetMat[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
scale_offset_matrix[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
scale_offset_matrix[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
scale_offset_matrix[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
// Offset
scaleOffsetMat[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
scaleOffsetMat[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
scaleOffsetMat[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
scale_offset_matrix[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
scale_offset_matrix[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
scale_offset_matrix[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
scaleOffsetMat[3] /= clip_w / 2.f;
scaleOffsetMat[7] /= clip_h / 2.f;
scale_offset_matrix[3] /= clip_w / 2.f;
scale_offset_matrix[7] /= clip_h / 2.f;
assert(m_constantsData.canAlloc(256));
size_t heapOffset = m_constantsData.alloc(256);
assert(m_constantsData.can_alloc(256));
size_t heap_offset = m_constantsData.alloc(256);
// Scale offset buffer
// Separate constant buffer
void *scaleOffsetMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256), &scaleOffsetMap));
streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float));
int isAlphaTested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer));
streamToBuffer((char*)mapped_buffer + heap_offset, scale_offset_matrix, 16 * sizeof(float));
int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF];
memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int));
memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &alpha_ref, sizeof(float));
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256));
memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
memcpy((char*)mapped_buffer + heap_offset + 17 * sizeof(float), &alpha_ref, sizeof(float));
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)256;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset,
256
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
}
void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex)
void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index)
{
for (const auto &entry : transform_constants)
local_transform_constants[entry.first] = entry.second;
size_t bufferSize = 512 * 4 * sizeof(float);
size_t buffer_size = 512 * 4 * sizeof(float);
assert(m_constantsData.canAlloc(bufferSize));
size_t heapOffset = m_constantsData.alloc(bufferSize);
assert(m_constantsData.can_alloc(buffer_size));
size_t heap_offset = m_constantsData.alloc(buffer_size);
void *constantsBufferMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
for (const auto &entry : local_transform_constants)
{
float data[4] = {
@ -289,54 +209,37 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex)
entry.second.z,
entry.second.w
};
streamToBuffer((char*)constantsBufferMap + heapOffset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float));
streamToBuffer((char*)mapped_buffer + heap_offset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float));
}
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptor_index, g_descriptorStrideSRVCBVUAV));
}
void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex)
void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_index)
{
// Get constant from fragment program
const std::vector<size_t> &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
size_t bufferSize = fragmentOffset.size() * 4 * sizeof(float) + 1;
const std::vector<size_t> &fragment_constant_offsets = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
size_t buffer_size = fragment_constant_offsets.size() * 4 * sizeof(float) + 1;
// Multiple of 256 never 0
bufferSize = (bufferSize + 255) & ~255;
buffer_size = (buffer_size + 255) & ~255;
assert(m_constantsData.canAlloc(bufferSize));
size_t heapOffset = m_constantsData.alloc(bufferSize);
assert(m_constantsData.can_alloc(buffer_size));
size_t heap_offset = m_constantsData.alloc(buffer_size);
size_t offset = 0;
void *constantsBufferMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
for (size_t offsetInFP : fragmentOffset)
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
for (size_t offset_in_fragment_program : fragment_constant_offsets)
{
u32 vector[4];
// Is it assigned by color register in command buffer ?
// TODO : we loop every iteration, we might do better...
bool isCommandBufferSetConstant = false;
/* for (const auto& entry : fragment_constants)
{
size_t fragmentId = entry.first - fragment_program.offset;
if (fragmentId == offsetInFP)
{
isCommandBufferSetConstant = true;
vector[0] = (u32&)entry.second.x;
vector[1] = (u32&)entry.second.y;
vector[2] = (u32&)entry.second.z;
vector[3] = (u32&)entry.second.w;
break;
}
}*/
if (!isCommandBufferSetConstant)
{
auto data = vm::ps3::ptr<u32>::make(fragment_program.addr + (u32)offsetInFP);
auto data = vm::ps3::ptr<u32>::make(fragment_program.addr + (u32)offset_in_fragment_program);
u32 c0 = (data[0] >> 16 | data[0] << 16);
u32 c1 = (data[1] >> 16 | data[1] << 16);
@ -347,60 +250,60 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex)
vector[1] = c1;
vector[2] = c2;
vector[3] = c3;
}
streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32));
streamToBuffer((char*)mapped_buffer + heap_offset + offset, vector, 4 * sizeof(u32));
offset += 4 * sizeof(u32);
}
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptor_index, g_descriptorStrideSRVCBVUAV));
}
void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist)
void D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list)
{
// Index count
m_renderingInfo.m_count = 0;
for (const auto &pair : m_first_count_pairs)
m_renderingInfo.m_count += getIndexCount(draw_mode, pair.second);
m_renderingInfo.m_count += get_index_count(draw_mode, pair.second);
if (!m_renderingInfo.m_indexed)
{
// Non indexed
upload_vertex_attributes(m_first_count_pairs);
cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (isNativePrimitiveMode(draw_mode))
command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (is_primitive_native(draw_mode))
return;
// Handle non native primitive
// Alloc
size_t subBufferSize = align(m_renderingInfo.m_count * sizeof(u16), 64);
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
size_t buffer_size = align(m_renderingInfo.m_count * sizeof(u16), 64);
assert(m_vertexIndexData.can_alloc(buffer_size));
size_t heap_offset = m_vertexIndexData.alloc(buffer_size);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
size_t first = 0;
for (const auto &pair : m_first_count_pairs)
{
size_t element_count = getIndexCount(draw_mode, pair.second);
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)bufferMap, draw_mode, first, pair.second);
bufferMap = (char*)bufferMap + element_count * sizeof(u16);
size_t element_count = get_index_count(draw_mode, pair.second);
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, draw_mode, (u32)first, (u32)pair.second);
mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16);
first += pair.second;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_INDEX_BUFFER_VIEW indexBufferView = {
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset,
(UINT)subBufferSize,
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
DXGI_FORMAT_R16_UINT
};
cmdlist->IASetIndexBuffer(&indexBufferView);
command_list->IASetIndexBuffer(&index_buffer_view);
m_renderingInfo.m_indexed = true;
}
else
@ -408,35 +311,35 @@ void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist)
u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4;
// Index type
size_t indexSize = (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 2 : 4;
size_t index_size = get_index_type_size(indexed_type);
// Alloc
size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64);
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
size_t buffer_size = align(m_renderingInfo.m_count * index_size, 64);
assert(m_vertexIndexData.can_alloc(buffer_size));
size_t heap_offset = m_vertexIndexData.alloc(buffer_size);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
u32 min_index = (u32)-1, max_index = 0;
for (const auto &pair : m_first_count_pairs)
{
size_t element_count = getIndexCount(draw_mode, pair.second);
write_index_array_data_to_buffer((char*)bufferMap, draw_mode, pair.first, pair.second, min_index, max_index);
bufferMap = (char*)bufferMap + element_count * indexSize;
size_t element_count = get_index_count(draw_mode, pair.second);
write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index);
mapped_buffer = (char*)mapped_buffer + element_count * index_size;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_INDEX_BUFFER_VIEW indexBufferView = {
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset,
(UINT)subBufferSize,
(indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
get_index_type(indexed_type)
};
m_timers.m_bufferUploadSize += subBufferSize;
cmdlist->IASetIndexBuffer(&indexBufferView);
m_timers.m_bufferUploadSize += buffer_size;
command_list->IASetIndexBuffer(&index_buffer_view);
m_renderingInfo.m_indexed = true;
upload_vertex_attributes({ std::make_pair(0, max_index + 1) });
cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
}
}

View File

@ -1,7 +0,0 @@
#pragma once
#include <d3d12.h>
#include "Emu/Memory/vm.h"
#include "Emu/RSX/RSXThread.h"
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, bool indexedDraw, const rsx::data_array_format_info *vertexData);

View File

@ -71,4 +71,3 @@ std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::str
}
}
#endif

View File

@ -0,0 +1,468 @@
#include "stdafx_d3d12.h"
#ifdef _WIN32
#include "D3D12Formats.h"
#include "D3D12Utils.h"
#include "Emu/RSX/GCM.h"
D3D12_BLEND_OP get_blend_op(u16 op) noexcept
{
switch (op)
{
case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD;
case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN;
case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX;
case CELL_GCM_FUNC_ADD_SIGNED:
case CELL_GCM_FUNC_REVERSE_ADD_SIGNED:
case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED:
unreachable("Unsupported blend op");
}
unreachable("Wrong blend op");
}
D3D12_BLEND get_blend_factor(u16 factor) noexcept
{
switch (factor)
{
case CELL_GCM_ZERO: return D3D12_BLEND_ZERO;
case CELL_GCM_ONE: return D3D12_BLEND_ONE;
case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
case CELL_GCM_CONSTANT_COLOR:
case CELL_GCM_ONE_MINUS_CONSTANT_COLOR:
case CELL_GCM_CONSTANT_ALPHA:
case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA:
unreachable("Unsupported blend color factor");
}
unreachable("Wrong blend color factor");
}
D3D12_BLEND get_blend_factor_alpha(u16 factor) noexcept
{
switch (factor)
{
case CELL_GCM_ZERO: return D3D12_BLEND_ZERO;
case CELL_GCM_ONE: return D3D12_BLEND_ONE;
case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_ALPHA;
case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_INV_DEST_ALPHA;
case CELL_GCM_CONSTANT_COLOR:
case CELL_GCM_ONE_MINUS_CONSTANT_COLOR:
case CELL_GCM_CONSTANT_ALPHA:
case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA:
unreachable("Unsupported blend alpha factor");
}
unreachable("Wrong blend alpha factor");
}
/**
* Convert GCM logic op code to D3D12 one
*/
D3D12_LOGIC_OP get_logic_op(u32 op) noexcept
{
switch (op)
{
case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR;
case CELL_GCM_AND: return D3D12_LOGIC_OP_AND;
case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY;
case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP;
case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR;
case CELL_GCM_OR: return D3D12_LOGIC_OP_OR;
case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR;
case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV;
case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT;
case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND;
}
unreachable("Wrong logic op");
}
/**
* Convert GCM stencil op code to D3D12 one
*/
D3D12_STENCIL_OP get_stencil_op(u32 op) noexcept
{
switch (op)
{
case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP;
case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO;
case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE;
case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR;
case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR;
case CELL_GCM_INCR_WRAP:
case CELL_GCM_DECR_WRAP:
unreachable("Unsupported Stencil Op %d");
}
unreachable("Wrong Stencil Op %d");
}
D3D12_COMPARISON_FUNC get_compare_func(u32 op) noexcept
{
switch (op)
{
case CELL_GCM_ZERO:
case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER;
case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS;
case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL;
case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL;
case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER;
case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL;
case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS;
}
unreachable("Wrong compare function");
}
DXGI_FORMAT get_texture_format(int format) noexcept
{
switch (format)
{
case CELL_GCM_TEXTURE_B8:
return DXGI_FORMAT_R8_UNORM;
case CELL_GCM_TEXTURE_A1R5G5B5:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_A4R4G4B4:
return DXGI_FORMAT_B4G4R4A4_UNORM;
case CELL_GCM_TEXTURE_R5G6B5:
return DXGI_FORMAT_B5G6R5_UNORM;
case CELL_GCM_TEXTURE_A8R8G8B8:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
return DXGI_FORMAT_BC1_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
return DXGI_FORMAT_BC2_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return DXGI_FORMAT_BC3_UNORM;
case CELL_GCM_TEXTURE_G8B8:
return DXGI_FORMAT_G8R8_G8B8_UNORM;
case CELL_GCM_TEXTURE_R6G5B5:
// Not native
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_DEPTH24_D8:
return DXGI_FORMAT_R32_UINT;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return DXGI_FORMAT_R32_FLOAT;
case CELL_GCM_TEXTURE_DEPTH16:
return DXGI_FORMAT_R16_UNORM;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return DXGI_FORMAT_R16_FLOAT;
case CELL_GCM_TEXTURE_X16:
return DXGI_FORMAT_R16_UNORM;
case CELL_GCM_TEXTURE_Y16_X16:
return DXGI_FORMAT_R16G16_UNORM;
case CELL_GCM_TEXTURE_R5G5B5A1:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return DXGI_FORMAT_R16G16B16A16_FLOAT;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return DXGI_FORMAT_R32G32B32A32_FLOAT;
case CELL_GCM_TEXTURE_X32_FLOAT:
return DXGI_FORMAT_R32_FLOAT;
case CELL_GCM_TEXTURE_D1R5G5B5:
return DXGI_FORMAT_B5G5R5A1_UNORM;
case CELL_GCM_TEXTURE_D8R8G8B8:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
return DXGI_FORMAT_G8R8_G8B8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return DXGI_FORMAT_R8G8_B8G8_UNORM;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
unreachable( "Unimplemented Texture format");
}
unreachable("Wrong Texture format");
}
UINT get_texture_max_aniso(u8 aniso) noexcept
{
switch (aniso)
{
case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1;
case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2;
case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4;
case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6;
case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8;
case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10;
case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12;
case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16;
}
unreachable("Wrong Texture max aniso");
}
D3D12_TEXTURE_ADDRESS_MODE get_texture_wrap_mode(u8 wrap) noexcept
{
switch (wrap)
{
case CELL_GCM_TEXTURE_WRAP: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
case CELL_GCM_TEXTURE_MIRROR: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
case CELL_GCM_TEXTURE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
case CELL_GCM_TEXTURE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
}
unreachable("Wrong texture wrap mode");
}
namespace
{
void get_min_filter(u8 min_filter, D3D12_FILTER_TYPE &min, D3D12_FILTER_TYPE &mip) noexcept
{
switch (min_filter)
{
case CELL_GCM_TEXTURE_NEAREST:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_POINT;
return;;
case CELL_GCM_TEXTURE_LINEAR:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_POINT;
return;
case CELL_GCM_TEXTURE_NEAREST_NEAREST:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_POINT;
return;
case CELL_GCM_TEXTURE_LINEAR_NEAREST:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_POINT;
return;
case CELL_GCM_TEXTURE_NEAREST_LINEAR:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_LINEAR;
return;
case CELL_GCM_TEXTURE_LINEAR_LINEAR:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_LINEAR;
return;
case CELL_GCM_TEXTURE_CONVOLUTION_MIN:
unreachable("Unsupported min filter");
}
unreachable("Wrong min filter");
}
D3D12_FILTER_TYPE get_mag_filter(u8 mag_filter) noexcept
{
switch (mag_filter)
{
case CELL_GCM_TEXTURE_NEAREST: return D3D12_FILTER_TYPE_POINT;
case CELL_GCM_TEXTURE_LINEAR: return D3D12_FILTER_TYPE_LINEAR;
}
unreachable("Wrong mag filter");
}
}
D3D12_FILTER get_texture_filter(u8 min_filter, u8 mag_filter) noexcept
{
D3D12_FILTER_TYPE min, mip;
get_min_filter(min_filter, min, mip);
D3D12_FILTER_TYPE mag = get_mag_filter(mag_filter);
return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD);
}
D3D12_PRIMITIVE_TOPOLOGY get_primitive_topology(u8 draw_mode) noexcept
{
switch (draw_mode)
{
case CELL_GCM_PRIMITIVE_POINTS: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
case CELL_GCM_PRIMITIVE_LINES: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
case CELL_GCM_PRIMITIVE_LINE_LOOP: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
case CELL_GCM_PRIMITIVE_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
case CELL_GCM_PRIMITIVE_TRIANGLES: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
// Emulated
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
case CELL_GCM_PRIMITIVE_QUADS: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
case CELL_GCM_PRIMITIVE_QUAD_STRIP:
case CELL_GCM_PRIMITIVE_POLYGON: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
}
unreachable("Wrong draw mode");
}
D3D12_PRIMITIVE_TOPOLOGY_TYPE get_primitive_topology_type(u8 draw_mode) noexcept
{
switch (draw_mode)
{
case CELL_GCM_PRIMITIVE_POINTS: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
case CELL_GCM_PRIMITIVE_LINES:
case CELL_GCM_PRIMITIVE_LINE_LOOP:
case CELL_GCM_PRIMITIVE_LINE_STRIP: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
case CELL_GCM_PRIMITIVE_TRIANGLES:
case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
case CELL_GCM_PRIMITIVE_QUADS:
// unsupported
case CELL_GCM_PRIMITIVE_QUAD_STRIP:
case CELL_GCM_PRIMITIVE_POLYGON: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
}
unreachable("Wrong draw mode");
}
DXGI_FORMAT get_color_surface_format(u8 format) noexcept
{
switch (format)
{
case CELL_GCM_SURFACE_A8R8G8B8: return DXGI_FORMAT_R8G8B8A8_UNORM;
case CELL_GCM_SURFACE_F_W16Z16Y16X16: return DXGI_FORMAT_R16G16B16A16_FLOAT;
}
unreachable("Wrong color surface format");
}
DXGI_FORMAT get_depth_stencil_surface_format(u8 format) noexcept
{
switch (format)
{
case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_D16_UNORM;
case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_D24_UNORM_S8_UINT;
}
unreachable("Wrong depth stencil surface format");
}
DXGI_FORMAT get_depth_stencil_surface_clear_format(u8 format) noexcept
{
switch (format)
{
case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_D16_UNORM;
case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_D24_UNORM_S8_UINT;
}
unreachable("Wrong depth stencil surface format");
}
DXGI_FORMAT get_depth_typeless_surface_format(u8 format) noexcept
{
switch (format)
{
case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_R16_TYPELESS;
case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_R24G8_TYPELESS;
}
unreachable("Wrong depth stencil surface format");
}
BOOL get_front_face_ccw(u32 set_front_face_value) noexcept
{
switch (set_front_face_value)
{
case CELL_GCM_CW: return FALSE;
default: // Disgaea 3 pass some garbage value at startup, this is needed to survive.
case CELL_GCM_CCW: return TRUE;
}
unreachable("Wrong front face value");
}
DXGI_FORMAT get_index_type(u8 index_type) noexcept
{
switch (index_type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: return DXGI_FORMAT_R16_UINT;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: return DXGI_FORMAT_R32_UINT;
}
unreachable("Wrong index type");
}
DXGI_FORMAT get_vertex_attribute_format(u8 type, u8 size) noexcept
{
switch (type)
{
case CELL_GCM_VERTEX_S1:
{
switch (size)
{
case 1: return DXGI_FORMAT_R16_SNORM;
case 2: return DXGI_FORMAT_R16G16_SNORM;
case 3: return DXGI_FORMAT_R16G16B16A16_SNORM; // No 3 channel type
case 4: return DXGI_FORMAT_R16G16B16A16_SNORM;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_F:
{
switch (size)
{
case 1: return DXGI_FORMAT_R32_FLOAT;
case 2: return DXGI_FORMAT_R32G32_FLOAT;
case 3: return DXGI_FORMAT_R32G32B32_FLOAT;
case 4: return DXGI_FORMAT_R32G32B32A32_FLOAT;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_SF:
{
switch (size)
{
case 1: return DXGI_FORMAT_R16_FLOAT;
case 2: return DXGI_FORMAT_R16G16_FLOAT;
case 3: return DXGI_FORMAT_R16G16B16A16_FLOAT; // No 3 channel type
case 4: return DXGI_FORMAT_R16G16B16A16_FLOAT;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_UB:
{
switch (size)
{
case 1: return DXGI_FORMAT_R8_UNORM;
case 2: return DXGI_FORMAT_R8G8_UNORM;
case 3: return DXGI_FORMAT_R8G8B8A8_UNORM; // No 3 channel type
case 4: return DXGI_FORMAT_R8G8B8A8_UNORM;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_S32K:
{
switch (size)
{
case 1: return DXGI_FORMAT_R16_SINT;
case 2: return DXGI_FORMAT_R16G16_SINT;
case 3: return DXGI_FORMAT_R16G16B16A16_SINT; // No 3 channel type
case 4: return DXGI_FORMAT_R16G16B16A16_SINT;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_CMP:
{
switch (size)
{
case 1: return DXGI_FORMAT_R32_FLOAT;
case 2: return DXGI_FORMAT_R32G32_FLOAT;
case 3: return DXGI_FORMAT_R32G32B32_FLOAT;
case 4: return DXGI_FORMAT_R32G32B32A32_FLOAT;
}
unreachable("Wrong type size");
}
case CELL_GCM_VERTEX_UB256:
{
switch (size)
{
case 1: return DXGI_FORMAT_R8_UINT;
case 2: return DXGI_FORMAT_R8G8_UINT;
case 3: return DXGI_FORMAT_R8G8B8A8_UINT; // No 3 channel type
case 4: return DXGI_FORMAT_R8G8B8A8_UINT;
}
unreachable("Wrong type size");
}
}
unreachable("Wrong type");
}
#endif

View File

@ -0,0 +1,99 @@
#pragma once
#include <d3d12.h>
/**
* Convert GCM blend operator code to D3D12 one
*/
D3D12_BLEND_OP get_blend_op(u16 op) noexcept;
/**
* Convert GCM blend factor code to D3D12 one
*/
D3D12_BLEND get_blend_factor(u16 factor) noexcept;
/**
* Convert GCM blend factor code to D3D12 one for alpha component
*/
D3D12_BLEND get_blend_factor_alpha(u16 factor) noexcept;
/**
* Convert GCM logic op code to D3D12 one
*/
D3D12_LOGIC_OP get_logic_op(u32 op) noexcept;
/**
* Convert GCM stencil op code to D3D12 one
*/
D3D12_STENCIL_OP get_stencil_op(u32 op) noexcept;
/**
* Convert GCM comparison function code to D3D12 one.
*/
D3D12_COMPARISON_FUNC get_compare_func(u32 op) noexcept;
/**
* Convert GCM texture format to an equivalent one supported by D3D12.
* Destination format may require a byte swap or data conversion.
*/
DXGI_FORMAT get_texture_format(int format) noexcept;
/**
* Convert texture aniso value to UINT.
*/
UINT get_texture_max_aniso(u8 aniso) noexcept;
/**
* Convert texture wrap mode to D3D12_TEXTURE_ADDRESS_MODE
*/
D3D12_TEXTURE_ADDRESS_MODE get_texture_wrap_mode(u8 wrap) noexcept;
/**
* Convert minify and magnify filter to D3D12_FILTER
*/
D3D12_FILTER get_texture_filter(u8 min_filter, u8 mag_filter) noexcept;
/**
* Convert draw mode to D3D12_PRIMITIVE_TOPOLOGY
*/
D3D12_PRIMITIVE_TOPOLOGY get_primitive_topology(u8 draw_mode) noexcept;
/**
* Convert draw mode to D3D12_PRIMITIVE_TOPOLOGY_TYPE
*/
D3D12_PRIMITIVE_TOPOLOGY_TYPE get_primitive_topology_type(u8 draw_mode) noexcept;
/**
* Convert color surface format to DXGI_FORMAT
*/
DXGI_FORMAT get_color_surface_format(u8 format) noexcept;
/**
* Convert depth stencil surface format to DXGI_FORMAT
*/
DXGI_FORMAT get_depth_stencil_surface_format(u8 format) noexcept;
/**
*Convert depth stencil surface format to DXGI_FORMAT suited for clear value
*/
DXGI_FORMAT get_depth_stencil_surface_clear_format(u8 format) noexcept;
/**
* Convert depth surface format to DXGI_FORMAT using typeless for stencil
*/
DXGI_FORMAT get_depth_typeless_surface_format(u8 format) noexcept;
/**
* Convert front face value to bool value telling wheter front face is counterclockwise or not
*/
BOOL get_front_face_ccw(u32 set_front_face_value) noexcept;
/**
* Convert index type to DXGI_FORMAT
*/
DXGI_FORMAT get_index_type(u8 index_type) noexcept;
/**
* Convert vertex attribute format and size to DXGI_FORMAT
*/
DXGI_FORMAT get_vertex_attribute_format(u8 type, u8 size) noexcept;

View File

@ -62,6 +62,7 @@ void D3D12FragmentDecompiler::insertIntputs(std::stringstream & OS)
OS << " float4 tc6 : TEXCOORD6;" << std::endl;
OS << " float4 tc7 : TEXCOORD7;" << std::endl;
OS << " float4 tc8 : TEXCOORD8;" << std::endl;
OS << " float4 tc9 : TEXCOORD9;" << std::endl;
OS << "};" << std::endl;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
#pragma once
#include "D3D12.h"
#include "D3D12Utils.h"
#include "Utilities/rPlatform.h" // only for rImage
#include "Utilities/File.h"
#include "Utilities/Log.h"
@ -10,8 +10,8 @@
#include "D3D12RenderTargetSets.h"
#include "D3D12PipelineState.h"
#include "D3D12Buffer.h"
#include "d3dx12.h"
#include "D3D12MemoryHelpers.h"
/**
@ -36,244 +36,6 @@
* are not currently correctly signaled which leads to deadlock.
*/
template<typename T>
struct InitHeap
{
static T* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags);
};
template<>
struct InitHeap<ID3D12Heap>
{
static ID3D12Heap* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
ID3D12Heap *result;
D3D12_HEAP_DESC heapDesc = {};
heapDesc.SizeInBytes = heapSize;
heapDesc.Properties.Type = type;
heapDesc.Flags = flags;
ThrowIfFailed(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&result)));
return result;
}
};
template<>
struct InitHeap<ID3D12Resource>
{
static ID3D12Resource* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
ID3D12Resource *result;
D3D12_HEAP_PROPERTIES heapProperties = {};
heapProperties.Type = type;
ThrowIfFailed(device->CreateCommittedResource(&heapProperties,
flags,
&CD3DX12_RESOURCE_DESC::Buffer(heapSize),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&result))
);
return result;
}
};
/**
* Wrapper around a ID3D12Resource or a ID3D12Heap.
* Acts as a ring buffer : hold a get and put pointers,
* put pointer is used as storage space offset
* and get is used as beginning of in use data space.
* This wrapper checks that put pointer doesn't cross get one.
*/
template<typename T, size_t Alignment>
struct DataHeap
{
T *m_heap;
size_t m_size;
size_t m_putPos; // Start of free space
size_t m_getPos; // End of free space
void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
m_size = heapSize;
m_heap = InitHeap<T>::Init(device, heapSize, type, flags);
m_putPos = 0;
m_getPos = heapSize - 1;
}
/**
* Does alloc cross get position ?
*/
bool canAlloc(size_t size) const
{
size_t allocSize = align(size, Alignment);
size_t currentGetPos = m_getPos;
if (m_putPos + allocSize < m_size)
{
// range before get
if (m_putPos + allocSize < m_getPos)
return true;
// range after get
if (m_putPos > m_getPos)
return true;
return false;
}
else
{
// ..]....[..get..
if (m_putPos < m_getPos)
return false;
// ..get..]...[...
// Actually all resources extending beyond heap space starts at 0
if (allocSize > m_getPos)
return false;
return true;
}
}
size_t alloc(size_t size)
{
assert(canAlloc(size));
size_t allocSize = align(size, Alignment);
if (m_putPos + allocSize < m_size)
{
size_t oldPutPos = m_putPos;
m_putPos += allocSize;
return oldPutPos;
}
else
{
m_putPos = allocSize;
return 0;
}
}
void Release()
{
m_heap->Release();
}
/**
* return current putpos - 1
*/
size_t getCurrentPutPosMinusOne() const
{
return (m_putPos - 1 > 0) ? m_putPos - 1 : m_size - 1;
}
};
struct TextureEntry
{
int m_format;
size_t m_width;
size_t m_height;
size_t m_mipmap;
bool m_isDirty;
TextureEntry() : m_format(0), m_width(0), m_height(0), m_isDirty(true)
{}
TextureEntry(int f, size_t w, size_t h, size_t m) : m_format(f), m_width(w), m_height(h), m_isDirty(false)
{}
bool operator==(const TextureEntry &other)
{
return (m_format == other.m_format && m_width == other.m_width && m_height == other.m_height);
}
};
/**
* Manages cache of data (texture/vertex/index)
*/
struct DataCache
{
private:
/**
* Mutex protecting m_dataCache access
* Memory protection fault catch can be generated by any thread and
* modifies it.
*/
std::mutex mut;
std::unordered_map<u64, std::pair<TextureEntry, ComPtr<ID3D12Resource>> > m_dataCache; // Storage
std::list <std::tuple<u64, u32, u32> > m_protectedRange; // address, start of protected range, size of protected range
public:
void storeAndProtectData(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr<ID3D12Resource> data)
{
std::lock_guard<std::mutex> lock(mut);
m_dataCache[key] = std::make_pair(TextureEntry(format, w, h, m), data);
protectData(key, start, size);
}
/**
* Make memory from start to start + size write protected.
* Associate key to this range so that when a write is detected, data at key is marked dirty.
*/
void protectData(u64 key, u32 start, size_t size)
{
/// align start to 4096 byte
u32 protected_range_start = align(start, 4096);
u32 protected_range_size = (u32)align(size, 4096);
m_protectedRange.push_back(std::make_tuple(key, protected_range_start, protected_range_size));
vm::page_protect(protected_range_start, protected_range_size, 0, 0, vm::page_writable);
}
/// remove all data containing addr from cache, unprotect them. Returns false if no data is modified.
bool invalidateAddress(u32 addr)
{
bool handled = false;
auto It = m_protectedRange.begin(), E = m_protectedRange.end();
for (; It != E;)
{
auto currentIt = It;
++It;
auto protectedTexture = *currentIt;
u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);
if (addr >= protectedRangeStart && addr <= protectedRangeSize + protectedRangeStart)
{
std::lock_guard<std::mutex> lock(mut);
u64 texadrr = std::get<0>(protectedTexture);
m_dataCache[texadrr].first.m_isDirty = true;
vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0);
m_protectedRange.erase(currentIt);
handled = true;
}
}
return handled;
}
std::pair<TextureEntry, ComPtr<ID3D12Resource> > *findDataIfAvailable(u64 key)
{
std::lock_guard<std::mutex> lock(mut);
auto It = m_dataCache.find(key);
if (It == m_dataCache.end())
return nullptr;
return &It->second;
}
void unprotedAll()
{
std::lock_guard<std::mutex> lock(mut);
for (auto &protectedTexture : m_protectedRange)
{
u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);
vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0);
}
}
/**
* Remove data stored at key, and returns a ComPtr owning it.
* The caller is responsible for releasing the ComPtr.
*/
ComPtr<ID3D12Resource> removeFromCache(u64 key)
{
auto result = m_dataCache[key].second;
m_dataCache.erase(key);
return result;
}
};
/**
* Structure used to load/unload D3D12 lib.
*/
@ -299,7 +61,7 @@ private:
ComPtr<ID3D12RootSignature> m_rootSignatures[17];
// TODO: Use a tree structure to parse more efficiently
DataCache m_textureCache;
data_cache m_textureCache;
bool invalidateAddress(u32 addr);
// Copy of RTT to be used as texture
@ -325,7 +87,7 @@ private:
size_t m_flipDuration;
} m_timers;
void ResetTimer();
void reset_timer();
struct Shader
{
@ -351,67 +113,18 @@ private:
ID3D12RootSignature *m_convertRootSignature;
void initConvertShader();
/**
* Stores data that are "ping ponged" between frame.
* For instance command allocator : maintains 2 command allocators and
* swap between them when frame is flipped.
*/
struct ResourceStorage
{
bool m_inUse; // False until command list has been populated at least once
ComPtr<ID3D12Fence> m_frameFinishedFence;
UINT64 m_fenceValue;
HANDLE m_frameFinishedHandle;
// Pointer to device, not owned by ResourceStorage
ID3D12Device *m_device;
ComPtr<ID3D12CommandAllocator> m_commandAllocator;
ComPtr<ID3D12GraphicsCommandList> m_commandList;
// Descriptor heap
ComPtr<ID3D12DescriptorHeap> m_descriptorsHeap;
size_t m_descriptorsHeapIndex;
// Sampler heap
ComPtr<ID3D12DescriptorHeap> m_samplerDescriptorHeap[2];
size_t m_samplerDescriptorHeapIndex;
size_t m_currentSamplerIndex;
ComPtr<ID3D12Resource> m_RAMFramebuffer;
// List of resources that can be freed after frame is flipped
std::vector<ComPtr<ID3D12Resource> > m_singleFrameLifetimeResources;
/// Texture that were invalidated
std::list<ComPtr<ID3D12Resource> > m_dirtyTextures;
size_t m_getPosConstantsHeap;
size_t m_getPosVertexIndexHeap;
size_t m_getPosTextureUploadHeap;
size_t m_getPosReadbackHeap;
size_t m_getPosUAVHeap;
void Reset();
void Init(ID3D12Device *device);
void setNewCommandList();
void WaitAndClean();
void Release();
};
ResourceStorage m_perFrameStorage[2];
ResourceStorage &getCurrentResourceStorage();
ResourceStorage &getNonCurrentResourceStorage();
resource_storage m_perFrameStorage[2];
resource_storage &getCurrentResourceStorage();
resource_storage &getNonCurrentResourceStorage();
// Constants storage
DataHeap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_constantsData;
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_constantsData;
// Vertex storage
DataHeap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_vertexIndexData;
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_vertexIndexData;
// Texture storage
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_textureUploadData;
DataHeap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_UAVHeap;
DataHeap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_readbackResources;
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_textureUploadData;
data_heap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_UAVHeap;
data_heap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_readbackResources;
struct
{
@ -432,8 +145,6 @@ private:
// Used to fill unused texture slot
ID3D12Resource *m_dummyTexture;
size_t m_lastWidth, m_lastHeight, m_lastDepth;
// Store previous fbo addresses to detect RTT config changes.
u32 m_previous_address_a;
u32 m_previous_address_b;
@ -441,27 +152,19 @@ private:
u32 m_previous_address_d;
u32 m_previous_address_z;
public:
u32 m_draw_frames;
u32 m_skip_frames;
D3D12GSRender();
virtual ~D3D12GSRender();
void semaphore_PGRAPH_texture_read_release();
void semaphore_PGRAPH_backend_release();
private:
void InitD2DStructures();
void ReleaseD2DStructures();
ID3D12Resource *writeColorBuffer(ID3D12Resource *RTT, ID3D12GraphicsCommandList *cmdlist);
void init_d2d_structures();
void release_d2d_structures();
bool LoadProgram();
bool load_program();
/**
* Create vertex and index buffers (if needed) and set them to cmdlist.
* Non native primitive type are emulated by index buffers expansion.
*/
void upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist);
void upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list);
std::vector<std::pair<u32, u32> > m_first_count_pairs;
/**
@ -471,31 +174,37 @@ private:
*/
void upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges);
void setScaleOffset(size_t descriptorIndex);
void FillVertexShaderConstantsBuffer(size_t descriptorIndex);
void FillPixelShaderConstantsBuffer(size_t descriptorIndex);
void upload_and_bind_scale_offset_matrix(size_t descriptor_index);
void upload_and_bind_vertex_shader_constants(size_t descriptor_index);
void upload_and_bind_fragment_shader_constants(size_t descriptorIndex);
/**
* Fetch all textures recorded in the state in the render target cache and in the texture cache.
* If a texture is not cached, populate cmdlist with uploads command.
* Create necessary resource view/sampler descriptors in the per frame storage struct.
* returns the number of texture uploaded.
* If the count of enabled texture is below texture_count, fills with dummy texture and sampler.
*/
size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex);
void upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count);
/**
* Creates render target if necessary.
* Populate cmdlist with render target state change (from RTT to generic read for previous rtt,
* from generic to rtt for rtt in cache).
*/
void PrepareRenderTargets(ID3D12GraphicsCommandList *cmdlist);
void prepare_render_targets(ID3D12GraphicsCommandList *command_list);
/**
* Render D2D overlay if enabled on top of the backbuffer.
*/
void renderOverlay();
void render_overlay();
void clear_surface(u32 arg);
/**
* Copy currently bound current target to the dma location affecting them.
* NOTE: We should also copy previously bound rtts.
*/
void copy_render_target_to_dma_location();
protected:
virtual void onexit_thread() override;
virtual bool domethod(u32 cmd, u32 arg) override;

View File

@ -0,0 +1,131 @@
#include "stdafx_d3d12.h"
#ifdef _WIN32
#include "D3D12MemoryHelpers.h"
void data_cache::store_and_protect_data(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr<ID3D12Resource> data) noexcept
{
std::lock_guard<std::mutex> lock(m_mut);
m_address_to_data[key] = std::make_pair(texture_entry(format, w, h, m), data);
protect_data(key, start, size);
}
void data_cache::protect_data(u64 key, u32 start, size_t size) noexcept
{
/// align start to 4096 byte
u32 protected_range_start = align(start, 4096);
u32 protected_range_size = (u32)align(size, 4096);
m_protected_ranges.push_back(std::make_tuple(key, protected_range_start, protected_range_size));
vm::page_protect(protected_range_start, protected_range_size, 0, 0, vm::page_writable);
}
bool data_cache::invalidate_address(u32 addr) noexcept
{
bool handled = false;
auto It = m_protected_ranges.begin(), E = m_protected_ranges.end();
for (; It != E;)
{
auto currentIt = It;
++It;
auto protectedTexture = *currentIt;
u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);
if (addr >= protectedRangeStart && addr <= protectedRangeSize + protectedRangeStart)
{
std::lock_guard<std::mutex> lock(m_mut);
u64 texadrr = std::get<0>(protectedTexture);
m_address_to_data[texadrr].first.m_is_dirty = true;
vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0);
m_protected_ranges.erase(currentIt);
handled = true;
}
}
return handled;
}
std::pair<texture_entry, ComPtr<ID3D12Resource> > *data_cache::find_data_if_available(u64 key) noexcept
{
std::lock_guard<std::mutex> lock(m_mut);
auto It = m_address_to_data.find(key);
if (It == m_address_to_data.end())
return nullptr;
return &It->second;
}
void data_cache::unprotect_all() noexcept
{
std::lock_guard<std::mutex> lock(m_mut);
for (auto &protectedTexture : m_protected_ranges)
{
u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);
vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0);
}
}
ComPtr<ID3D12Resource> data_cache::remove_from_cache(u64 key) noexcept
{
auto result = m_address_to_data[key].second;
m_address_to_data.erase(key);
return result;
}
void resource_storage::reset()
{
descriptors_heap_index = 0;
current_sampler_index = 0;
sampler_descriptors_heap_index = 0;
ThrowIfFailed(command_allocator->Reset());
set_new_command_list();
}
void resource_storage::set_new_command_list()
{
ThrowIfFailed(command_list->Reset(command_allocator.Get(), nullptr));
}
void resource_storage::init(ID3D12Device *device)
{
in_use = false;
m_device = device;
ram_framebuffer = nullptr;
// Create a global command allocator
ThrowIfFailed(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(command_allocator.GetAddressOf())));
ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator.Get(), nullptr, IID_PPV_ARGS(command_list.GetAddressOf())));
ThrowIfFailed(command_list->Close());
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(&descriptors_heap)));
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&sampler_descriptor_heap[0])));
ThrowIfFailed(device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&sampler_descriptor_heap[1])));
frame_finished_handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS);
fence_value = 0;
ThrowIfFailed(device->CreateFence(fence_value++, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(frame_finished_fence.GetAddressOf())));
}
void resource_storage::wait_and_clean()
{
if (in_use)
WaitForSingleObjectEx(frame_finished_handle, INFINITE, FALSE);
else
ThrowIfFailed(command_list->Close());
reset();
dirty_textures.clear();
ram_framebuffer = nullptr;
}
void resource_storage::release()
{
dirty_textures.clear();
// NOTE: Should be released only after gfx pipeline last command has been finished.
CloseHandle(frame_finished_handle);
}
#endif

View File

@ -0,0 +1,238 @@
#pragma once
#include "D3D12Utils.h"
#include "d3dx12.h"
template<typename T>
struct init_heap
{
static T* init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags);
};
template<>
struct init_heap<ID3D12Heap>
{
static ID3D12Heap* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
ID3D12Heap *result;
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = heap_size;
heap_desc.Properties.Type = type;
heap_desc.Flags = flags;
ThrowIfFailed(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&result)));
return result;
}
};
template<>
struct init_heap<ID3D12Resource>
{
static ID3D12Resource* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
ID3D12Resource *result;
D3D12_HEAP_PROPERTIES heap_properties = {};
heap_properties.Type = type;
ThrowIfFailed(device->CreateCommittedResource(&heap_properties,
flags,
&CD3DX12_RESOURCE_DESC::Buffer(heap_size),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&result))
);
return result;
}
};
/**
* Wrapper around a ID3D12Resource or a ID3D12Heap.
* Acts as a ring buffer : hold a get and put pointers,
* put pointer is used as storage space offset
* and get is used as beginning of in use data space.
* This wrapper checks that put pointer doesn't cross get one.
*/
template<typename T, size_t alignment>
struct data_heap
{
T *m_heap;
size_t m_size;
size_t m_put_pos; // Start of free space
size_t m_get_pos; // End of free space
void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
m_size = heap_size;
m_heap = init_heap<T>::init(device, heap_size, type, flags);
m_put_pos = 0;
m_get_pos = heap_size - 1;
}
/**
* Does alloc cross get position ?
*/
bool can_alloc(size_t size) const noexcept
{
size_t alloc_size = align(size, alignment);
if (m_put_pos + alloc_size < m_size)
{
// range before get
if (m_put_pos + alloc_size < m_get_pos)
return true;
// range after get
if (m_put_pos > m_get_pos)
return true;
return false;
}
else
{
// ..]....[..get..
if (m_put_pos < m_get_pos)
return false;
// ..get..]...[...
// Actually all resources extending beyond heap space starts at 0
if (alloc_size > m_get_pos)
return false;
return true;
}
}
size_t alloc(size_t size) noexcept
{
assert(can_alloc(size));
size_t alloc_size = align(size, alignment);
if (m_put_pos + alloc_size < m_size)
{
size_t old_put_pos = m_put_pos;
m_put_pos += alloc_size;
return old_put_pos;
}
else
{
m_put_pos = alloc_size;
return 0;
}
}
void release() noexcept
{
m_heap->Release();
}
/**
* return current putpos - 1
*/
size_t get_current_put_pos_minus_one() const noexcept
{
return (m_put_pos - 1 > 0) ? m_put_pos - 1 : m_size - 1;
}
};
struct texture_entry
{
int m_format;
size_t m_width;
size_t m_height;
size_t m_mipmap;
bool m_is_dirty;
texture_entry() : m_format(0), m_width(0), m_height(0), m_is_dirty(true)
{}
texture_entry(int f, size_t w, size_t h, size_t m) : m_format(f), m_width(w), m_height(h), m_is_dirty(false)
{}
bool operator==(const texture_entry &other)
{
return (m_format == other.m_format && m_width == other.m_width && m_height == other.m_height);
}
};
/**
* Manages cache of data (texture/vertex/index)
*/
struct data_cache
{
private:
/**
* Mutex protecting m_dataCache access
* Memory protection fault catch can be generated by any thread and
* modifies it.
*/
std::mutex m_mut;
std::unordered_map<u64, std::pair<texture_entry, ComPtr<ID3D12Resource>> > m_address_to_data; // Storage
std::list <std::tuple<u64, u32, u32> > m_protected_ranges; // address, start of protected range, size of protected range
public:
void store_and_protect_data(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr<ID3D12Resource> data) noexcept;
/**
* Make memory from start to start + size write protected.
* Associate key to this range so that when a write is detected, data at key is marked dirty.
*/
void protect_data(u64 key, u32 start, size_t size) noexcept;
/**
* Remove all data containing addr from cache, unprotect them. Returns false if no data is modified.
*/
bool invalidate_address(u32 addr) noexcept;
std::pair<texture_entry, ComPtr<ID3D12Resource> > *find_data_if_available(u64 key) noexcept;
void unprotect_all() noexcept;
/**
* Remove data stored at key, and returns a ComPtr owning it.
* The caller is responsible for releasing the ComPtr.
*/
ComPtr<ID3D12Resource> remove_from_cache(u64 key) noexcept;
};
/**
* Stores data that are "ping ponged" between frame.
* For instance command allocator : maintains 2 command allocators and
* swap between them when frame is flipped.
*/
struct resource_storage
{
bool in_use; // False until command list has been populated at least once
ComPtr<ID3D12Fence> frame_finished_fence;
UINT64 fence_value;
HANDLE frame_finished_handle;
// Pointer to device, not owned by ResourceStorage
ID3D12Device *m_device;
ComPtr<ID3D12CommandAllocator> command_allocator;
ComPtr<ID3D12GraphicsCommandList> command_list;
// Descriptor heap
ComPtr<ID3D12DescriptorHeap> descriptors_heap;
size_t descriptors_heap_index;
// Sampler heap
ComPtr<ID3D12DescriptorHeap> sampler_descriptor_heap[2];
size_t sampler_descriptors_heap_index;
size_t current_sampler_index;
ComPtr<ID3D12Resource> ram_framebuffer;
/// Texture that were invalidated
std::list<ComPtr<ID3D12Resource> > dirty_textures;
/**
* Start position in heaps of resources used for this frame.
* This means newer resources shouldn't allocate memory crossing this position
* until the frame rendering is over.
*/
size_t constants_heap_get_pos;
size_t vertex_index_heap_get_pos;
size_t texture_upload_heap_get_pos;
size_t readback_heap_get_pos;
size_t uav_heap_get_pos;
void reset();
void init(ID3D12Device *device);
void set_new_command_list();
void wait_and_clean();
void release();
};

View File

@ -6,23 +6,59 @@
#include <d3d11on12.h>
#include <dxgi1_4.h>
namespace
{
// D2D
ComPtr<ID3D11Device> d3d11Device;
ComPtr<ID3D11DeviceContext> m_d3d11DeviceContext;
ComPtr<ID3D11On12Device> m_d3d11On12Device;
ComPtr<ID3D12Device> m_d3d12Device;
ComPtr<IDWriteFactory> m_dWriteFactory;
ComPtr<ID2D1Factory3> m_d2dFactory;
ComPtr<ID2D1Device2> m_d2dDevice;
ComPtr<ID2D1DeviceContext2> m_d2dDeviceContext;
ComPtr<ID3D11Resource> m_wrappedBackBuffers[2];
ComPtr<ID2D1Bitmap1> m_d2dRenderTargets[2];
ComPtr<IDWriteTextFormat> m_textFormat;
ComPtr<ID2D1SolidColorBrush> m_textBrush;
ComPtr<ID3D11Device> g_d3d11_device;
ComPtr<ID3D11DeviceContext> g_d3d11_device_context;
ComPtr<ID3D11On12Device> g_d3d11on12_device;
ComPtr<ID3D12Device> g_d3d12_device;
ComPtr<IDWriteFactory> g_dwrite_factory;
ComPtr<ID2D1Factory3> g_d2d_factory;
ComPtr<ID2D1Device2> g_d2d_device;
ComPtr<ID2D1DeviceContext2> g_d2d_device_context;
ComPtr<ID3D11Resource> g_wrapped_backbuffers[2];
ComPtr<ID2D1Bitmap1> g_d2d_render_targets[2];
ComPtr<IDWriteTextFormat> g_text_format;
ComPtr<ID2D1SolidColorBrush> g_text_brush;
void draw_strings(const D2D1_SIZE_F &rtSize, size_t backbuffer_id, const std::vector<std::wstring> &strings) noexcept
{
// Acquire our wrapped render target resource for the current back buffer.
g_d3d11on12_device->AcquireWrappedResources(g_wrapped_backbuffers[backbuffer_id ].GetAddressOf(), 1);
// Render text directly to the back buffer.
g_d2d_device_context->SetTarget(g_d2d_render_targets[backbuffer_id].Get());
g_d2d_device_context->BeginDraw();
g_d2d_device_context->SetTransform(D2D1::Matrix3x2F::Identity());
float xpos = 0.f;
for (const std::wstring &str : strings)
{
g_d2d_device_context->DrawTextW(
str.c_str(),
(UINT32)str.size(),
g_text_format.Get(),
&D2D1::RectF(0, xpos, rtSize.width, rtSize.height),
g_text_brush.Get()
);
xpos += 14.f;
}
g_d2d_device_context->EndDraw();
// Release our wrapped render target resource. Releasing
// transitions the back buffer resource to the state specified
// as the OutState when the wrapped resource was created.
g_d3d11on12_device->ReleaseWrappedResources(g_wrapped_backbuffers[backbuffer_id].GetAddressOf(), 1);
// Flush to submit the 11 command list to the shared command queue.
g_d3d11_device_context->Flush();
}
}
extern PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice;
void D3D12GSRender::InitD2DStructures()
void D3D12GSRender::init_d2d_structures()
{
wrapD3D11On12CreateDevice(
m_device.Get(),
@ -32,25 +68,25 @@ void D3D12GSRender::InitD2DStructures()
reinterpret_cast<IUnknown**>(m_commandQueueGraphic.GetAddressOf()),
1,
0,
&d3d11Device,
&m_d3d11DeviceContext,
&g_d3d11_device,
&g_d3d11_device_context,
nullptr
);
d3d11Device.As(&m_d3d11On12Device);
g_d3d11_device.As(&g_d3d11on12_device);
D2D1_DEVICE_CONTEXT_OPTIONS deviceOptions = D2D1_DEVICE_CONTEXT_OPTIONS_NONE;
D2D1_FACTORY_OPTIONS d2dFactoryOptions = {};
D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(ID2D1Factory3), &d2dFactoryOptions, &m_d2dFactory);
D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(ID2D1Factory3), &d2dFactoryOptions, &g_d2d_factory);
Microsoft::WRL::ComPtr<IDXGIDevice> dxgiDevice;
m_d3d11On12Device.As(&dxgiDevice);
m_d2dFactory->CreateDevice(dxgiDevice.Get(), &m_d2dDevice);
m_d2dDevice->CreateDeviceContext(deviceOptions, &m_d2dDeviceContext);
DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(IDWriteFactory), &m_dWriteFactory);
g_d3d11on12_device.As(&dxgiDevice);
g_d2d_factory->CreateDevice(dxgiDevice.Get(), &g_d2d_device);
g_d2d_device->CreateDeviceContext(deviceOptions, &g_d2d_device_context);
DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(IDWriteFactory), &g_dwrite_factory);
float dpiX;
float dpiY;
m_d2dFactory->GetDesktopDpi(&dpiX, &dpiY);
g_d2d_factory->GetDesktopDpi(&dpiX, &dpiY);
D2D1_BITMAP_PROPERTIES1 bitmapProperties = D2D1::BitmapProperties1(
D2D1_BITMAP_OPTIONS_TARGET | D2D1_BITMAP_OPTIONS_CANNOT_DRAW,
D2D1::PixelFormat(DXGI_FORMAT_UNKNOWN, D2D1_ALPHA_MODE_PREMULTIPLIED),
@ -61,26 +97,26 @@ void D3D12GSRender::InitD2DStructures()
for (unsigned i = 0; i < 2; i++)
{
D3D11_RESOURCE_FLAGS d3d11Flags = { D3D11_BIND_RENDER_TARGET };
m_d3d11On12Device->CreateWrappedResource(
g_d3d11on12_device->CreateWrappedResource(
m_backBuffer[i].Get(),
&d3d11Flags,
D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_PRESENT,
IID_PPV_ARGS(&m_wrappedBackBuffers[i])
IID_PPV_ARGS(&g_wrapped_backbuffers[i])
);
// Create a render target for D2D to draw directly to this back buffer.
Microsoft::WRL::ComPtr<IDXGISurface> surface;
m_wrappedBackBuffers[i].As(&surface);
m_d2dDeviceContext->CreateBitmapFromDxgiSurface(
g_wrapped_backbuffers[i].As(&surface);
g_d2d_device_context->CreateBitmapFromDxgiSurface(
surface.Get(),
&bitmapProperties,
&m_d2dRenderTargets[i]
&g_d2d_render_targets[i]
);
}
m_d2dDeviceContext->CreateSolidColorBrush(D2D1::ColorF(D2D1::ColorF::DarkGreen), &m_textBrush);
m_dWriteFactory->CreateTextFormat(
g_d2d_device_context->CreateSolidColorBrush(D2D1::ColorF(D2D1::ColorF::DarkGreen), &g_text_brush);
g_dwrite_factory->CreateTextFormat(
L"Verdana",
NULL,
DWRITE_FONT_WEIGHT_BOLD,
@ -88,33 +124,33 @@ void D3D12GSRender::InitD2DStructures()
DWRITE_FONT_STRETCH_NORMAL,
14,
L"en-us",
&m_textFormat
&g_text_format
);
m_textFormat->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_LEADING);
m_textFormat->SetParagraphAlignment(DWRITE_PARAGRAPH_ALIGNMENT_NEAR);
g_text_format->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_LEADING);
g_text_format->SetParagraphAlignment(DWRITE_PARAGRAPH_ALIGNMENT_NEAR);
}
void D3D12GSRender::ReleaseD2DStructures()
void D3D12GSRender::release_d2d_structures()
{
d3d11Device.Reset();
m_d3d11DeviceContext.Reset();
m_d3d11On12Device.Reset();
m_d3d12Device.Reset();
m_dWriteFactory.Reset();
m_d2dFactory.Reset();
m_d2dDevice.Reset();
m_d2dDeviceContext.Reset();
m_wrappedBackBuffers[0].Reset();
m_d2dRenderTargets[0].Reset();
m_wrappedBackBuffers[1].Reset();
m_d2dRenderTargets[1].Reset();
m_textFormat.Reset();
m_textBrush.Reset();
g_d3d11_device.Reset();
g_d3d11_device_context.Reset();
g_d3d11on12_device.Reset();
g_d3d12_device.Reset();
g_dwrite_factory.Reset();
g_d2d_factory.Reset();
g_d2d_device.Reset();
g_d2d_device_context.Reset();
g_wrapped_backbuffers[0].Reset();
g_d2d_render_targets[0].Reset();
g_wrapped_backbuffers[1].Reset();
g_d2d_render_targets[1].Reset();
g_text_format.Reset();
g_text_brush.Reset();
}
void D3D12GSRender::renderOverlay()
void D3D12GSRender::render_overlay()
{
D2D1_SIZE_F rtSize = m_d2dRenderTargets[m_swapChain->GetCurrentBackBufferIndex()]->GetSize();
D2D1_SIZE_F rtSize = g_d2d_render_targets[m_swapChain->GetCurrentBackBufferIndex()]->GetSize();
std::wstring duration = L"Draw duration : " + std::to_wstring(m_timers.m_drawCallDuration) + L" us";
float vtxIdxPercent = (float)m_timers.m_vertexIndexDuration / (float)m_timers.m_drawCallDuration;
std::wstring vertexIndexDuration = L"Vtx/Idx upload : " + std::to_wstring(m_timers.m_vertexIndexDuration) + L" us (" + std::to_wstring(100.f * vtxIdxPercent) + L" %)";
@ -130,85 +166,17 @@ void D3D12GSRender::renderOverlay()
std::wstring flipDuration = L"Flip : " + std::to_wstring(m_timers.m_flipDuration) + L" us";
std::wstring count = L"Draw count : " + std::to_wstring(m_timers.m_drawCallCount);
// Acquire our wrapped render target resource for the current back buffer.
m_d3d11On12Device->AcquireWrappedResources(m_wrappedBackBuffers[m_swapChain->GetCurrentBackBufferIndex()].GetAddressOf(), 1);
// Render text directly to the back buffer.
m_d2dDeviceContext->SetTarget(m_d2dRenderTargets[m_swapChain->GetCurrentBackBufferIndex()].Get());
m_d2dDeviceContext->BeginDraw();
m_d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Identity());
m_d2dDeviceContext->DrawTextW(
duration.c_str(),
(UINT32)duration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 0, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
count.c_str(),
(UINT32)count.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 14, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
rttDuration.c_str(),
(UINT32)rttDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 28, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
vertexIndexDuration.c_str(),
(UINT32)vertexIndexDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 42, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
size.c_str(),
(UINT32)size.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 56, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
programDuration.c_str(),
(UINT32)programDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 70, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
constantDuration.c_str(),
(UINT32)constantDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 86, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
texDuration.c_str(),
(UINT32)texDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 98, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->DrawTextW(
flipDuration.c_str(),
(UINT32)flipDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 112, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->EndDraw();
// Release our wrapped render target resource. Releasing
// transitions the back buffer resource to the state specified
// as the OutState when the wrapped resource was created.
m_d3d11On12Device->ReleaseWrappedResources(m_wrappedBackBuffers[m_swapChain->GetCurrentBackBufferIndex()].GetAddressOf(), 1);
// Flush to submit the 11 command list to the shared command queue.
m_d3d11DeviceContext->Flush();
draw_strings(rtSize, m_swapChain->GetCurrentBackBufferIndex(),
{
duration,
count,
rttDuration,
vertexIndexDuration,
size,
programDuration,
constantDuration,
texDuration,
flipDuration
});
}
#endif

View File

@ -4,6 +4,7 @@
#include "D3D12PipelineState.h"
#include "D3D12GSRender.h"
#include "Emu/state.h"
#include "D3D12Formats.h"
#pragma comment (lib, "d3dcompiler.lib")
@ -33,7 +34,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st)
}
}
bool D3D12GSRender::LoadProgram()
bool D3D12GSRender::load_program()
{
RSXVertexProgram vertex_program;
u32 transform_program_start = rsx::method_registers[NV4097_SET_TRANSFORM_PROGRAM_START];
@ -57,29 +58,7 @@ bool D3D12GSRender::LoadProgram()
fragment_program.ctrl = rsx::method_registers[NV4097_SET_SHADER_CONTROL];
D3D12PipelineProperties prop = {};
switch (draw_mode)
{
case CELL_GCM_PRIMITIVE_POINTS:
prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
break;
case CELL_GCM_PRIMITIVE_LINES:
case CELL_GCM_PRIMITIVE_LINE_LOOP:
case CELL_GCM_PRIMITIVE_LINE_STRIP:
prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
break;
case CELL_GCM_PRIMITIVE_TRIANGLES:
case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
break;
case CELL_GCM_PRIMITIVE_QUADS:
case CELL_GCM_PRIMITIVE_QUAD_STRIP:
case CELL_GCM_PRIMITIVE_POLYGON:
default:
// LOG_ERROR(RSX, "Unsupported primitive type");
prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
break;
}
prop.Topology = get_primitive_topology_type(draw_mode);
static D3D12_BLEND_DESC CD3D12_BLEND_DESC =
{
@ -106,61 +85,61 @@ bool D3D12GSRender::LoadProgram()
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8)
prop.Blend.RenderTarget[3].BlendEnable = true;
prop.Blend.RenderTarget[0].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
prop.Blend.RenderTarget[0].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[0].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x2)
{
prop.Blend.RenderTarget[1].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[1].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
prop.Blend.RenderTarget[1].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[1].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
}
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x4)
{
prop.Blend.RenderTarget[2].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[2].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
prop.Blend.RenderTarget[2].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[2].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
}
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8)
{
prop.Blend.RenderTarget[3].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[3].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
prop.Blend.RenderTarget[3].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF);
prop.Blend.RenderTarget[3].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16);
}
prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
prop.Blend.RenderTarget[0].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[0].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[0].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[0].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x2)
{
prop.Blend.RenderTarget[1].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[1].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[1].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[1].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
prop.Blend.RenderTarget[1].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[1].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[1].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[1].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
}
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x4)
{
prop.Blend.RenderTarget[2].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[2].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[2].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[2].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
prop.Blend.RenderTarget[2].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[2].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[2].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[2].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
}
if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8)
{
prop.Blend.RenderTarget[3].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[3].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[3].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[3].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
prop.Blend.RenderTarget[3].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[3].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF);
prop.Blend.RenderTarget[3].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16);
prop.Blend.RenderTarget[3].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16);
}
}
if (rsx::method_registers[NV4097_SET_LOGIC_OP_ENABLE])
{
prop.Blend.RenderTarget[0].LogicOpEnable = true;
prop.Blend.RenderTarget[0].LogicOp = getLogicOp(rsx::method_registers[NV4097_SET_LOGIC_OP]);
prop.Blend.RenderTarget[0].LogicOp = get_logic_op(rsx::method_registers[NV4097_SET_LOGIC_OP]);
}
// if (m_set_blend_color)
@ -168,31 +147,8 @@ bool D3D12GSRender::LoadProgram()
// glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a);
// checkForGlError("glBlendColor");
}
switch (m_surface.depth_format)
{
case 0:
break;
case CELL_GCM_SURFACE_Z16:
prop.DepthStencilFormat = DXGI_FORMAT_D16_UNORM;
break;
case CELL_GCM_SURFACE_Z24S8:
prop.DepthStencilFormat = DXGI_FORMAT_D24_UNORM_S8_UINT;
break;
default:
LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format);
assert(0);
}
switch (m_surface.color_format)
{
case CELL_GCM_SURFACE_A8R8G8B8:
prop.RenderTargetsFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case CELL_GCM_SURFACE_F_W16Z16Y16X16:
prop.RenderTargetsFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
break;
}
prop.DepthStencilFormat = get_depth_stencil_surface_format(m_surface.depth_format);
prop.RenderTargetsFormat = get_color_surface_format(m_surface.color_format);
switch (u32 color_target = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])
{
@ -210,33 +166,33 @@ bool D3D12GSRender::LoadProgram()
prop.numMRT = 4;
break;
default:
LOG_ERROR(RSX, "Bad surface color target: %d", color_target);
break;
}
prop.DepthStencil.DepthEnable = !!(rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]);
prop.DepthStencil.DepthWriteMask = !!(rsx::method_registers[NV4097_SET_DEPTH_MASK]) ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
prop.DepthStencil.DepthFunc = getCompareFunc(rsx::method_registers[NV4097_SET_DEPTH_FUNC]);
prop.DepthStencil.DepthFunc = get_compare_func(rsx::method_registers[NV4097_SET_DEPTH_FUNC]);
prop.DepthStencil.StencilEnable = !!(rsx::method_registers[NV4097_SET_STENCIL_TEST_ENABLE]);
prop.DepthStencil.StencilReadMask = rsx::method_registers[NV4097_SET_STENCIL_FUNC_MASK];
prop.DepthStencil.StencilWriteMask = rsx::method_registers[NV4097_SET_STENCIL_MASK];
prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]);
prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]);
prop.DepthStencil.FrontFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_STENCIL_FUNC]);
prop.DepthStencil.FrontFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
prop.DepthStencil.FrontFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]);
prop.DepthStencil.FrontFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]);
prop.DepthStencil.FrontFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_STENCIL_FUNC]);
if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE])
{
prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]);
prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]);
prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]);
prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]);
prop.DepthStencil.BackFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]);
prop.DepthStencil.BackFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]);
prop.DepthStencil.BackFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]);
prop.DepthStencil.BackFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]);
}
else
{
prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]);
prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]);
prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_STENCIL_FUNC]);
prop.DepthStencil.BackFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
prop.DepthStencil.BackFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]);
prop.DepthStencil.BackFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]);
prop.DepthStencil.BackFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_STENCIL_FUNC]);
}
// Sensible default value
@ -273,15 +229,7 @@ bool D3D12GSRender::LoadProgram()
else
prop.Rasterization.CullMode = D3D12_CULL_MODE_NONE;
switch (rsx::method_registers[NV4097_SET_FRONT_FACE])
{
case CELL_GCM_CW:
prop.Rasterization.FrontCounterClockwise = FALSE;
break;
case CELL_GCM_CCW:
prop.Rasterization.FrontCounterClockwise = TRUE;
break;
}
prop.Rasterization.FrontCounterClockwise = get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE]);
UINT8 mask = 0;
mask |= (rsx::method_registers[NV4097_SET_COLOR_MASK] >> 16) & 0xFF ? D3D12_COLOR_WRITE_ENABLE_RED : 0;

View File

@ -1,6 +1,6 @@
#pragma once
#include "D3D12.h"
#include "D3D12Utils.h"
#include "../Common/ProgramStateCache.h"
#include "D3D12VertexProgramDecompiler.h"
#include "D3D12FragmentProgramDecompiler.h"

View File

@ -11,8 +11,9 @@
#include "D3D12.h"
#include "D3D12GSRender.h"
#include "D3D12Formats.h"
void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist)
void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlist)
{
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
@ -78,16 +79,7 @@ void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist)
D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart();
size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
DXGI_FORMAT dxgiFormat;
switch (m_surface.color_format)
{
case CELL_GCM_SURFACE_A8R8G8B8:
dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case CELL_GCM_SURFACE_F_W16Z16Y16X16:
dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
break;
}
DXGI_FORMAT dxgiFormat = get_color_surface_format(m_surface.color_format);
D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {};
rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
rttViewDesc.Format = dxgiFormat;
@ -168,23 +160,13 @@ void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist)
}
}
ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0);
ComPtr<ID3D12Resource> oldDS;
ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0, oldDS);
if (oldDS)
getCurrentResourceStorage().dirty_textures.push_back(oldDS);
D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {};
switch (m_surface.depth_format)
{
case 0:
break;
case CELL_GCM_SURFACE_Z16:
depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM;
break;
case CELL_GCM_SURFACE_Z24S8:
depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
break;
default:
LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format);
assert(0);
}
depthStencilViewDesc.Format = get_depth_stencil_surface_format(m_surface.depth_format);
depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
m_device->CreateDepthStencilView(ds, &depthStencilViewDesc, m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart());
}
@ -203,16 +185,7 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device,
else
{
LOG_WARNING(RSX, "Creating RTT");
DXGI_FORMAT dxgiFormat;
switch (surfaceColorFormat)
{
case CELL_GCM_SURFACE_A8R8G8B8:
dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case CELL_GCM_SURFACE_F_W16Z16Y16X16:
dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
break;
}
DXGI_FORMAT dxgiFormat = get_color_surface_format(surfaceColorFormat);
D3D12_CLEAR_VALUE clearColorValue = {};
clearColorValue.Format = dxgiFormat;
clearColorValue.Color[0] = clearColor[0];
@ -235,56 +208,51 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device,
return rtt;
}
ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear)
ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS)
{
ID3D12Resource* ds;
auto It = m_depthStencil.find(address);
// TODO: Check if sizes and surface depth format match
m_currentlyBoundDepthStencilAddress = address;
// TODO: Check if surface depth format match
if (It != m_depthStencil.end())
{
ds = It->second;
cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
}
else
ComPtr<ID3D12Resource> ds = It->second;
if (ds->GetDesc().Width == width && ds->GetDesc().Height == height)
{
// set the resource as depth write
cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
m_currentlyBoundDepthStencil = ds.Get();
return ds.Get();
}
// If size doesn't match, remove ds from cache
m_depthStencil.erase(address);
dirtyDS = ds;
}
D3D12_CLEAR_VALUE clearDepthValue = {};
clearDepthValue.DepthStencil.Depth = depthClear;
D3D12_HEAP_PROPERTIES heapProp = {};
heapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
DXGI_FORMAT dxgiFormat;
switch (surfaceDepthFormat)
{
case 0:
break;
case CELL_GCM_SURFACE_Z16:
dxgiFormat = DXGI_FORMAT_R16_TYPELESS;
clearDepthValue.Format = DXGI_FORMAT_D16_UNORM;
break;
case CELL_GCM_SURFACE_Z24S8:
dxgiFormat = DXGI_FORMAT_R24G8_TYPELESS;
clearDepthValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
break;
default:
LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat);
assert(0);
}
DXGI_FORMAT dxgiFormat = get_depth_typeless_surface_format(surfaceDepthFormat);
clearDepthValue.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat);
ComPtr<ID3D12Resource> newds;
device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL),
D3D12_RESOURCE_STATE_DEPTH_WRITE,
&clearDepthValue,
IID_PPV_ARGS(&ds)
IID_PPV_ARGS(newds.GetAddressOf())
);
m_depthStencil[address] = ds;
}
m_currentlyBoundDepthStencil = ds;
m_currentlyBoundDepthStencilAddress = address;
return ds;
m_depthStencil[address] = newds;
m_currentlyBoundDepthStencil = newds.Get();
return newds.Get();
}
void RenderTargets::Init(ID3D12Device *device)//, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth)

View File

@ -7,7 +7,7 @@ struct RenderTargets
std::unordered_map<u32, ID3D12Resource* > m_renderTargets;
ID3D12Resource *m_currentlyBoundRenderTargets[4];
u32 m_currentlyBoundRenderTargetsAddress[4];
std::unordered_map<u32, ID3D12Resource *> m_depthStencil;
std::unordered_map<u32, ComPtr<ID3D12Resource> > m_depthStencil;
ID3D12Resource *m_currentlyBoundDepthStencil;
u32 m_currentlyBoundDepthStencilAddress;
ID3D12DescriptorHeap *m_renderTargetsDescriptorsHeap;
@ -22,7 +22,7 @@ struct RenderTargets
size_t width, size_t height, u8 surfaceColorFormat, const std::array<float, 4> &clearColor);
ID3D12Resource *bindAddressAsDepthStencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address,
size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear);
size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS);
void Init(ID3D12Device *device);
void Release();

View File

@ -4,9 +4,11 @@
#include "d3dx12.h"
#include "../Common/TextureUtils.h"
// For clarity this code deals with texture but belongs to D3D12GSRender class
#include "D3D12Formats.h"
static
D3D12_COMPARISON_FUNC getSamplerCompFunc[] =
namespace
{
D3D12_COMPARISON_FUNC get_sampler_compare_func[] =
{
D3D12_COMPARISON_FUNC_NEVER,
D3D12_COMPARISON_FUNC_LESS,
@ -18,103 +20,20 @@ D3D12_COMPARISON_FUNC getSamplerCompFunc[] =
D3D12_COMPARISON_FUNC_ALWAYS
};
static
size_t getSamplerMaxAniso(size_t aniso)
{
switch (aniso)
{
case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1;
case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2;
case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4;
case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6;
case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8;
case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10;
case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12;
case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16;
}
return 1;
}
static
D3D12_TEXTURE_ADDRESS_MODE getSamplerWrap(size_t wrap)
{
switch (wrap)
{
case CELL_GCM_TEXTURE_WRAP: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
case CELL_GCM_TEXTURE_MIRROR: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
case CELL_GCM_TEXTURE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
case CELL_GCM_TEXTURE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
}
return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
}
static
D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter)
{
D3D12_FILTER_TYPE min, mag, mip;
switch (minFilter)
{
case CELL_GCM_TEXTURE_NEAREST:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_POINT;
break;
case CELL_GCM_TEXTURE_LINEAR:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_POINT;
break;
case CELL_GCM_TEXTURE_NEAREST_NEAREST:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_POINT;
break;
case CELL_GCM_TEXTURE_LINEAR_NEAREST:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_POINT;
break;
case CELL_GCM_TEXTURE_NEAREST_LINEAR:
min = D3D12_FILTER_TYPE_POINT;
mip = D3D12_FILTER_TYPE_LINEAR;
break;
case CELL_GCM_TEXTURE_LINEAR_LINEAR:
min = D3D12_FILTER_TYPE_LINEAR;
mip = D3D12_FILTER_TYPE_LINEAR;
break;
case CELL_GCM_TEXTURE_CONVOLUTION_MIN:
default:
LOG_ERROR(RSX, "Unknow min filter %x", minFilter);
}
switch (magFilter)
{
case CELL_GCM_TEXTURE_NEAREST:
mag = D3D12_FILTER_TYPE_POINT;
break;
case CELL_GCM_TEXTURE_LINEAR:
mag = D3D12_FILTER_TYPE_LINEAR;
break;
default:
LOG_ERROR(RSX, "Unknow mag filter %x", magFilter);
}
return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD);
}
static
D3D12_SAMPLER_DESC getSamplerDesc(const rsx::texture &texture)
D3D12_SAMPLER_DESC get_sampler_desc(const rsx::texture &texture) noexcept
{
D3D12_SAMPLER_DESC samplerDesc = {};
samplerDesc.Filter = getSamplerFilter(texture.min_filter(), texture.mag_filter());
samplerDesc.AddressU = getSamplerWrap(texture.wrap_s());
samplerDesc.AddressV = getSamplerWrap(texture.wrap_t());
samplerDesc.AddressW = getSamplerWrap(texture.wrap_r());
samplerDesc.ComparisonFunc = getSamplerCompFunc[texture.zfunc()];
samplerDesc.MaxAnisotropy = (UINT)getSamplerMaxAniso(texture.max_aniso());
samplerDesc.Filter = get_texture_filter(texture.min_filter(), texture.mag_filter());
samplerDesc.AddressU = get_texture_wrap_mode(texture.wrap_s());
samplerDesc.AddressV = get_texture_wrap_mode(texture.wrap_t());
samplerDesc.AddressW = get_texture_wrap_mode(texture.wrap_r());
samplerDesc.ComparisonFunc = get_sampler_compare_func[texture.zfunc()];
samplerDesc.MaxAnisotropy = get_texture_max_aniso(texture.max_aniso());
samplerDesc.MipLODBias = texture.bias();
samplerDesc.BorderColor[4] = (FLOAT)texture.border_color();
samplerDesc.BorderColor[0] = (FLOAT)texture.border_color();
samplerDesc.BorderColor[1] = (FLOAT)texture.border_color();
samplerDesc.BorderColor[2] = (FLOAT)texture.border_color();
samplerDesc.BorderColor[3] = (FLOAT)texture.border_color();
samplerDesc.MinLOD = (FLOAT)(texture.min_lod() >> 8);
samplerDesc.MaxLOD = (FLOAT)(texture.max_lod() >> 8);
return samplerDesc;
@ -125,168 +44,89 @@ D3D12_SAMPLER_DESC getSamplerDesc(const rsx::texture &texture)
* Create a texture residing in default heap and generate uploads commands in commandList,
* using a temporary texture buffer.
*/
static
ComPtr<ID3D12Resource> uploadSingleTexture(
ComPtr<ID3D12Resource> upload_single_texture(
const rsx::texture &texture,
ID3D12Device *device,
ID3D12GraphicsCommandList *commandList,
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &textureBuffersHeap)
ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap)
{
ComPtr<ID3D12Resource> vramTexture;
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format);
DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t textureSize = getPlacedTextureStorageSpace(texture, 256);
assert(textureBuffersHeap.canAlloc(textureSize));
size_t heapOffset = textureBuffersHeap.alloc(textureSize);
size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size));
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
void *buffer;
ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
void *textureData = (char*)buffer + heapOffset;
std::vector<MipmapLevelInfo> mipInfos = uploadPlacedTexture(texture, 256, textureData);
textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));
D3D12_RESOURCE_DESC texturedesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)w, (UINT)h, 1, texture.mipmap());
textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes;
ThrowIfFailed(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
ComPtr<ID3D12Resource> result;
ThrowIfFailed(device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&texturedesc,
&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)w, (UINT)h, 1, texture.mipmap()),
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(vramTexture.GetAddressOf())
IID_PPV_ARGS(result.GetAddressOf())
));
size_t miplevel = 0;
size_t mip_level = 0;
for (const MipmapLevelInfo mli : mipInfos)
{
commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(vramTexture.Get(), (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
miplevel++;
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(result.Get(), (UINT)mip_level), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
mip_level++;
}
commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(vramTexture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
return vramTexture;
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(result.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
return result;
}
/**
*
*/
static
void updateExistingTexture(
void update_existing_texture(
const rsx::texture &texture,
ID3D12GraphicsCommandList *commandList,
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &textureBuffersHeap,
ID3D12Resource *existingTexture)
ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap,
ID3D12Resource *existing_texture)
{
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format);
DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t textureSize = getPlacedTextureStorageSpace(texture, 256);
assert(textureBuffersHeap.canAlloc(textureSize));
size_t heapOffset = textureBuffersHeap.alloc(textureSize);
size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size));
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
void *buffer;
ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
void *textureData = (char*)buffer + heapOffset;
std::vector<MipmapLevelInfo> mipInfos = uploadPlacedTexture(texture, 256, textureData);
textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));
ThrowIfFailed(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existingTexture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST));
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST));
size_t miplevel = 0;
for (const MipmapLevelInfo mli : mipInfos)
{
commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existingTexture, (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset,{ dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
miplevel++;
}
commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existingTexture, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
}
/**
* Get number of bytes occupied by texture in RSX mem
*/
static
size_t getTextureSize(const rsx::texture &texture)
{
size_t w = texture.width(), h = texture.height();
int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
// TODO: Take mipmaps into account
switch (format)
{
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
return 0;
case CELL_GCM_TEXTURE_B8:
return w * h;
case CELL_GCM_TEXTURE_A1R5G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_A4R4G4B4:
return w * h * 2;
case CELL_GCM_TEXTURE_R5G6B5:
return w * h * 2;
case CELL_GCM_TEXTURE_A8R8G8B8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
return w * h / 6;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
return w * h / 4;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return w * h / 4;
case CELL_GCM_TEXTURE_G8B8:
return w * h * 2;
case CELL_GCM_TEXTURE_R6G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_DEPTH24_D8:
return w * h * 4;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_DEPTH16:
return w * h * 2;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return w * h * 2;
case CELL_GCM_TEXTURE_X16:
return w * h * 2;
case CELL_GCM_TEXTURE_Y16_X16:
return w * h * 4;
case CELL_GCM_TEXTURE_R5G5B5A1:
return w * h * 2;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
return w * h * 8;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return w * h * 16;
case CELL_GCM_TEXTURE_X32_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_D1R5G5B5:
return w * h * 2;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
return w * h * 4;
case CELL_GCM_TEXTURE_D8R8G8B8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
return w * h * 4;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return w * h * 4;
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
}
}
size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex)
void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count)
{
size_t usedTexture = 0;
size_t used_texture = 0;
for (u32 i = 0; i < rsx::limits::textures_count; ++i)
{
@ -297,40 +137,39 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
const u32 texaddr = rsx::get_address(textures[i].offset(), textures[i].location());
int format = textures[i].format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format);
bool is_swizzled = !(textures[i].format() & CELL_GCM_TEXTURE_LN);
ID3D12Resource *vramTexture;
ID3D12Resource *vram_texture;
std::unordered_map<u32, ID3D12Resource* >::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr);
std::pair<TextureEntry, ComPtr<ID3D12Resource> > *cachedTex = m_textureCache.findDataIfAvailable(texaddr);
std::pair<texture_entry, ComPtr<ID3D12Resource> > *cached_texture = m_textureCache.find_data_if_available(texaddr);
bool isRenderTarget = false;
if (ItRTT != m_rtts.m_renderTargets.end())
{
vramTexture = ItRTT->second;
vram_texture = ItRTT->second;
isRenderTarget = true;
}
else if (cachedTex != nullptr && (cachedTex->first == TextureEntry(format, w, h, textures[i].mipmap())))
else if (cached_texture != nullptr && (cached_texture->first == texture_entry(format, w, h, textures[i].mipmap())))
{
if (cachedTex->first.m_isDirty)
if (cached_texture->first.m_is_dirty)
{
updateExistingTexture(textures[i], cmdlist, m_textureUploadData, cachedTex->second.Get());
m_textureCache.protectData(texaddr, texaddr, getTextureSize(textures[i]));
update_existing_texture(textures[i], command_list, m_textureUploadData, cached_texture->second.Get());
m_textureCache.protect_data(texaddr, texaddr, get_texture_size(textures[i]));
}
vramTexture = cachedTex->second.Get();
vram_texture = cached_texture->second.Get();
}
else
{
if (cachedTex != nullptr)
getCurrentResourceStorage().m_dirtyTextures.push_back(m_textureCache.removeFromCache(texaddr));
ComPtr<ID3D12Resource> tex = uploadSingleTexture(textures[i], m_device.Get(), cmdlist, m_textureUploadData);
vramTexture = tex.Get();
m_textureCache.storeAndProtectData(texaddr, texaddr, getTextureSize(textures[i]), format, w, h, textures[i].mipmap(), tex);
if (cached_texture != nullptr)
getCurrentResourceStorage().dirty_textures.push_back(m_textureCache.remove_from_cache(texaddr));
ComPtr<ID3D12Resource> tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_textureUploadData);
vram_texture = tex.Get();
m_textureCache.store_and_protect_data(texaddr, texaddr, get_texture_size(textures[i]), format, w, h, textures[i].mipmap(), tex);
}
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srvDesc.Format = dxgiFormat;
srvDesc.Texture2D.MipLevels = textures[i].mipmap();
D3D12_SHADER_RESOURCE_VIEW_DESC shared_resource_view_desc = {};
shared_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
shared_resource_view_desc.Format = get_texture_format(format);
shared_resource_view_desc.Texture2D.MipLevels = textures[i].mipmap();
switch (format)
{
@ -342,7 +181,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
break;
case CELL_GCM_TEXTURE_B8:
srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0,
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0,
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0,
@ -351,7 +190,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
break;
case CELL_GCM_TEXTURE_A8R8G8B8:
{
@ -373,7 +212,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2
};
srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
RemapValue[remap_r],
RemapValue[remap_g],
RemapValue[remap_b],
@ -391,7 +230,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3
};
srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
RemapValue[remap_r],
RemapValue[remap_g],
RemapValue[remap_b],
@ -416,7 +255,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
case CELL_GCM_TEXTURE_X32_FLOAT:
case CELL_GCM_TEXTURE_D1R5G5B5:
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
break;
case CELL_GCM_TEXTURE_D8R8G8B8:
{
@ -433,7 +272,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
u8 remap_g = (textures[i].remap() >> 4) & 0x3;
u8 remap_b = (textures[i].remap() >> 6) & 0x3;
srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
RemapValue[remap_a],
RemapValue[remap_r],
RemapValue[remap_g],
@ -441,32 +280,58 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t
break;
}
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
break;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
break;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
break;
}
m_device->CreateShaderResourceView(vramTexture, &srvDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)descriptorIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV));
m_device->CreateShaderResourceView(vram_texture, &shared_resource_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)descriptor_index + (UINT)used_texture, g_descriptorStrideSRVCBVUAV));
if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048)
if (getCurrentResourceStorage().current_sampler_index + 16 > 2048)
{
getCurrentResourceStorage().m_samplerDescriptorHeapIndex = 1;
getCurrentResourceStorage().m_currentSamplerIndex = 0;
getCurrentResourceStorage().sampler_descriptors_heap_index = 1;
getCurrentResourceStorage().current_sampler_index = 0;
}
m_device->CreateSampler(&getSamplerDesc(textures[i]),
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSamplers));
m_device->CreateSampler(&get_sampler_desc(textures[i]),
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)getCurrentResourceStorage().current_sampler_index + (UINT)used_texture, g_descriptorStrideSamplers));
usedTexture++;
used_texture++;
}
return usedTexture;
// Now fill remaining texture slots with dummy texture/sampler
for (; used_texture < texture_count; used_texture++)
{
D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {};
shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
shader_resource_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
shader_resource_view_desc.Texture2D.MipLevels = 1;
shader_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
m_device->CreateShaderResourceView(m_dummyTexture, &shader_resource_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptor_index + (INT)used_texture, g_descriptorStrideSRVCBVUAV)
);
D3D12_SAMPLER_DESC sampler_desc = {};
sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
m_device->CreateSampler(&sampler_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().current_sampler_index + (INT)used_texture, g_descriptorStrideSamplers)
);
}
}
#endif

View File

@ -1 +0,0 @@
#pragma once

View File

@ -262,4 +262,13 @@ void D3D12GSRender::initConvertShader()
p.first->Release();
p.second->Release();
}
void unreachable_internal(const char *msg, const char *file, unsigned line)
{
abort();
#ifdef LLVM_BUILTIN_UNREACHABLE
LLVM_BUILTIN_UNREACHABLE;
#endif
}
#endif

View File

@ -0,0 +1,118 @@
#pragma once
#include <d3d12.h>
#include <cassert>
#include <wrl/client.h>
#include "Utilities/Log.h"
#include "Emu/Memory/vm.h"
#include "Emu/RSX/GCM.h"
// From llvm Compiler.h
// Need to be set by define
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
/// \macro LLVM_GNUC_PREREQ
/// \brief Extend the default __GNUC_PREREQ even if glibc's features.h isn't
/// available.
#ifndef LLVM_GNUC_PREREQ
# if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
#define LLVM_GNUC_PREREQ(maj, min, patch) \
((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \
((maj) << 20) + ((min) << 10) + (patch))
# elif defined(__GNUC__) && defined(__GNUC_MINOR__)
#define LLVM_GNUC_PREREQ(maj, min, patch) \
((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
#else
#define LLVM_GNUC_PREREQ(maj, min, patch) 0
#endif
#endif
#ifdef __GNUC__
#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn)
#else
#define LLVM_ATTRIBUTE_NORETURN
#endif
#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
#elif defined(_MSC_VER)
# define LLVM_BUILTIN_UNREACHABLE __assume(false)
#endif
LLVM_ATTRIBUTE_NORETURN void unreachable_internal(const char *msg = nullptr, const char *file = nullptr, unsigned line = 0);
/// Marks that the current location is not supposed to be reachable.
/// In !NDEBUG builds, prints the message and location info to stderr.
/// In NDEBUG builds, becomes an optimizer hint that the current location
/// is not supposed to be reachable. On compilers that don't support
/// such hints, prints a reduced message instead.
///
/// Use this instead of assert(0). It conveys intent more clearly and
/// allows compilers to omit some unnecessary code.
#ifndef NDEBUG
#define unreachable(msg) \
unreachable_internal(msg, __FILE__, __LINE__)
#elif defined(LLVM_BUILTIN_UNREACHABLE)
#define unreachable(msg) LLVM_BUILTIN_UNREACHABLE
#else
#define unreachable(msg) unreachable_internal()
#endif
using namespace Microsoft::WRL;
// From DX12 D3D11On12 Sample (MIT Licensed)
inline void ThrowIfFailed(HRESULT hr)
{
if (FAILED(hr))
{
throw;
}
}
/**
* Send data to dst pointer without polluting cache.
* Usefull to write to mapped memory from upload heap.
*/
inline
void streamToBuffer(void* dst, void* src, size_t sizeInBytes)
{
for (int i = 0; i < sizeInBytes / 16; i++)
{
const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16));
_mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr);
}
}
/**
* copy src to dst pointer without polluting cache.
* Usefull to write to mapped memory from upload heap.
*/
inline
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
{
// Assume 64 bytes cache line
int offset = 0;
bool isAligned = !((size_t)src & 15);
for (offset = 0; offset < sizeInBytes - 64; offset += 64)
{
char *line = (char*)src + offset;
char *dstline = (char*)dst + offset;
// prefetch next line
_mm_prefetch(line + 16, _MM_HINT_NTA);
__m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line);
_mm_stream_si128((__m128i*)dstline, srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16));
_mm_stream_si128((__m128i*)(dstline + 16), srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32));
_mm_stream_si128((__m128i*)(dstline + 32), srcPtr);
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48));
_mm_stream_si128((__m128i*)(dstline + 48), srcPtr);
}
memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset);
}

View File

@ -84,6 +84,7 @@ void D3D12VertexProgramDecompiler::insertOutputs(std::stringstream & OS, const s
OS << " float4 dst_reg13 : TEXCOORD6;" << std::endl;
OS << " float4 dst_reg14 : TEXCOORD7;" << std::endl;
OS << " float4 dst_reg15 : TEXCOORD8;" << std::endl;
OS << " float4 dst_reg16 : TEXCOORD9;" << std::endl;
OS << "};" << std::endl;
}

View File

@ -1,3 +1,9 @@
#pragma once
#include "stdafx.h"
#ifdef _WIN32
#include <d3d12.h>
#include "Emu\RSX\D3D12\D3D12Utils.h"
#include "Emu\RSX\D3D12\D3D12Formats.h"
#include "Emu\RSX\D3D12\D3D12GSRender.h"
#endif