Texture uploads/basic formats

Fixed swizzle one/zero mismatch
Sampler setup
Remove samplers from the descriptor set layout
This commit is contained in:
Dr. Chat 2016-04-01 21:51:17 -05:00
parent 1ea72c5e06
commit 2eca3ce9e6
4 changed files with 437 additions and 130 deletions

View File

@ -55,11 +55,11 @@ void SpirvShaderTranslator::StartTranslation() {
bool_type_ = b.makeBoolType();
float_type_ = b.makeFloatType(32);
int_type_ = b.makeIntType(32);
Id uint_type = b.makeUintType(32);
uint_type_ = b.makeUintType(32);
vec2_float_type_ = b.makeVectorType(float_type_, 2);
vec3_float_type_ = b.makeVectorType(float_type_, 3);
vec4_float_type_ = b.makeVectorType(float_type_, 4);
vec4_uint_type_ = b.makeVectorType(uint_type, 4);
vec4_uint_type_ = b.makeVectorType(uint_type_, 4);
vec4_bool_type_ = b.makeVectorType(bool_type_, 4);
vec4_float_one_ = b.makeCompositeConstant(
@ -136,7 +136,7 @@ void SpirvShaderTranslator::StartTranslation() {
// Push constants, represented by SpirvPushConstants.
Id push_constants_type = b.makeStructType(
{vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type},
{vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type_},
"push_consts_type");
b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock);
@ -164,7 +164,6 @@ void SpirvShaderTranslator::StartTranslation() {
push_constants_type, "push_consts");
// Texture bindings
Id sampler_t = b.makeSamplerType();
Id tex_t[] = {b.makeSampledImageType(b.makeImageType(
float_type_, spv::Dim::Dim1D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown)),
@ -178,23 +177,17 @@ void SpirvShaderTranslator::StartTranslation() {
float_type_, spv::Dim::DimCube, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown))};
Id samplers_a = b.makeArrayType(sampler_t, b.makeUintConstant(32), 0);
Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)};
// TODO(DrChat): See texture_cache.cc - do we need separate samplers here?
samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant,
samplers_a, "samplers");
b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1);
b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0);
for (int i = 0; i < 4; i++) {
tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant,
tex_a_t[i],
xe::format_string("textures%dD", i + 1).c_str());
b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1);
b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i + 1);
b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i);
}
// Interpolators.
@ -254,6 +247,20 @@ void SpirvShaderTranslator::StartTranslation() {
vec4_float_type_, "gl_Position");
b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInPosition);
vertex_id_ = b.createVariable(spv::StorageClass::StorageClassInput,
int_type_, "gl_VertexId");
b.addDecoration(vertex_id_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInVertexId);
auto vertex_id = b.createLoad(vertex_id_);
auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction,
registers_ptr_,
std::vector<Id>({b.makeUintConstant(0)}));
auto r0 = b.createLoad(r0_ptr);
r0 = b.createCompositeInsert(vertex_id, r0, vec4_float_type_,
std::vector<uint32_t>({0}));
b.createStore(r0, r0_ptr);
} else {
// Pixel inputs from vertex shader.
interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput,
@ -267,9 +274,9 @@ void SpirvShaderTranslator::StartTranslation() {
frag_outputs_type, "oC");
b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0);
Id frag_depth = b.createVariable(spv::StorageClass::StorageClassOutput,
vec4_float_type_, "gl_FragDepth");
b.addDecoration(frag_depth, spv::Decoration::DecorationBuiltIn,
frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput,
float_type_, "gl_FragDepth");
b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInFragDepth);
// TODO(benvanik): frag depth, etc.
@ -388,6 +395,25 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
{p, p_scaled, 4, 5, 2, 3});
b.createStore(p, pos_);
} else {
// Alpha test
auto alpha_test_x = b.createCompositeExtract(
push_consts_, float_type_, std::vector<uint32_t>{2, 0});
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, b.makeFloatConstant(1.f));
spv::Builder::If alpha_if(cond, b);
// TODO(DrChat): Apply alpha test.
// if (alpha_func == 0) passes = false;
// if (alpha_func == 1 && oC[0].a < alpha_ref) passes = true;
// if (alpha_func == 2 && oC[0].a == alpha_ref) passes = true;
// if (alpha_func == 3 && oC[0].a <= alpha_ref) passes = true;
// if (alpha_func == 4 && oC[0].a > alpha_ref) passes = true;
// if (alpha_func == 5 && oC[0].a != alpha_ref) passes = true;
// if (alpha_func == 6 && oC[0].a >= alpha_ref) passes = true;
// if (alpha_func == 7) passes = true;
alpha_if.makeEndIf();
}
b.makeReturn(false);
@ -592,9 +618,9 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
v = b.createLoad(v);
// Bitfield extract the bool constant.
v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v,
b.makeUintConstant(instr.bool_constant_index % 32),
b.makeUintConstant(1));
v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v,
b.makeIntConstant(instr.bool_constant_index % 32),
b.makeIntConstant(1));
// Conditional branch
auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v,
@ -642,17 +668,57 @@ void SpirvShaderTranslator::ProcessAllocInstruction(
// Translates a parsed vertex fetch instruction into SPIR-V.
//
// The fetch index is read from operand 0, converted to a signed integer and
// compared with the gl_VertexId input. When the two match, the attribute
// loaded from the binding selected by operand 1 is stored to the instruction
// result; otherwise a constant fallback is stored instead (0 for a scalar,
// zeros with a trailing 1 for 2-4 component vectors).
void SpirvShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) {
auto& b = *builder_;
// Vertex fetches are only expected in vertex shaders, and the gl_VertexId
// input variable must already have been created.
assert_true(is_vertex_shader());
assert_not_zero(vertex_id_);
// TODO: instr.is_predicated
// Operand 0 is the index
// Operand 1 is the binding
// TODO: Indexed fetch
// Take component 0 of the index operand and convert it from float to int so
// it can be compared with the integer vertex id.
auto vertex_id = LoadFromOperand(instr.operands[0]);
vertex_id = b.createCompositeExtract(vertex_id, float_type_, 0);
vertex_id = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_id);
auto shader_vertex_id = b.createLoad(vertex_id_);
// cond is true when the requested index equals the current vertex id.
auto cond =
b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id);
// Skip loading if it's an indexed fetch.
// The attribute variable is looked up by binding (operand 1's storage index)
// and by the attribute offset within that binding.
auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
[instr.attributes.offset];
assert_not_zero(vertex_ptr);
auto vertex = b.createLoad(vertex_ptr);
auto vertex_components = b.getNumComponents(vertex);
// Build the fallback constant with the same component count as the loaded
// attribute. NOTE(review): alt_vertex stays 0 when the component count is not
// 1-4, and is then still passed to OpSelect below — confirm this cannot occur.
Id alt_vertex = 0;
switch (vertex_components) {
case 1:
alt_vertex = b.makeFloatConstant(0.f);
break;
case 2:
alt_vertex = b.makeCompositeConstant(
vec2_float_type_, std::vector<Id>({b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
case 3:
alt_vertex = b.makeCompositeConstant(
vec3_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
case 4:
alt_vertex = b.makeCompositeConstant(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
default:
assert_unhandled_case(vertex_components);
}
// Pick the fetched attribute when cond holds, the fallback otherwise.
// NOTE(review): cond is a scalar bool while vertex may be a vector; SPIR-V
// versions before 1.4 appear to require the OpSelect condition to have the
// same component count as the result — confirm against the targeted version.
vertex = b.createTriOp(spv::Op::OpSelect, b.getTypeId(vertex), cond, vertex,
alt_vertex);
StoreToResult(vertex, instr.result);
}
@ -1594,14 +1660,14 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
case InstructionStorageAddressingMode::kAddressAbsolute: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_),
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(storage_base + op.storage_index));
} break;
case InstructionStorageAddressingMode::kAddressRelative: {
// TODO: Based on loop index
// storage_index + aL.x
storage_index = b.createBinOp(
spv::Op::OpIAdd, b.makeUintType(32), b.makeUintConstant(0),
storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.makeUintConstant(0),
b.makeUintConstant(storage_base + op.storage_index));
} break;
default:
@ -1723,7 +1789,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
case InstructionStorageAddressingMode::kAddressAbsolute: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_),
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(result.storage_index));
} break;
case InstructionStorageAddressingMode::kAddressRelative: {
@ -1776,7 +1842,11 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
break;
case InstructionStorageTarget::kDepth:
assert_true(is_pixel_shader());
// TODO(benvanik): result.storage_index
storage_pointer = frag_depth_;
storage_class = spv::StorageClass::StorageClassOutput;
storage_type = float_type_;
storage_offsets.push_back(0);
storage_array = false;
break;
case InstructionStorageTarget::kNone:
assert_unhandled_case(result.storage_target);

View File

@ -104,7 +104,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Function* translated_main_ = 0;
// Types.
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0;
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
spv::Id vec4_uint_type_ = 0;
spv::Id vec4_bool_type_ = 0;
@ -120,7 +120,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id pos_ = 0;
spv::Id push_consts_ = 0;
spv::Id interpolators_ = 0;
spv::Id frag_outputs_ = 0;
spv::Id vertex_id_ = 0;
spv::Id frag_outputs_ = 0, frag_depth_ = 0;
spv::Id samplers_ = 0;
spv::Id tex_[4] = {0}; // Images {1D, 2D, 3D, Cube}

View File

@ -31,6 +31,81 @@ struct TextureConfig {
VkFormat host_format;
};
// Maps each guest texture format to the host Vulkan format used for its image.
// The table is indexed by the integer value of TextureFormat (see the lookup
// in AllocateTexture), so entry order must follow the enum and unused slots
// are padded with kUnknown. VK_FORMAT_UNDEFINED marks formats with no host
// mapping yet. Entries tagged "// ?" are unverified guesses.
static const TextureConfig texture_configs[64] = {
    {TextureFormat::k_1_REVERSE, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_1, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8, VK_FORMAT_R8_UNORM},
    {TextureFormat::k_1_5_5_5, VK_FORMAT_R5G5B5A1_UNORM_PACK16},
    {TextureFormat::k_5_6_5, VK_FORMAT_R5G6B5_UNORM_PACK16},
    {TextureFormat::k_6_5_5, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8_8_8_8, VK_FORMAT_R8G8B8A8_UNORM},
    {TextureFormat::k_2_10_10_10, VK_FORMAT_A2R10G10B10_UNORM_PACK32},
    {TextureFormat::k_8_A, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8_B, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8_8, VK_FORMAT_R8G8_UNORM},
    {TextureFormat::k_Cr_Y1_Cb_Y0, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_Y1_Cr_Y0_Cb, VK_FORMAT_UNDEFINED},
    {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8_8_8_8_A, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_4_4_4_4, VK_FORMAT_R4G4B4A4_UNORM_PACK16},
    {TextureFormat::k_10_11_11, VK_FORMAT_B10G11R11_UFLOAT_PACK32},  // ?
    {TextureFormat::k_11_11_10, VK_FORMAT_B10G11R11_UFLOAT_PACK32},  // ?
    // S3TC: DXT1 is BC1, DXT2/3 is BC2 and DXT4/5 is BC3. UNORM variants are
    // used so sampling does not apply an sRGB decode, matching every other
    // entry in this table.
    {TextureFormat::k_DXT1, VK_FORMAT_BC1_RGBA_UNORM_BLOCK},
    {TextureFormat::k_DXT2_3, VK_FORMAT_BC2_UNORM_BLOCK},
    {TextureFormat::k_DXT4_5, VK_FORMAT_BC3_UNORM_BLOCK},
    {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_24_8, VK_FORMAT_D24_UNORM_S8_UINT},
    {TextureFormat::k_24_8_FLOAT, VK_FORMAT_D24_UNORM_S8_UINT},  // ?
    {TextureFormat::k_16, VK_FORMAT_R16_UNORM},
    {TextureFormat::k_16_16, VK_FORMAT_R16G16_UNORM},
    {TextureFormat::k_16_16_16_16, VK_FORMAT_R16G16B16A16_UNORM},
    {TextureFormat::k_16_EXPAND, VK_FORMAT_R16_UNORM},                    // ?
    {TextureFormat::k_16_16_EXPAND, VK_FORMAT_R16G16_UNORM},              // ?
    {TextureFormat::k_16_16_16_16_EXPAND, VK_FORMAT_R16G16B16A16_UNORM},  // ?
    {TextureFormat::k_16_FLOAT, VK_FORMAT_R16_SFLOAT},
    {TextureFormat::k_16_16_FLOAT, VK_FORMAT_R16G16_SFLOAT},
    {TextureFormat::k_16_16_16_16_FLOAT, VK_FORMAT_R16G16B16A16_SFLOAT},
    {TextureFormat::k_32, VK_FORMAT_R32_SINT},
    {TextureFormat::k_32_32, VK_FORMAT_R32G32_SINT},
    {TextureFormat::k_32_32_32_32, VK_FORMAT_R32G32B32A32_SINT},
    {TextureFormat::k_32_FLOAT, VK_FORMAT_R32_SFLOAT},
    {TextureFormat::k_32_32_FLOAT, VK_FORMAT_R32G32_SFLOAT},
    {TextureFormat::k_32_32_32_32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT},
    {TextureFormat::k_32_AS_8, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_32_AS_8_8, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_16_MPEG, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_16_16_MPEG, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_8_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_32_AS_8_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_32_AS_8_8_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_16_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_16_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_DXN, VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RG_RGTC2 */},
    {TextureFormat::k_8_8_8_8_AS_16_16_16_16, VK_FORMAT_R8G8B8A8_UNORM},
    {TextureFormat::k_DXT1_AS_16_16_16_16,
     VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGB_S3TC_DXT1_EXT */},
    {TextureFormat::k_DXT2_3_AS_16_16_16_16,
     VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */},
    {TextureFormat::k_DXT4_5_AS_16_16_16_16,
     VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */},
    {TextureFormat::k_2_10_10_10_AS_16_16_16_16,
     VK_FORMAT_A2R10G10B10_UNORM_PACK32},
    {TextureFormat::k_10_11_11_AS_16_16_16_16,
     VK_FORMAT_B10G11R11_UFLOAT_PACK32},  // ?
    {TextureFormat::k_11_11_10_AS_16_16_16_16,
     VK_FORMAT_B10G11R11_UFLOAT_PACK32},  // ?
    {TextureFormat::k_32_32_32_FLOAT, VK_FORMAT_R32G32B32_SFLOAT},
    {TextureFormat::k_DXT3A,
     VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */},
    {TextureFormat::k_DXT5A,
     VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */},
    {TextureFormat::k_CTX1, VK_FORMAT_UNDEFINED},
    {TextureFormat::k_DXT3A_AS_1_1_1_1, VK_FORMAT_UNDEFINED},
    {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED},
    {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED},
};
TextureCache::TextureCache(Memory* memory, RegisterFile* register_file,
TraceWriter* trace_writer,
ui::vulkan::VulkanDevice* device)
@ -46,12 +121,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file,
descriptor_pool_info.flags =
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
descriptor_pool_info.maxSets = 4096;
VkDescriptorPoolSize pool_sizes[2];
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER;
pool_sizes[0].descriptorCount = 32;
pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
pool_sizes[1].descriptorCount = 32;
descriptor_pool_info.poolSizeCount = 2;
VkDescriptorPoolSize pool_sizes[1];
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
pool_sizes[0].descriptorCount = 4096;
descriptor_pool_info.poolSizeCount = 1;
descriptor_pool_info.pPoolSizes = pool_sizes;
auto err = vkCreateDescriptorPool(*device_, &descriptor_pool_info, nullptr,
&descriptor_pool_);
@ -59,17 +132,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file,
// Create the descriptor set layout used for rendering.
// We always have the same number of samplers but only some are used.
VkDescriptorSetLayoutBinding bindings[5];
auto& sampler_binding = bindings[0];
sampler_binding.binding = 0;
sampler_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
sampler_binding.descriptorCount = kMaxTextureSamplers;
sampler_binding.stageFlags =
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
sampler_binding.pImmutableSamplers = nullptr;
VkDescriptorSetLayoutBinding bindings[4];
for (int i = 0; i < 4; ++i) {
auto& texture_binding = bindings[1 + i];
texture_binding.binding = 1 + i;
auto& texture_binding = bindings[i];
texture_binding.binding = i;
texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
texture_binding.descriptorCount = kMaxTextureSamplers;
texture_binding.stageFlags =
@ -103,6 +169,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file,
((y % 32 < 16) ^ (x % 32 >= 16)) ? 0xFF0000FF : 0xFFFFFFFF;
}
}
invalidated_textures_sets_[0].reserve(64);
invalidated_textures_sets_[1].reserve(64);
invalidated_textures_ = &invalidated_textures_sets_[0];
}
TextureCache::~TextureCache() {
@ -135,8 +205,27 @@ TextureCache::Texture* TextureCache::AllocateTexture(
return nullptr;
}
// TODO: Format
image_info.format = VK_FORMAT_R8G8B8A8_UNORM;
VkFormat format = VK_FORMAT_UNDEFINED;
if (texture_info.format_info) {
auto& config = texture_configs[int(texture_info.format_info->format)];
format = config.host_format != VK_FORMAT_UNDEFINED
? config.host_format
: VK_FORMAT_R8G8B8A8_UNORM;
} else {
format = VK_FORMAT_R8G8B8A8_UNORM;
}
VkFormatProperties props;
uint32_t required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT |
VK_FORMAT_FEATURE_BLIT_SRC_BIT;
vkGetPhysicalDeviceFormatProperties(*device_, format, &props);
if ((props.optimalTilingFeatures & required_flags) != required_flags) {
// Texture needs conversion on upload to a native format.
// assert_always();
}
image_info.format = format;
image_info.extent = {texture_info.width + 1, texture_info.height + 1,
texture_info.depth + 1};
image_info.mipLevels = 1;
@ -212,17 +301,24 @@ TextureCache::Texture* TextureCache::AllocateTexture(
}
// Releases every Vulkan object owned by |texture| and then the Texture itself.
//
// Each image view in texture->views is destroyed and removed from the list,
// followed by the image and its backing device memory. |texture| is deleted
// before returning, so the pointer must not be used afterwards.
//
// Returns true once the texture has been released.
bool TextureCache::FreeTexture(Texture* texture) {
  // Views reference the image, so they are destroyed before it.
  for (auto it = texture->views.begin(); it != texture->views.end();) {
    vkDestroyImageView(*device_, (*it)->view, nullptr);
    it = texture->views.erase(it);
  }

  vkDestroyImage(*device_, texture->image, nullptr);
  vkFreeMemory(*device_, texture->image_memory, nullptr);
  delete texture;
  return true;
}
TextureCache::Texture* TextureCache::DemandResolveTexture(
const TextureInfo& texture_info, TextureFormat format,
uint32_t* out_offset_x, uint32_t* out_offset_y) {
VkOffset2D* out_offset) {
// Check to see if we've already used a texture at this location.
auto texture = LookupAddress(
texture_info.guest_address, texture_info.size_2d.block_width,
texture_info.size_2d.block_height, format, out_offset_x, out_offset_y);
texture_info.size_2d.block_height, format, out_offset);
if (texture) {
return texture;
}
@ -230,7 +326,7 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
// No texture at this location. Make a new one.
texture = AllocateTexture(texture_info);
texture->is_full_texture = false;
resolve_textures_.push_back(std::unique_ptr<Texture>(texture));
resolve_textures_.push_back(texture);
return texture;
}
@ -241,14 +337,14 @@ TextureCache::Texture* TextureCache::Demand(
auto texture_hash = texture_info.hash();
for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) {
if (it->second->texture_info == texture_info) {
return it->second.get();
return it->second;
}
}
// Check resolve textures.
for (auto it = resolve_textures_.begin(); it != resolve_textures_.end();
++it) {
auto texture = (*it).get();
auto texture = (*it);
if (texture_info.guest_address == texture->texture_info.guest_address &&
texture_info.size_2d.logical_width ==
texture->texture_info.size_2d.logical_width &&
@ -259,9 +355,9 @@ TextureCache::Texture* TextureCache::Demand(
// Upgrade this texture to a full texture.
texture->is_full_texture = true;
texture->texture_info = texture_info;
textures_[texture_hash] = std::move(*it);
textures_[texture_hash] = *it;
it = resolve_textures_.erase(it);
return textures_[texture_hash].get();
return textures_[texture_hash];
}
}
@ -290,15 +386,34 @@ TextureCache::Texture* TextureCache::Demand(
break;
}
// Okay. Now that the texture is uploaded from system memory, put a writewatch
// on it to tell us if it's been modified from the guest.
if (!uploaded) {
// TODO: Destroy the texture.
assert_always();
return nullptr;
}
// Copy in overlapping resolve textures.
/*
for (auto it = resolve_textures_.begin(); it != resolve_textures_.end();
++it) {
auto texture = (*it);
if (texture_info.guest_address == texture->texture_info.guest_address &&
texture_info.size_2d.logical_width ==
texture->texture_info.size_2d.logical_width &&
texture_info.size_2d.logical_height ==
texture->texture_info.size_2d.logical_height) {
// Exact match.
// TODO: Lazy match (at an offset)
// Upgrade this texture to a full texture.
texture->is_full_texture = true;
texture->texture_info = texture_info;
textures_[texture_hash] = *it;
it = resolve_textures_.erase(it);
return textures_[texture_hash];
}
}
*/
// Though we didn't find an exact match, that doesn't mean we're out of the
// woods yet. This texture could either be a portion of another texture or
// vice versa. Copy any overlapping textures into this texture.
@ -306,8 +421,26 @@ TextureCache::Texture* TextureCache::Demand(
for (auto it = textures_.begin(); it != textures_.end(); ++it) {
}
textures_[texture_hash] = std::unique_ptr<Texture>(texture);
// Okay. Now that the texture is uploaded from system memory, put a writewatch
// on it to tell us if it's been modified from the guest.
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length,
cpu::MMIOHandler::kWatchWrite,
[](void* context_ptr, void* data_ptr, uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
// Clear watch handle first so we don't redundantly
// remove.
touched_texture->access_watch_handle = 0;
touched_texture->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock();
self->invalidated_textures_->push_back(touched_texture);
self->invalidated_textures_mutex_.unlock();
},
this, texture);
textures_[texture_hash] = texture;
return texture;
}
@ -346,7 +479,7 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture,
VkComponentSwizzle swiz_component_map[] = {
VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A,
VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_IDENTITY,
};
@ -373,11 +506,15 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture,
}
TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
auto sampler_hash = sampler_info.hash();
for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) {
if (it->second->sampler_info == sampler_info) {
// Found a compatible sampler.
return it->second.get();
return it->second;
}
}
@ -389,10 +526,55 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
sampler_create_info.pNext = nullptr;
sampler_create_info.flags = 0;
sampler_create_info.minFilter = VK_FILTER_NEAREST;
sampler_create_info.magFilter = VK_FILTER_NEAREST;
sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
// Texture level filtering.
VkSamplerMipmapMode mip_filter;
switch (sampler_info.mip_filter) {
case TextureFilter::kBaseMap:
// TODO(DrChat): ?
mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST;
break;
case TextureFilter::kPoint:
mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST;
break;
case TextureFilter::kLinear:
mip_filter = VK_SAMPLER_MIPMAP_MODE_LINEAR;
break;
default:
assert_unhandled_case(sampler_info.mip_filter);
return nullptr;
}
VkFilter min_filter;
switch (sampler_info.min_filter) {
case TextureFilter::kPoint:
min_filter = VK_FILTER_NEAREST;
break;
case TextureFilter::kLinear:
min_filter = VK_FILTER_LINEAR;
break;
default:
assert_unhandled_case(sampler_info.min_filter);
return nullptr;
}
// Magnification filter: only point and linear are representable as a
// VkFilter; any other guest value is reported and the sampler is rejected.
VkFilter mag_filter;
switch (sampler_info.mag_filter) {
  case TextureFilter::kPoint:
    mag_filter = VK_FILTER_NEAREST;
    break;
  case TextureFilter::kLinear:
    mag_filter = VK_FILTER_LINEAR;
    break;
  default:
    // Report the offending guest value, not the still-uninitialized local.
    assert_unhandled_case(sampler_info.mag_filter);
    return nullptr;
}
sampler_create_info.minFilter = min_filter;
sampler_create_info.magFilter = mag_filter;
sampler_create_info.mipmapMode = mip_filter;
// FIXME: Both halfway / mirror clamp to border aren't mapped properly.
VkSamplerAddressMode address_mode_map[] = {
/* kRepeat */ VK_SAMPLER_ADDRESS_MODE_REPEAT,
@ -431,37 +613,46 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
auto sampler = new Sampler();
sampler->sampler = vk_sampler;
sampler->sampler_info = sampler_info;
samplers_[sampler_hash] = std::unique_ptr<Sampler>(sampler);
samplers_[sampler_hash] = sampler;
return sampler;
}
TextureCache::Texture* TextureCache::LookupAddress(
uint32_t guest_address, uint32_t width, uint32_t height,
TextureFormat format, uint32_t* offset_x, uint32_t* offset_y) {
TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
uint32_t width,
uint32_t height,
TextureFormat format,
VkOffset2D* out_offset) {
for (auto it = textures_.begin(); it != textures_.end(); ++it) {
const auto& texture_info = it->second->texture_info;
if (guest_address >= texture_info.guest_address &&
guest_address <
texture_info.guest_address + texture_info.input_length &&
offset_x && offset_y) {
texture_info.size_2d.input_width >= width &&
texture_info.size_2d.input_height >= height && out_offset) {
auto offset_bytes = guest_address - texture_info.guest_address;
if (texture_info.dimension == Dimension::k2D) {
*offset_y = offset_bytes / texture_info.size_2d.input_pitch;
out_offset->x = 0;
out_offset->y = offset_bytes / texture_info.size_2d.input_pitch;
if (offset_bytes % texture_info.size_2d.input_pitch != 0) {
// TODO: offset_x
}
}
return it->second.get();
return it->second;
}
if (texture_info.guest_address == guest_address &&
texture_info.dimension == Dimension::k2D &&
texture_info.size_2d.input_width == width &&
texture_info.size_2d.input_height == height) {
return it->second.get();
if (out_offset) {
out_offset->x = 0;
out_offset->y = 0;
}
return it->second;
}
}
@ -469,27 +660,16 @@ TextureCache::Texture* TextureCache::LookupAddress(
for (auto it = resolve_textures_.begin(); it != resolve_textures_.end();
++it) {
const auto& texture_info = (*it)->texture_info;
if (guest_address >= texture_info.guest_address &&
guest_address <
texture_info.guest_address + texture_info.input_length &&
offset_x && offset_y) {
auto offset_bytes = guest_address - texture_info.guest_address;
if (texture_info.dimension == Dimension::k2D) {
*offset_y = offset_bytes / texture_info.size_2d.input_pitch;
if (offset_bytes % texture_info.size_2d.input_pitch != 0) {
// TODO: offset_x
}
}
return (*it).get();
}
if (texture_info.guest_address == guest_address &&
texture_info.dimension == Dimension::k2D &&
texture_info.size_2d.input_width == width &&
texture_info.size_2d.input_height == height) {
return (*it).get();
if (out_offset) {
out_offset->x = 0;
out_offset->y = 0;
}
return (*it);
}
}
@ -531,19 +711,74 @@ bool TextureCache::UploadTexture2D(
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(src.input_length, completion_fence);
size_t unpack_length = src.output_length;
auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
assert_not_null(alloc);
// TODO: Support these cases.
// assert_false(src.is_tiled);
// TODO: Support compression.
// assert_false(src.is_compressed());
// Upload texture into GPU memory.
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fallback to conversion
// on the CPU.
auto guest_ptr = memory_->TranslatePhysical(src.guest_address);
TextureSwap(src.endianness, alloc->host_ptr, guest_ptr, src.input_length);
void* host_address = memory_->TranslatePhysical(src.guest_address);
if (!src.is_tiled) {
if (src.size_2d.input_pitch == src.size_2d.output_pitch) {
// Fast path copy entire image.
TextureSwap(src.endianness, alloc->host_ptr, host_address, unpack_length);
} else {
// Slow path copy row-by-row because strides differ.
// UNPACK_ROW_LENGTH only works for uncompressed images, and likely does
// this exact thing under the covers, so we just always do it here.
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr);
uint32_t pitch =
std::min(src.size_2d.input_pitch, src.size_2d.output_pitch);
for (uint32_t y = 0;
y < std::min(src.size_2d.block_height, src.size_2d.logical_height);
y++) {
TextureSwap(src.endianness, dest, src_mem, pitch);
src_mem += src.size_2d.input_pitch;
dest += src.size_2d.output_pitch;
}
}
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty slow.
// TODO(benvanik): optimize this inner loop (or work by tiles).
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr);
uint32_t bytes_per_block = src.format_info->block_width *
src.format_info->block_height *
src.format_info->bits_per_pixel / 8;
// Tiled textures can be packed; get the offset into the packed texture.
uint32_t offset_x;
uint32_t offset_y;
TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y);
auto bpp = (bytes_per_block >> 2) +
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
for (uint32_t y = 0, output_base_offset = 0;
y < std::min(src.size_2d.block_height, src.size_2d.logical_height);
y++, output_base_offset += src.size_2d.output_pitch) {
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
offset_y + y,
(src.size_2d.input_width / src.format_info->block_width), bpp);
for (uint32_t x = 0, output_offset = output_base_offset;
x < src.size_2d.block_width; x++, output_offset += bytes_per_block) {
auto input_offset =
TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
input_base_offset) >>
bpp;
TextureSwap(src.endianness, dest + output_offset,
src_mem + input_offset * bytes_per_block, bytes_per_block);
}
}
}
staging_buffer_.Flush(alloc);
// Insert a memory barrier into the command buffer to ensure the upload has
@ -580,21 +815,15 @@ bool TextureCache::UploadTexture2D(
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
assert_true(src.size_2d.input_width >=
dest->texture_info.size_2d.output_width);
assert_true(src.size_2d.input_height >=
dest->texture_info.size_2d.output_height);
// For now, just transfer the grid we uploaded earlier into the texture.
// Now move the converted texture into the destination.
VkBufferImageCopy copy_region;
copy_region.bufferOffset = alloc->offset;
copy_region.bufferRowLength = src.width + 1;
copy_region.bufferImageHeight = src.height + 1;
copy_region.bufferRowLength = src.size_2d.output_width;
copy_region.bufferImageHeight = src.size_2d.output_height;
copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region.imageOffset = {0, 0, 0};
copy_region.imageExtent = {dest->texture_info.width + 1,
dest->texture_info.height + 1,
dest->texture_info.depth + 1};
copy_region.imageExtent = {src.size_2d.output_width,
src.size_2d.output_height, 1};
vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&copy_region);
@ -659,27 +888,13 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
VkWriteDescriptorSet descriptor_writes[4];
std::memset(descriptor_writes, 0, sizeof(descriptor_writes));
uint32_t descriptor_write_count = 0;
/*
// TODO(DrChat): Do we really need to separate samplers and images here?
if (update_set_info->sampler_write_count) {
auto& sampler_write = descriptor_writes[descriptor_write_count++];
sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
sampler_write.pNext = nullptr;
sampler_write.dstSet = descriptor_set;
sampler_write.dstBinding = 0;
sampler_write.dstArrayElement = 0;
sampler_write.descriptorCount = update_set_info->sampler_write_count;
sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
sampler_write.pImageInfo = update_set_info->sampler_infos;
}
*/
// FIXME: These are not lined up properly with tf binding points!
if (update_set_info->image_1d_write_count) {
auto& image_write = descriptor_writes[descriptor_write_count++];
image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
image_write.pNext = nullptr;
image_write.dstSet = descriptor_set;
image_write.dstBinding = 1;
image_write.dstBinding = 0;
image_write.dstArrayElement = 0;
image_write.descriptorCount = update_set_info->image_1d_write_count;
image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -690,7 +905,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
image_write.pNext = nullptr;
image_write.dstSet = descriptor_set;
image_write.dstBinding = 2;
image_write.dstBinding = 1;
image_write.dstArrayElement = 0;
image_write.descriptorCount = update_set_info->image_2d_write_count;
image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -701,7 +916,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
image_write.pNext = nullptr;
image_write.dstSet = descriptor_set;
image_write.dstBinding = 3;
image_write.dstBinding = 2;
image_write.dstArrayElement = 0;
image_write.descriptorCount = update_set_info->image_3d_write_count;
image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -712,7 +927,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
image_write.pNext = nullptr;
image_write.dstSet = descriptor_set;
image_write.dstBinding = 4;
image_write.dstBinding = 3;
image_write.dstArrayElement = 0;
image_write.descriptorCount = update_set_info->image_cube_write_count;
image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -814,7 +1029,7 @@ bool TextureCache::SetupTextureBinding(
return false;
}
image_write->imageView = view->view;
image_write->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_write->imageLayout = texture->image_layout;
image_write->sampler = sampler->sampler;
return true;
@ -838,6 +1053,25 @@ void TextureCache::Scavenge() {
}
staging_buffer_.Scavenge();
// Clean up any invalidated textures.
invalidated_textures_mutex_.lock();
std::vector<Texture*>& invalidated_textures = *invalidated_textures_;
if (invalidated_textures_ == &invalidated_textures_sets_[0]) {
invalidated_textures_ = &invalidated_textures_sets_[1];
} else {
invalidated_textures_ = &invalidated_textures_sets_[0];
}
invalidated_textures_mutex_.unlock();
if (invalidated_textures.empty()) {
return;
}
for (auto& texture : invalidated_textures) {
textures_.erase(texture->texture_info.hash());
FreeTexture(texture);
}
invalidated_textures.clear();
}
} // namespace vulkan

View File

@ -101,12 +101,12 @@ class TextureCache {
// contains this address at an offset.
Texture* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, TextureFormat format,
uint32_t* offset_x = nullptr,
uint32_t* offset_y = nullptr);
VkOffset2D* out_offset = nullptr);
// Demands a texture for the purpose of resolving from EDRAM. This either
// creates a new texture or returns a previously created texture. texture_info
// is not required to be completely filled out, just guest_address and size.
// is not required to be completely filled out, just guest_address and all
// sizes.
//
// It's possible that this may return an image that is larger than the
// requested size (e.g. resolving into a bigger texture) or an image that
@ -114,8 +114,7 @@ class TextureCache {
// At the very least, it's guaranteed that the image will be large enough to
// hold the requested size.
Texture* DemandResolveTexture(const TextureInfo& texture_info,
TextureFormat format, uint32_t* out_offset_x,
uint32_t* out_offset_y);
TextureFormat format, VkOffset2D* out_offset);
// Clears all cached content.
void ClearCache();
@ -172,11 +171,14 @@ class TextureCache {
std::vector<std::pair<VkDescriptorSet, std::shared_ptr<ui::vulkan::Fence>>>
in_flight_sets_;
// Temporary until we have circular buffers.
ui::vulkan::CircularBuffer staging_buffer_;
std::unordered_map<uint64_t, std::unique_ptr<Texture>> textures_;
std::unordered_map<uint64_t, std::unique_ptr<Sampler>> samplers_;
std::vector<std::unique_ptr<Texture>> resolve_textures_;
std::unordered_map<uint64_t, Texture*> textures_;
std::unordered_map<uint64_t, Sampler*> samplers_;
std::vector<Texture*> resolve_textures_;
std::mutex invalidated_textures_mutex_;
std::vector<Texture*>* invalidated_textures_;
std::vector<Texture*> invalidated_textures_sets_[2];
struct UpdateSetInfo {
// Bitmap of all 32 fetch constants and whether they have been setup yet.