Basic cubemap support, though the sampling code is wrong.

This commit is contained in:
Ben Vanik 2015-03-22 19:15:22 -07:00
parent 7677d6ea9f
commit b392afbfae
8 changed files with 265 additions and 19 deletions

View File

@ -74,6 +74,59 @@ std::string GL4Shader::GetHeader() {
return header;
}
// Returns GLSL helper source that is concatenated onto the generated shader
// boilerplate. At present it only defines cube(), the emulation of the CUBEv
// vector ALU instruction.
std::string GL4Shader::GetFooter() {
  // http://www.nvidia.com/object/cube_map_ogl_tutorial.html
  // http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
  // src0 = Rn.zzxy, src1 = Rn.yxzz
  // dst.W = FaceId;
  // dst.Z = 2.0f * MajorAxis;
  // dst.Y = S cube coordinate;
  // dst.X = T cube coordinate;
  /*
  major axis
  direction     target                                sc     tc    ma
  ----------    ------------------------------------  ---    ---   ---
     +rx        GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT=0  -rz    -ry   rx
     -rx        GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT=1  +rz    -ry   rx
     +ry        GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT=2  +rx    +rz   ry
     -ry        GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT=3  +rx    -rz   ry
     +rz        GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT=4  +rx    -ry   rz
     -rz        GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT=5  -rx    -ry   rz
  */
  // Notes on the GLSL below:
  // - sc/tc are taken from the *signed* components, as the table above
  //   requires; building them from abs() would collapse every direction on a
  //   face into a single quadrant.
  // - ma stays the magnitude of the major axis so that sc / ma and tc / ma
  //   remain in [-1, 1] before being remapped to [0, 1].
  // - The major axis is chosen with >= (Z first, then Y, then X) so that a tie
  //   between X and Y can no longer fall through to the Z face, where ma would
  //   be the smallest component (possibly zero).
  // - An all-zero input still produces ma == 0 and therefore a division by
  //   zero, exactly as before.
  static const std::string footer =
      "vec4 cube(vec4 src0, vec4 src1) {\n"
      "  vec3 src = vec3(src1.y, src1.x, src1.z);\n"
      "  vec3 abs_src = abs(src);\n"
      "  int face_id;\n"
      "  float sc;\n"
      "  float tc;\n"
      "  float ma;\n"
      "  if (abs_src.z >= abs_src.x && abs_src.z >= abs_src.y) {\n"
      "    ma = abs_src.z;\n"
      "    if (src.z > 0.0) {\n"
      "      face_id = 4; sc = src.x; tc = -src.y;\n"
      "    } else {\n"
      "      face_id = 5; sc = -src.x; tc = -src.y;\n"
      "    }\n"
      "  } else if (abs_src.y >= abs_src.x) {\n"
      "    ma = abs_src.y;\n"
      "    if (src.y > 0.0) {\n"
      "      face_id = 2; sc = src.x; tc = src.z;\n"
      "    } else {\n"
      "      face_id = 3; sc = src.x; tc = -src.z;\n"
      "    }\n"
      "  } else {\n"
      "    ma = abs_src.x;\n"
      "    if (src.x > 0.0) {\n"
      "      face_id = 0; sc = -src.z; tc = -src.y;\n"
      "    } else {\n"
      "      face_id = 1; sc = src.z; tc = -src.y;\n"
      "    }\n"
      "  }\n"
      "  float s = (sc / ma + 1.0) / 2.0;\n"
      "  float t = (tc / ma + 1.0) / 2.0;\n"
      "  return vec4(t, s, 2.0 * ma, float(face_id));\n"
      "}\n";
  return footer;
}
bool GL4Shader::PrepareVertexArrayObject() {
glCreateVertexArrays(1, &vao_);
@ -182,7 +235,6 @@ bool GL4Shader::PrepareVertexShader(
PLOGE("Unable to prepare vertex shader array object");
return false;
}
std::string apply_transform =
"vec4 applyTransform(const in StateData state, vec4 pos) {\n"
" if (state.vtx_fmt.w == 0.0) {\n"
@ -221,7 +273,8 @@ bool GL4Shader::PrepareVertexShader(
" processVertex(state);\n"
" gl_Position = applyTransform(state, gl_Position);\n"
" draw_id = gl_DrawIDARB;\n"
"}\n";
"}\n" +
GetFooter();
std::string translated_source =
shader_translator_.TranslateVertexShader(this, program_cntl);
@ -273,7 +326,8 @@ bool GL4Shader::PreparePixelShader(
" if (state.alpha_test.x != 0.0) {\n"
" applyAlphaTest(int(state.alpha_test.y), state.alpha_test.z);\n"
" }\n"
"}\n";
"}\n" +
GetFooter();
std::string translated_source =
shader_translator_.TranslatePixelShader(this, program_cntl);

View File

@ -34,6 +34,7 @@ class GL4Shader : public Shader {
protected:
std::string GetHeader();
std::string GetFooter();
bool PrepareVertexArrayObject();
bool CompileProgram(std::string source);

View File

@ -675,7 +675,16 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
return true;
}
// CUBEv
// Translates the CUBEv vector op into a call to the GLSL cube() helper:
// emits `cube(<src 1>, <src 2>)` between the usual vector-op prologue and
// epilogue. Always reports success.
bool GL4ShaderTranslator::TranslateALU_CUBEv(const instr_alu_t& alu) {
  BeginAppendVectorOp(alu);
  Append("cube(");
  // Source operands 1 and 2, comma separated.
  for (int operand = 1; operand <= 2; ++operand) {
    if (operand != 1) {
      Append(", ");
    }
    AppendVectorOpSrcReg(alu, operand);
  }
  Append(")");
  EndAppendVectorOp(alu);
  return true;
}
bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
BeginAppendVectorOp(alu);
@ -1206,7 +1215,7 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
ALU_INSTR_IMPL(DOT4v, 2), // 15
ALU_INSTR_IMPL(DOT3v, 2), // 16
ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ???
ALU_INSTR(CUBEv, 2), // 18
ALU_INSTR_IMPL(CUBEv, 2), // 18
ALU_INSTR_IMPL(MAX4v, 1), // 19
ALU_INSTR_IMPL(PRED_SETE_PUSHv, 2), // 20
ALU_INSTR_IMPL(PRED_SETNE_PUSHv, 2), // 21
@ -1876,15 +1885,29 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
// Translate.
// TODO(benvanik): if sampler == null, set to invalid color.
Append(" if (state.texture_samplers[%d].x != 0) {\n", tex->const_idx & 0xF);
Append(" t = texture(");
Append("%s(state.texture_samplers[%d])", sampler_type, tex->const_idx & 0xF);
Append(", r%u.", tex->src_reg);
src_swiz = tex->src_swiz;
for (int i = 0; i < src_component_count; i++) {
Append("%c", chan_names[src_swiz & 0x3]);
src_swiz >>= 2;
if (tex->dimension == DIMENSION_CUBE) {
Append(" t.xyz = r%u.", tex->src_reg);
src_swiz = tex->src_swiz;
for (int i = 0; i < src_component_count; i++) {
Append("%c", chan_names[src_swiz & 0x3]);
src_swiz >>= 2;
}
Append(";\n");
// TODO(benvanik): undo CUBEv logic on t? (s,t,faceid)
Append(" t = texture(%s(state.texture_samplers[%d]), t.xyz);\n",
sampler_type, tex->const_idx & 0xF);
} else {
Append(" t = texture(");
Append("%s(state.texture_samplers[%d])", sampler_type,
tex->const_idx & 0xF);
Append(", r%u.", tex->src_reg);
src_swiz = tex->src_swiz;
for (int i = 0; i < src_component_count; i++) {
Append("%c", chan_names[src_swiz & 0x3]);
src_swiz >>= 2;
}
Append(");\n");
}
Append(");\n");
Append(" } else {\n");
Append(" t = vec4(r%u.", tex->src_reg);
src_swiz = tex->src_swiz;

View File

@ -85,7 +85,7 @@ class GL4ShaderTranslator {
bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu);
bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu);
bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu);
// CUBEv
bool TranslateALU_CUBEv(const ucode::instr_alu_t& alu);
bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu);
bool TranslateALU_PRED_SETXX_PUSHv(const ucode::instr_alu_t& alu,
const char* op);

View File

@ -128,7 +128,8 @@ static const TextureConfig texture_configs[64] = {
GL_INVALID_ENUM, GL_INVALID_ENUM},
{TextureFormat::k_32_32_32_FLOAT, GL_RGB32F, GL_RGB, GL_FLOAT},
{TextureFormat::k_DXT3A, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
{TextureFormat::k_DXT5A, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
{TextureFormat::k_DXT5A, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,
GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE},
{TextureFormat::k_CTX1, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
{TextureFormat::k_DXT3A_AS_1_1_1_1, GL_INVALID_ENUM, GL_INVALID_ENUM,
GL_INVALID_ENUM},
@ -470,9 +471,11 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
case Dimension::k2D:
uploaded = UploadTexture2D(entry->handle, texture_info);
break;
case Dimension::kCube:
uploaded = UploadTextureCube(entry->handle, texture_info);
break;
case Dimension::k1D:
case Dimension::k3D:
case Dimension::kCube:
assert_unhandled_case(texture_info.dimension);
return false;
}
@ -773,6 +776,110 @@ bool TextureCache::UploadTexture2D(GLuint texture,
return true;
}
// Uploads the six faces of a cube map from guest memory into |texture|.
//
// The guest data is staged in the scratch buffer (converted with TextureSwap
// and, for tiled textures, de-tiled block by block) and then handed to GL as
// a 6-layer 3D sub-image through the pixel unpack buffer binding.
//
// Returns false when the texture format has no GL mapping, true otherwise.
bool TextureCache::UploadTextureCube(GLuint texture,
                                     const TextureInfo& texture_info) {
  const auto host_address = memory_->Translate(texture_info.guest_address);

  const auto& config =
      texture_configs[uint32_t(texture_info.format_info->format)];
  if (config.format == GL_INVALID_ENUM) {
    assert_always("Unhandled texture format");
    return false;
  }

  const auto& size = texture_info.size_cube;
  const auto format_info = texture_info.format_info;

  size_t unpack_length = texture_info.output_length;
  glTextureStorage2D(texture, 1, config.internal_format, size.output_width,
                     size.output_height);

  // size.block_width/block_height are padded up to whole 32x32 tiles and
  // describe the *input* layout. The staging allocation is only
  // output_length bytes, i.e. it holds the un-padded rows/columns of each
  // face, so every copy below must be clamped to the output block counts or
  // it would run past the end of the allocation.
  const uint32_t output_block_width =
      size.output_width / format_info->block_width;
  const uint32_t output_block_height =
      size.output_height / format_info->block_height;
  const uint32_t copy_block_width =
      std::min(size.block_width, output_block_width);
  const uint32_t copy_block_height =
      std::min(size.block_height, output_block_height);

  auto allocation = scratch_buffer_->Acquire(unpack_length);
  const uint8_t* src_base = host_address;
  uint8_t* dest_base = reinterpret_cast<uint8_t*>(allocation.host_ptr);

  if (!texture_info.is_tiled) {
    if (size.input_pitch == size.output_pitch &&
        size.input_face_length == size.output_face_length) {
      // Fast path: input and output layouts match exactly (same row pitch and
      // no padding rows between faces), so copy the entire image at once.
      TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
                  unpack_length);
    } else {
      // Slow path copy row-by-row because strides differ.
      // UNPACK_ROW_LENGTH only works for uncompressed images, and likely does
      // this exact thing under the covers, so we just always do it here.
      const uint32_t row_bytes = std::min(size.input_pitch, size.output_pitch);
      for (uint32_t face = 0; face < 6; ++face) {
        // Re-base both pointers per face: faces are input_face_length apart
        // in guest memory but output_face_length apart in the staging buffer.
        const uint8_t* src = src_base + size_t(face) * size.input_face_length;
        uint8_t* dest = dest_base + size_t(face) * size.output_face_length;
        for (uint32_t y = 0; y < copy_block_height; ++y) {
          TextureSwap(texture_info.endianness, dest, src, row_bytes);
          src += size.input_pitch;
          dest += size.output_pitch;
        }
      }
    }
  } else {
    // TODO(benvanik): optimize this inner loop (or work by tiles).
    uint32_t bytes_per_block = format_info->block_width *
                               format_info->block_height *
                               format_info->bits_per_pixel / 8;

    // Tiled textures can be packed; get the offset into the packed texture.
    uint32_t offset_x;
    uint32_t offset_y;
    TextureInfo::GetPackedTileOffset(texture_info, &offset_x, &offset_y);

    // Shift amount handed to the tiled-offset helpers, derived from the
    // block size in bytes.
    auto bpp = (bytes_per_block >> 2) +
               ((bytes_per_block >> 1) >> (bytes_per_block >> 2));
    for (uint32_t face = 0; face < 6; ++face) {
      const uint8_t* src = src_base + size_t(face) * size.input_face_length;
      uint8_t* dest = dest_base + size_t(face) * size.output_face_length;
      for (uint32_t y = 0, output_base_offset = 0; y < copy_block_height;
           y++, output_base_offset += size.output_pitch) {
        auto input_base_offset = TextureInfo::TiledOffset2DOuter(
            offset_y + y, (size.input_width / format_info->block_width), bpp);
        for (uint32_t x = 0, output_offset = output_base_offset;
             x < copy_block_width; x++, output_offset += bytes_per_block) {
          auto input_offset =
              TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
                                              input_base_offset) >>
              bpp;
          TextureSwap(texture_info.endianness, dest + output_offset,
                      src + input_offset * bytes_per_block, bytes_per_block);
        }
      }
    }
  }
  size_t unpack_offset = allocation.offset;
  scratch_buffer_->Commit(std::move(allocation));
  // TODO(benvanik): avoid flush on entire buffer by using another texture
  // buffer.
  scratch_buffer_->Flush();

  // With a pixel unpack buffer bound, the data "pointer" passed to GL is the
  // byte offset of the staged image inside that buffer.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
  if (texture_info.is_compressed()) {
    glCompressedTextureSubImage3D(
        texture, 0, 0, 0, 0, size.output_width, size.output_height, 6,
        config.format, static_cast<GLsizei>(unpack_length),
        reinterpret_cast<void*>(unpack_offset));
  } else {
    // Most of these don't seem to have an effect on compressed images.
    // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
    // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
    // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    glTextureSubImage3D(texture, 0, 0, 0, 0, size.output_width,
                        size.output_height, 6, config.format, config.type,
                        reinterpret_cast<void*>(unpack_offset));
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  return true;
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -91,6 +91,7 @@ class TextureCache {
void EvictTexture(TextureEntry* entry);
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
Memory* memory_;
CircularBuffer* scratch_buffer_;

View File

@ -121,11 +121,15 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.height = fetch.size_2d.height;
break;
case Dimension::k3D:
case Dimension::kCube:
info.width = fetch.size_3d.width;
info.height = fetch.size_3d.height;
info.depth = fetch.size_3d.depth;
break;
case Dimension::kCube:
info.width = fetch.size_stack.width;
info.height = fetch.size_stack.height;
info.depth = fetch.size_stack.depth;
break;
}
info.format_info = FormatInfo::Get(fetch.format);
info.endianness = static_cast<Endian>(fetch.endianness);
@ -152,8 +156,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
// TODO(benvanik): calculate size.
return false;
case Dimension::kCube:
// TODO(benvanik): calculate size.
return false;
info.CalculateTextureSizesCube(fetch);
break;
}
return true;
@ -208,6 +212,49 @@ void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
output_length = size_2d.output_pitch * block_height;
}
// Fills in size_cube, input_length and output_length for a cube texture.
//
// "Input" values describe the guest layout (padded to whole 32x32-block
// tiles, and to a 256-byte row pitch when the texture is linear); "output"
// values describe the tightly packed image that is handed to the host API.
// Both lengths cover all six faces.
void TextureInfo::CalculateTextureSizesCube(
    const xe_gpu_texture_fetch_t& fetch) {
  assert_true(fetch.size_stack.depth + 1 == 6);
  // The fetch constant stores dimensions minus one.
  size_cube.logical_width = 1 + fetch.size_stack.width;
  size_cube.logical_height = 1 + fetch.size_stack.height;

  // w/h in blocks must be a multiple of block size.
  uint32_t block_width =
      poly::round_up(size_cube.logical_width, format_info->block_width) /
      format_info->block_width;
  uint32_t block_height =
      poly::round_up(size_cube.logical_height, format_info->block_height) /
      format_info->block_height;

  // Tiles are 32x32 blocks. All textures must be multiples of tile dimensions.
  // Integer ceil-division; avoids a float round trip and std::ceilf, which
  // not every standard library declares.
  const uint32_t kTileBlocks = 32;
  uint32_t tile_width = (block_width + kTileBlocks - 1) / kTileBlocks;
  uint32_t tile_height = (block_height + kTileBlocks - 1) / kTileBlocks;
  size_cube.block_width = tile_width * kTileBlocks;
  size_cube.block_height = tile_height * kTileBlocks;

  uint32_t bytes_per_block = format_info->block_width *
                             format_info->block_height *
                             format_info->bits_per_pixel / 8;
  uint32_t byte_pitch = size_cube.block_width * bytes_per_block;
  if (!is_tiled) {
    // Each row must be a multiple of 256 in linear textures.
    byte_pitch = poly::round_up(byte_pitch, 256);
  }

  // Padded (guest) dimensions in texels.
  size_cube.input_width = size_cube.block_width * format_info->block_width;
  size_cube.input_height = size_cube.block_height * format_info->block_height;

  // Un-padded (host) dimensions in texels, rounded up to whole blocks only.
  size_cube.output_width = block_width * format_info->block_width;
  size_cube.output_height = block_height * format_info->block_height;

  size_cube.input_pitch = byte_pitch;
  size_cube.output_pitch = block_width * bytes_per_block;

  // Per-face and total byte sizes; note the input uses the tile-padded row
  // count while the output uses the un-padded one.
  size_cube.input_face_length = size_cube.input_pitch * size_cube.block_height;
  input_length = size_cube.input_face_length * 6;

  size_cube.output_face_length = size_cube.output_pitch * block_height;
  output_length = size_cube.output_face_length * 6;
}
void TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y) {

View File

@ -187,6 +187,18 @@ struct TextureInfo {
struct {
} size_3d;
// Cube map size information, filled in by CalculateTextureSizesCube().
// Widths/heights are per face; "input" refers to the guest layout and
// "output" to the image that is uploaded.
struct {
// Face dimensions from the fetch constant (stored value + 1).
uint32_t logical_width;
uint32_t logical_height;
// Face dimensions in format blocks, padded up to a multiple of 32 blocks.
uint32_t block_width;
uint32_t block_height;
// Padded block dimensions multiplied by the format's block size.
uint32_t input_width;
uint32_t input_height;
// Bytes per padded row of blocks (rounded up to 256 for linear textures).
uint32_t input_pitch;
// Logical dimensions rounded up to a whole number of format blocks.
uint32_t output_width;
uint32_t output_height;
// Bytes per un-padded row of blocks.
uint32_t output_pitch;
// Bytes per face: input_pitch * padded block rows.
uint32_t input_face_length;
// Bytes per face: output_pitch * un-padded block rows.
uint32_t output_face_length;
} size_cube;
};
@ -209,6 +221,7 @@ struct TextureInfo {
private:
void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
void CalculateTextureSizesCube(const xenos::xe_gpu_texture_fetch_t& fetch);
};
} // namespace gpu