Basic cubemap support, though the sampling code is wrong.
This commit is contained in:
parent
7677d6ea9f
commit
b392afbfae
|
@ -74,6 +74,59 @@ std::string GL4Shader::GetHeader() {
|
|||
return header;
|
||||
}
|
||||
|
||||
std::string GL4Shader::GetFooter() {
  // Returns the GLSL helper source that is concatenated after the generated
  // main() body. It currently contains only cube(), the helper the translator
  // emits calls to for the CUBEv ALU instruction.
  //
  // References:
  // http://www.nvidia.com/object/cube_map_ogl_tutorial.html
  // http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
  //
  // Instruction operands and results:
  //   src0 = Rn.zzxy, src1 = Rn.yxzz
  //   dst.W = FaceId;
  //   dst.Z = 2.0f * MajorAxis;
  //   dst.Y = S cube coordinate;
  //   dst.X = T cube coordinate;
  /*
    major axis
    direction     target                                sc     tc    ma
    ----------    ------------------------------------  ---    ---   ---
     +rx          GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT=0  -rz    -ry   rx
     -rx          GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT=1  +rz    -ry   rx
     +ry          GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT=2  +rx    +rz   ry
     -ry          GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT=3  +rx    -rz   ry
     +rz          GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT=4  +rx    -ry   rz
     -rz          GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT=5  -rx    -ry   rz
  */
  // Notes on the GLSL below:
  //  - src1 is swizzled .yxzz, so (src1.y, src1.x, src1.z) restores (x, y, z).
  //  - sc/tc use the *signed* components exactly as listed in the table; only
  //    the major axis magnitude (ma) is an absolute value. Using abs() for
  //    sc/tc would collapse every direction into one quadrant of the face.
  //  - The axis comparisons are inclusive so that ties (for example
  //    (1, 1, 0)) select an axis whose magnitude is the maximum rather than
  //    falling through to an axis with a smaller (possibly zero) magnitude.
  //  - A zero-length direction still yields ma == 0 and therefore a division
  //    by zero; that input has no meaningful cube face.
  static const std::string footer =
      "vec4 cube(vec4 src0, vec4 src1) {\n"
      "  vec3 src = vec3(src1.y, src1.x, src1.z);\n"
      "  vec3 abs_src = abs(src);\n"
      "  int face_id;\n"
      "  float sc;\n"
      "  float tc;\n"
      "  float ma;\n"
      "  if (abs_src.x >= abs_src.y && abs_src.x >= abs_src.z) {\n"
      "    ma = abs_src.x;\n"
      "    if (src.x > 0.0) {\n"
      "      face_id = 0; sc = -src.z; tc = -src.y;\n"
      "    } else {\n"
      "      face_id = 1; sc = src.z; tc = -src.y;\n"
      "    }\n"
      "  } else if (abs_src.y >= abs_src.z) {\n"
      "    ma = abs_src.y;\n"
      "    if (src.y > 0.0) {\n"
      "      face_id = 2; sc = src.x; tc = src.z;\n"
      "    } else {\n"
      "      face_id = 3; sc = src.x; tc = -src.z;\n"
      "    }\n"
      "  } else {\n"
      "    ma = abs_src.z;\n"
      "    if (src.z > 0.0) {\n"
      "      face_id = 4; sc = src.x; tc = -src.y;\n"
      "    } else {\n"
      "      face_id = 5; sc = -src.x; tc = -src.y;\n"
      "    }\n"
      "  }\n"
      "  float s = (sc / ma + 1.0) / 2.0;\n"
      "  float t = (tc / ma + 1.0) / 2.0;\n"
      "  return vec4(t, s, 2.0 * ma, float(face_id));\n"
      "}\n";
  return footer;
}
|
||||
|
||||
bool GL4Shader::PrepareVertexArrayObject() {
|
||||
glCreateVertexArrays(1, &vao_);
|
||||
|
||||
|
@ -182,7 +235,6 @@ bool GL4Shader::PrepareVertexShader(
|
|||
PLOGE("Unable to prepare vertex shader array object");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string apply_transform =
|
||||
"vec4 applyTransform(const in StateData state, vec4 pos) {\n"
|
||||
" if (state.vtx_fmt.w == 0.0) {\n"
|
||||
|
@ -221,7 +273,8 @@ bool GL4Shader::PrepareVertexShader(
|
|||
" processVertex(state);\n"
|
||||
" gl_Position = applyTransform(state, gl_Position);\n"
|
||||
" draw_id = gl_DrawIDARB;\n"
|
||||
"}\n";
|
||||
"}\n" +
|
||||
GetFooter();
|
||||
|
||||
std::string translated_source =
|
||||
shader_translator_.TranslateVertexShader(this, program_cntl);
|
||||
|
@ -273,7 +326,8 @@ bool GL4Shader::PreparePixelShader(
|
|||
" if (state.alpha_test.x != 0.0) {\n"
|
||||
" applyAlphaTest(int(state.alpha_test.y), state.alpha_test.z);\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
"}\n" +
|
||||
GetFooter();
|
||||
|
||||
std::string translated_source =
|
||||
shader_translator_.TranslatePixelShader(this, program_cntl);
|
||||
|
|
|
@ -34,6 +34,7 @@ class GL4Shader : public Shader {
|
|||
|
||||
protected:
|
||||
std::string GetHeader();
|
||||
std::string GetFooter();
|
||||
bool PrepareVertexArrayObject();
|
||||
bool CompileProgram(std::string source);
|
||||
|
||||
|
|
|
@ -675,7 +675,16 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// CUBEv
|
||||
bool GL4ShaderTranslator::TranslateALU_CUBEv(const instr_alu_t& alu) {
  // Emits `cube(<operand 1>, <operand 2>)` as the value of a vector op. The
  // cube() callee is a GLSL helper and is not defined by this translator
  // method.
  BeginAppendVectorOp(alu);
  Append("cube(");
  for (int operand = 1; operand <= 2; ++operand) {
    if (operand != 1) {
      Append(", ");
    }
    AppendVectorOpSrcReg(alu, operand);
  }
  Append(")");
  EndAppendVectorOp(alu);
  return true;
}
|
||||
|
||||
bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
|
||||
BeginAppendVectorOp(alu);
|
||||
|
@ -1206,7 +1215,7 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
|
|||
ALU_INSTR_IMPL(DOT4v, 2), // 15
|
||||
ALU_INSTR_IMPL(DOT3v, 2), // 16
|
||||
ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ???
|
||||
ALU_INSTR(CUBEv, 2), // 18
|
||||
ALU_INSTR_IMPL(CUBEv, 2), // 18
|
||||
ALU_INSTR_IMPL(MAX4v, 1), // 19
|
||||
ALU_INSTR_IMPL(PRED_SETE_PUSHv, 2), // 20
|
||||
ALU_INSTR_IMPL(PRED_SETNE_PUSHv, 2), // 21
|
||||
|
@ -1876,15 +1885,29 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
|
|||
// Translate.
|
||||
// TODO(benvanik): if sampler == null, set to invalid color.
|
||||
Append(" if (state.texture_samplers[%d].x != 0) {\n", tex->const_idx & 0xF);
|
||||
Append(" t = texture(");
|
||||
Append("%s(state.texture_samplers[%d])", sampler_type, tex->const_idx & 0xF);
|
||||
Append(", r%u.", tex->src_reg);
|
||||
src_swiz = tex->src_swiz;
|
||||
for (int i = 0; i < src_component_count; i++) {
|
||||
Append("%c", chan_names[src_swiz & 0x3]);
|
||||
src_swiz >>= 2;
|
||||
if (tex->dimension == DIMENSION_CUBE) {
|
||||
Append(" t.xyz = r%u.", tex->src_reg);
|
||||
src_swiz = tex->src_swiz;
|
||||
for (int i = 0; i < src_component_count; i++) {
|
||||
Append("%c", chan_names[src_swiz & 0x3]);
|
||||
src_swiz >>= 2;
|
||||
}
|
||||
Append(";\n");
|
||||
// TODO(benvanik): undo CUBEv logic on t? (s,t,faceid)
|
||||
Append(" t = texture(%s(state.texture_samplers[%d]), t.xyz);\n",
|
||||
sampler_type, tex->const_idx & 0xF);
|
||||
} else {
|
||||
Append(" t = texture(");
|
||||
Append("%s(state.texture_samplers[%d])", sampler_type,
|
||||
tex->const_idx & 0xF);
|
||||
Append(", r%u.", tex->src_reg);
|
||||
src_swiz = tex->src_swiz;
|
||||
for (int i = 0; i < src_component_count; i++) {
|
||||
Append("%c", chan_names[src_swiz & 0x3]);
|
||||
src_swiz >>= 2;
|
||||
}
|
||||
Append(");\n");
|
||||
}
|
||||
Append(");\n");
|
||||
Append(" } else {\n");
|
||||
Append(" t = vec4(r%u.", tex->src_reg);
|
||||
src_swiz = tex->src_swiz;
|
||||
|
|
|
@ -85,7 +85,7 @@ class GL4ShaderTranslator {
|
|||
bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu);
|
||||
bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu);
|
||||
bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu);
|
||||
// CUBEv
|
||||
bool TranslateALU_CUBEv(const ucode::instr_alu_t& alu);
|
||||
bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu);
|
||||
bool TranslateALU_PRED_SETXX_PUSHv(const ucode::instr_alu_t& alu,
|
||||
const char* op);
|
||||
|
|
|
@ -128,7 +128,8 @@ static const TextureConfig texture_configs[64] = {
|
|||
GL_INVALID_ENUM, GL_INVALID_ENUM},
|
||||
{TextureFormat::k_32_32_32_FLOAT, GL_RGB32F, GL_RGB, GL_FLOAT},
|
||||
{TextureFormat::k_DXT3A, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
|
||||
{TextureFormat::k_DXT5A, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
|
||||
{TextureFormat::k_DXT5A, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,
|
||||
GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE},
|
||||
{TextureFormat::k_CTX1, GL_INVALID_ENUM, GL_INVALID_ENUM, GL_INVALID_ENUM},
|
||||
{TextureFormat::k_DXT3A_AS_1_1_1_1, GL_INVALID_ENUM, GL_INVALID_ENUM,
|
||||
GL_INVALID_ENUM},
|
||||
|
@ -470,9 +471,11 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
|
|||
case Dimension::k2D:
|
||||
uploaded = UploadTexture2D(entry->handle, texture_info);
|
||||
break;
|
||||
case Dimension::kCube:
|
||||
uploaded = UploadTextureCube(entry->handle, texture_info);
|
||||
break;
|
||||
case Dimension::k1D:
|
||||
case Dimension::k3D:
|
||||
case Dimension::kCube:
|
||||
assert_unhandled_case(texture_info.dimension);
|
||||
return false;
|
||||
}
|
||||
|
@ -773,6 +776,110 @@ bool TextureCache::UploadTexture2D(GLuint texture,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Uploads all six faces of a cube texture into the GL texture `texture`.
//
// The guest data is byte-swapped (and untiled when `texture_info.is_tiled`)
// into a scratch-buffer allocation of `texture_info.output_length` bytes, then
// handed to GL through the pixel unpack buffer as a 6-layer image.
//
// Returns false when the texture format has no GL equivalent in
// `texture_configs`; returns true once the upload commands have been issued.
bool TextureCache::UploadTextureCube(GLuint texture,
                                     const TextureInfo& texture_info) {
  const auto host_address = memory_->Translate(texture_info.guest_address);

  const auto& config =
      texture_configs[uint32_t(texture_info.format_info->format)];
  if (config.format == GL_INVALID_ENUM) {
    assert_always("Unhandled texture format");
    return false;
  }

  const auto& cube = texture_info.size_cube;

  size_t unpack_length = texture_info.output_length;
  glTextureStorage2D(texture, 1, config.internal_format, cube.output_width,
                     cube.output_height);

  // Number of block rows actually stored per face in the output buffer.
  // size_cube.block_height is rounded up to whole 32-block tiles and can be
  // larger than this; iterating over it would write past the end of each
  // output face (and past the scratch allocation on the last face).
  const uint32_t rows_per_face =
      cube.output_pitch ? cube.output_face_length / cube.output_pitch : 0;

  auto allocation = scratch_buffer_->Acquire(unpack_length);
  if (!texture_info.is_tiled) {
    if (cube.input_pitch == cube.output_pitch &&
        cube.input_face_length == cube.output_face_length) {
      // Fast path copy entire image. Only valid when both the row pitch and
      // the per-face length match, i.e. the six faces are laid out
      // identically in guest memory and in the output buffer.
      TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
                  unpack_length);
    } else {
      // Slow path copy row-by-row because strides differ.
      // UNPACK_ROW_LENGTH only works for uncompressed images, and likely does
      // this exact thing under the covers, so we just always do it here.
      const uint8_t* src_face = host_address;
      uint8_t* dest_face = reinterpret_cast<uint8_t*>(allocation.host_ptr);
      const uint32_t pitch = std::min(cube.input_pitch, cube.output_pitch);
      for (int face = 0; face < 6; ++face) {
        const uint8_t* src = src_face;
        uint8_t* dest = dest_face;
        for (uint32_t y = 0; y < rows_per_face; y++) {
          TextureSwap(texture_info.endianness, dest, src, pitch);
          src += cube.input_pitch;
          dest += cube.output_pitch;
        }
        // Faces are padded differently on each side, so step each pointer by
        // its own face length instead of relying on the row loop.
        src_face += cube.input_face_length;
        dest_face += cube.output_face_length;
      }
    }
  } else {
    // TODO(benvanik): optimize this inner loop (or work by tiles).
    const uint8_t* src = host_address;
    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
    uint32_t bytes_per_block = texture_info.format_info->block_width *
                               texture_info.format_info->block_height *
                               texture_info.format_info->bits_per_pixel / 8;
    // Blocks actually stored per output row (unpadded); see rows_per_face.
    const uint32_t blocks_per_row =
        bytes_per_block ? cube.output_pitch / bytes_per_block : 0;
    // Tiled textures can be packed; get the offset into the packed texture.
    uint32_t offset_x;
    uint32_t offset_y;
    TextureInfo::GetPackedTileOffset(texture_info, &offset_x, &offset_y);
    auto bpp = (bytes_per_block >> 2) +
               ((bytes_per_block >> 1) >> (bytes_per_block >> 2));
    for (int face = 0; face < 6; ++face) {
      for (uint32_t y = 0, output_base_offset = 0; y < rows_per_face;
           y++, output_base_offset += cube.output_pitch) {
        auto input_base_offset = TextureInfo::TiledOffset2DOuter(
            offset_y + y,
            (cube.input_width / texture_info.format_info->block_width), bpp);
        for (uint32_t x = 0, output_offset = output_base_offset;
             x < blocks_per_row; x++, output_offset += bytes_per_block) {
          auto input_offset =
              TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
                                              input_base_offset) >>
              bpp;
          TextureSwap(texture_info.endianness, dest + output_offset,
                      src + input_offset * bytes_per_block, bytes_per_block);
        }
      }
      src += cube.input_face_length;
      dest += cube.output_face_length;
    }
  }
  size_t unpack_offset = allocation.offset;
  scratch_buffer_->Commit(std::move(allocation));
  // TODO(benvanik): avoid flush on entire buffer by using another texture
  // buffer.
  scratch_buffer_->Flush();

  // With a pixel unpack buffer bound, the data pointer passed to GL is a byte
  // offset into that buffer.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
  if (texture_info.is_compressed()) {
    glCompressedTextureSubImage3D(
        texture, 0, 0, 0, 0, cube.output_width, cube.output_height, 6,
        config.format, static_cast<GLsizei>(unpack_length),
        reinterpret_cast<void*>(unpack_offset));
  } else {
    // Most of these don't seem to have an effect on compressed images.
    // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
    // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
    // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

    glTextureSubImage3D(texture, 0, 0, 0, 0, cube.output_width,
                        cube.output_height, 6, config.format, config.type,
                        reinterpret_cast<void*>(unpack_offset));
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  return true;
}
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -91,6 +91,7 @@ class TextureCache {
|
|||
void EvictTexture(TextureEntry* entry);
|
||||
|
||||
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
|
||||
bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
|
||||
|
||||
Memory* memory_;
|
||||
CircularBuffer* scratch_buffer_;
|
||||
|
|
|
@ -121,11 +121,15 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
|||
info.height = fetch.size_2d.height;
|
||||
break;
|
||||
case Dimension::k3D:
|
||||
case Dimension::kCube:
|
||||
info.width = fetch.size_3d.width;
|
||||
info.height = fetch.size_3d.height;
|
||||
info.depth = fetch.size_3d.depth;
|
||||
break;
|
||||
case Dimension::kCube:
|
||||
info.width = fetch.size_stack.width;
|
||||
info.height = fetch.size_stack.height;
|
||||
info.depth = fetch.size_stack.depth;
|
||||
break;
|
||||
}
|
||||
info.format_info = FormatInfo::Get(fetch.format);
|
||||
info.endianness = static_cast<Endian>(fetch.endianness);
|
||||
|
@ -152,8 +156,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
|||
// TODO(benvanik): calculate size.
|
||||
return false;
|
||||
case Dimension::kCube:
|
||||
// TODO(benvanik): calculate size.
|
||||
return false;
|
||||
info.CalculateTextureSizesCube(fetch);
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -208,6 +212,49 @@ void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
|
|||
output_length = size_2d.output_pitch * block_height;
|
||||
}
|
||||
|
||||
void TextureInfo::CalculateTextureSizesCube(const xe_gpu_texture_fetch_t& fetch) {
|
||||
assert_true(fetch.size_stack.depth + 1 == 6);
|
||||
size_cube.logical_width = 1 + fetch.size_stack.width;
|
||||
size_cube.logical_height = 1 + fetch.size_stack.height;
|
||||
|
||||
// w/h in blocks must be a multiple of block size.
|
||||
uint32_t block_width =
|
||||
poly::round_up(size_cube.logical_width, format_info->block_width) /
|
||||
format_info->block_width;
|
||||
uint32_t block_height =
|
||||
poly::round_up(size_cube.logical_height, format_info->block_height) /
|
||||
format_info->block_height;
|
||||
|
||||
// Tiles are 32x32 blocks. All textures must be multiples of tile dimensions.
|
||||
uint32_t tile_width = uint32_t(std::ceilf(block_width / 32.0f));
|
||||
uint32_t tile_height = uint32_t(std::ceilf(block_height / 32.0f));
|
||||
size_cube.block_width = tile_width * 32;
|
||||
size_cube.block_height = tile_height * 32;
|
||||
|
||||
uint32_t bytes_per_block = format_info->block_width *
|
||||
format_info->block_height *
|
||||
format_info->bits_per_pixel / 8;
|
||||
uint32_t byte_pitch = tile_width * 32 * bytes_per_block;
|
||||
if (!is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = poly::round_up(byte_pitch, 256);
|
||||
}
|
||||
|
||||
size_cube.input_width = tile_width * 32 * format_info->block_width;
|
||||
size_cube.input_height = tile_height * 32 * format_info->block_height;
|
||||
|
||||
size_cube.output_width = block_width * format_info->block_width;
|
||||
size_cube.output_height = block_height * format_info->block_height;
|
||||
|
||||
size_cube.input_pitch = byte_pitch;
|
||||
size_cube.output_pitch = block_width * bytes_per_block;
|
||||
|
||||
size_cube.input_face_length = size_cube.input_pitch * size_cube.block_height;
|
||||
input_length = size_cube.input_face_length * 6;
|
||||
size_cube.output_face_length = size_cube.output_pitch * block_height;
|
||||
output_length = size_cube.output_face_length * 6;
|
||||
}
|
||||
|
||||
void TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
|
||||
uint32_t* out_offset_x,
|
||||
uint32_t* out_offset_y) {
|
||||
|
|
|
@ -187,6 +187,18 @@ struct TextureInfo {
|
|||
struct {
|
||||
} size_3d;
|
||||
struct {
|
||||
uint32_t logical_width;
|
||||
uint32_t logical_height;
|
||||
uint32_t block_width;
|
||||
uint32_t block_height;
|
||||
uint32_t input_width;
|
||||
uint32_t input_height;
|
||||
uint32_t input_pitch;
|
||||
uint32_t output_width;
|
||||
uint32_t output_height;
|
||||
uint32_t output_pitch;
|
||||
uint32_t input_face_length;
|
||||
uint32_t output_face_length;
|
||||
} size_cube;
|
||||
};
|
||||
|
||||
|
@ -209,6 +221,7 @@ struct TextureInfo {
|
|||
private:
|
||||
void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
void CalculateTextureSizesCube(const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
Loading…
Reference in New Issue