[SPIR-V] Wrap 4-operand ops and 1-3-operand GLSL std calls

This commit is contained in:
Triang3l 2023-04-19 21:44:24 +03:00
parent 19d56001d2
commit 8aaa6f1f7d
7 changed files with 721 additions and 947 deletions

View File

@ -0,0 +1,105 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2023 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_builder.h"
#include <memory>
#include <utility>
#include <vector>
namespace xe {
namespace gpu {
// Creates an instruction with the opcode `op_code` and the result type
// `type_id` that takes four ID operands, and returns the ID of its result.
//
// While spec constant operation generation is active
// (generatingOpCodeForSpecConst), the request is forwarded to
// createSpecConstantOp with the four operands and an empty second list instead
// of emitting an instruction directly. Otherwise, the new instruction is
// appended to the current build point.
spv::Id SpirvBuilder::createQuadOp(spv::Op op_code, spv::Id type_id,
                                   spv::Id operand1, spv::Id operand2,
                                   spv::Id operand3, spv::Id operand4) {
  if (!generatingOpCodeForSpecConst) {
    auto instruction =
        std::make_unique<spv::Instruction>(getUniqueId(), type_id, op_code);
    // Operands are added in the declaration order.
    for (spv::Id operand_id : {operand1, operand2, operand3, operand4}) {
      instruction->addIdOperand(operand_id);
    }
    // The result ID must be read before the ownership of the instruction is
    // handed over to the block.
    const spv::Id result_id = instruction->getResultId();
    buildPoint->addInstruction(std::move(instruction));
    return result_id;
  }

  // Spec constant operation path.
  const std::vector<spv::Id> spec_operands{operand1, operand2, operand3,
                                           operand4};
  return createSpecConstantOp(op_code, type_id, spec_operands,
                              std::vector<spv::Id>());
}
// Creates a unary operation via createUnaryOp and decorates its result with
// NoContraction. Returns the ID of the decorated result.
spv::Id SpirvBuilder::createNoContractionUnaryOp(spv::Op op_code,
                                                 spv::Id type_id,
                                                 spv::Id operand) {
  const spv::Id decorated_id = createUnaryOp(op_code, type_id, operand);
  addDecoration(decorated_id, spv::DecorationNoContraction);
  return decorated_id;
}
// Creates a binary operation via createBinOp and decorates its result with
// NoContraction. Returns the ID of the decorated result.
spv::Id SpirvBuilder::createNoContractionBinOp(spv::Op op_code, spv::Id type_id,
                                               spv::Id operand1,
                                               spv::Id operand2) {
  const spv::Id decorated_id =
      createBinOp(op_code, type_id, operand1, operand2);
  addDecoration(decorated_id, spv::DecorationNoContraction);
  return decorated_id;
}
// Emits an OpExtInst with the result type `result_type` that invokes the entry
// point `entry_point` of the extended instruction set `builtins` with a single
// ID argument. The instruction is appended to the current build point, and the
// ID of its result is returned.
spv::Id SpirvBuilder::createUnaryBuiltinCall(spv::Id result_type,
                                             spv::Id builtins, int entry_point,
                                             spv::Id operand) {
  auto ext_inst = std::make_unique<spv::Instruction>(
      getUniqueId(), result_type, spv::OpExtInst);
  // Instruction set, then the entry point within it, then the argument.
  ext_inst->addIdOperand(builtins);
  ext_inst->addImmediateOperand(entry_point);
  ext_inst->addIdOperand(operand);
  // Read the result ID before the instruction is moved into the block.
  const spv::Id result_id = ext_inst->getResultId();
  getBuildPoint()->addInstruction(std::move(ext_inst));
  return result_id;
}
// Emits an OpExtInst with the result type `result_type` that invokes the entry
// point `entry_point` of the extended instruction set `builtins` with two ID
// arguments, in the order `operand1`, `operand2`. The instruction is appended
// to the current build point, and the ID of its result is returned.
spv::Id SpirvBuilder::createBinBuiltinCall(spv::Id result_type,
                                           spv::Id builtins, int entry_point,
                                           spv::Id operand1, spv::Id operand2) {
  auto ext_inst = std::make_unique<spv::Instruction>(
      getUniqueId(), result_type, spv::OpExtInst);
  // Instruction set, then the entry point within it, then the arguments.
  ext_inst->addIdOperand(builtins);
  ext_inst->addImmediateOperand(entry_point);
  ext_inst->addIdOperand(operand1);
  ext_inst->addIdOperand(operand2);
  // Read the result ID before the instruction is moved into the block.
  const spv::Id result_id = ext_inst->getResultId();
  getBuildPoint()->addInstruction(std::move(ext_inst));
  return result_id;
}
// Emits an OpExtInst with the result type `result_type` that invokes the entry
// point `entry_point` of the extended instruction set `builtins` with three ID
// arguments, in the order `operand1`, `operand2`, `operand3`. The instruction
// is appended to the current build point, and the ID of its result is returned.
spv::Id SpirvBuilder::createTriBuiltinCall(spv::Id result_type,
                                           spv::Id builtins, int entry_point,
                                           spv::Id operand1, spv::Id operand2,
                                           spv::Id operand3) {
  auto ext_inst = std::make_unique<spv::Instruction>(
      getUniqueId(), result_type, spv::OpExtInst);
  // Instruction set, then the entry point within it, then the arguments.
  ext_inst->addIdOperand(builtins);
  ext_inst->addImmediateOperand(entry_point);
  for (spv::Id argument_id : {operand1, operand2, operand3}) {
    ext_inst->addIdOperand(argument_id);
  }
  // Read the result ID before the instruction is moved into the block.
  const spv::Id result_id = ext_inst->getResultId();
  getBuildPoint()->addInstruction(std::move(ext_inst));
  return result_id;
}
} // namespace gpu
} // namespace xe

View File

@ -26,19 +26,22 @@ class SpirvBuilder : public spv::Builder {
// Make public rather than protected.
using spv::Builder::createSelectionMerge;
spv::Id createNoContractionUnaryOp(spv::Op op_code, spv::Id type_id,
spv::Id operand) {
spv::Id result = createUnaryOp(op_code, type_id, operand);
addDecoration(result, spv::DecorationNoContraction);
return result;
}
spv::Id createQuadOp(spv::Op op_code, spv::Id type_id, spv::Id operand1,
spv::Id operand2, spv::Id operand3, spv::Id operand4);
spv::Id createNoContractionUnaryOp(spv::Op op_code, spv::Id type_id,
spv::Id operand);
spv::Id createNoContractionBinOp(spv::Op op_code, spv::Id type_id,
spv::Id operand1, spv::Id operand2) {
spv::Id result = createBinOp(op_code, type_id, operand1, operand2);
addDecoration(result, spv::DecorationNoContraction);
return result;
}
spv::Id operand1, spv::Id operand2);
spv::Id createUnaryBuiltinCall(spv::Id result_type, spv::Id builtins,
int entry_point, spv::Id operand);
spv::Id createBinBuiltinCall(spv::Id result_type, spv::Id builtins,
int entry_point, spv::Id operand1,
spv::Id operand2);
spv::Id createTriBuiltinCall(spv::Id result_type, spv::Id builtins,
int entry_point, spv::Id operand1,
spv::Id operand2, spv::Id operand3);
};
} // namespace gpu

View File

@ -2037,19 +2037,15 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
assert_true(input_fragment_coordinates_ != spv::NoResult);
id_vector_temp_.clear();
id_vector_temp_.push_back(const_int_0_);
spv::Id param_gen_x =
builder_->createLoad(builder_->createAccessChain(
spv::StorageClassInput,
input_fragment_coordinates_, id_vector_temp_),
spv::NoPrecision);
id_vector_temp_.clear();
id_vector_temp_.push_back(param_gen_x);
param_gen_x = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(param_gen_x);
param_gen_x = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_);
spv::Id param_gen_x = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs,
builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
if (!modification.pixel.param_gen_point) {
assert_true(input_front_facing_ != spv::NoResult);
param_gen_x = builder_->createTriOp(
@ -2076,19 +2072,15 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
// Y - pixel Y .0 in the magnitude, is point in the sign bit.
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(1));
spv::Id param_gen_y =
builder_->createLoad(builder_->createAccessChain(
spv::StorageClassInput,
input_fragment_coordinates_, id_vector_temp_),
spv::NoPrecision);
id_vector_temp_.clear();
id_vector_temp_.push_back(param_gen_y);
param_gen_y = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(param_gen_y);
param_gen_y = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_);
spv::Id param_gen_y = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs,
builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
if (modification.pixel.param_gen_point) {
param_gen_y = builder_->createUnaryOp(
spv::OpBitcast, type_float_,
@ -2104,14 +2096,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
assert_true(input_point_coordinates_ != spv::NoResult);
// Saturate to avoid negative point coordinates if the center of the pixel
// is not covered, and extrapolation is done.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->createLoad(input_point_coordinates_, spv::NoPrecision));
id_vector_temp_.push_back(const_float2_0_);
id_vector_temp_.push_back(const_float2_1_);
spv::Id param_gen_point_coordinates =
builder_->createBuiltinCall(type_float2_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_);
spv::Id param_gen_point_coordinates = builder_->createTriBuiltinCall(
type_float2_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createLoad(input_point_coordinates_, spv::NoPrecision),
const_float2_0_, const_float2_1_);
param_gen_z = builder_->createCompositeExtract(
param_gen_point_coordinates, type_float_, 0);
param_gen_w = builder_->createCompositeExtract(
@ -2397,10 +2385,8 @@ spv::Id SpirvShaderTranslator::ApplyOperandModifiers(
}
if (original_operand.is_absolute_value || force_absolute) {
EnsureBuildPointAvailable();
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(operand_value);
operand_value = builder_->createBuiltinCall(
type, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_util_);
operand_value = builder_->createUnaryBuiltinCall(
type, ext_inst_glsl_std_450_, GLSLstd450FAbs, operand_value);
}
if (original_operand.is_negated != invert_negate) {
EnsureBuildPointAvailable();
@ -2464,11 +2450,9 @@ spv::Id SpirvShaderTranslator::GetAbsoluteOperand(
return operand_storage;
}
EnsureBuildPointAvailable();
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(operand_storage);
return builder_->createBuiltinCall(builder_->getTypeId(operand_storage),
ext_inst_glsl_std_450_, GLSLstd450FAbs,
id_vector_temp_util_);
return builder_->createUnaryBuiltinCall(builder_->getTypeId(operand_storage),
ext_inst_glsl_std_450_,
GLSLstd450FAbs, operand_storage);
}
void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
@ -2557,15 +2541,11 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
if (result.is_clamped && non_constant_components) {
// Apply the saturation modifier to the result.
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(value);
id_vector_temp_util_.push_back(
const_float_vectors_0_[value_num_components - 1]);
id_vector_temp_util_.push_back(
const_float_vectors_1_[value_num_components - 1]);
value = builder_->createBuiltinCall(
value = builder_->createTriBuiltinCall(
type_float_vectors_[value_num_components - 1], ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_util_);
GLSLstd450NClamp, value,
const_float_vectors_0_[value_num_components - 1],
const_float_vectors_1_[value_num_components - 1]);
}
// The value contains either result.GetUsedResultComponents() in a condensed
@ -2783,12 +2763,9 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
uniform_system_constants_,
id_vector_temp_util_),
spv::NoPrecision));
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(point_vertex_diameter_min);
id_vector_temp_util_.push_back(point_size);
point_size =
builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_,
GLSLstd450SMax, id_vector_temp_util_);
point_size = builder_->createBinBuiltinCall(
type_int_, ext_inst_glsl_std_450_, GLSLstd450SMax,
point_vertex_diameter_min, point_size);
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(
builder_->makeIntConstant(kSystemConstantPointVertexDiameterMax));
@ -2799,12 +2776,9 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
uniform_system_constants_,
id_vector_temp_util_),
spv::NoPrecision));
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(point_vertex_diameter_max);
id_vector_temp_util_.push_back(point_size);
point_size =
builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_,
GLSLstd450SMin, id_vector_temp_util_);
point_size = builder_->createBinBuiltinCall(
type_int_, ext_inst_glsl_std_450_, GLSLstd450SMin,
point_vertex_diameter_max, point_size);
value_to_store = builder_->createCompositeInsert(
builder_->createUnaryOp(spv::OpBitcast, type_float_, point_size),
value_to_store, type_float3_, 0);
@ -2902,14 +2876,11 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32,
&block_16in32_merge);
builder_->setBuildPoint(&block_16in32);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->createBinOp(
spv::OpShiftRightLogical, type, value, const_uint_16_typed));
id_vector_temp_.push_back(value);
id_vector_temp_.insert(id_vector_temp_.cend(), 2,
builder_->makeIntConstant(16));
spv::Id swapped_16in32 =
builder_->createOp(spv::OpBitFieldInsert, type, id_vector_temp_);
spv::Id swapped_16in32 = builder_->createQuadOp(
spv::OpBitFieldInsert, type,
builder_->createBinOp(spv::OpShiftRightLogical, type, value,
const_uint_16_typed),
value, builder_->makeIntConstant(16), builder_->makeIntConstant(16));
builder_->createBranch(&block_16in32_merge);
builder_->setBuildPoint(&block_16in32_merge);
{
@ -3021,12 +2992,9 @@ spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma,
if (!gamma_pre_saturated) {
// Saturate, flushing NaN to 0.
id_vector_temp_.clear();
id_vector_temp_.push_back(gamma);
id_vector_temp_.push_back(const_vector_0);
id_vector_temp_.push_back(const_vector_1);
gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_);
gamma = builder_->createTriBuiltinCall(value_type, ext_inst_glsl_std_450_,
GLSLstd450NClamp, gamma,
const_vector_0, const_vector_1);
}
spv::Id is_piece_at_least_3 = builder_->createBinOp(
@ -3086,14 +3054,12 @@ spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma,
scale),
offset);
// linear += trunc(linear * scale)
spv::Id linear_integer_term = builder_->createNoContractionBinOp(
spv::OpFMul, value_type, linear, scale);
id_vector_temp_.clear();
id_vector_temp_.push_back(linear_integer_term);
linear_integer_term = builder_->createBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc, id_vector_temp_);
linear = builder_->createNoContractionBinOp(spv::OpFAdd, value_type, linear,
linear_integer_term);
linear = builder_->createNoContractionBinOp(
spv::OpFAdd, value_type, linear,
builder_->createUnaryBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc,
builder_->createNoContractionBinOp(spv::OpFMul, value_type, linear,
scale)));
// linear *= 1.0f / 1023.0f
linear = builder_->createNoContractionBinOp(
value_times_scalar_opcode, value_type, linear,
@ -3117,12 +3083,9 @@ spv::Id SpirvShaderTranslator::LinearToPWLGamma(spv::Id linear,
if (!linear_pre_saturated) {
// Saturate, flushing NaN to 0.
id_vector_temp_.clear();
id_vector_temp_.push_back(linear);
id_vector_temp_.push_back(const_vector_0);
id_vector_temp_.push_back(const_vector_1);
linear = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_);
linear = builder_->createTriBuiltinCall(value_type, ext_inst_glsl_std_450_,
GLSLstd450NClamp, linear,
const_vector_0, const_vector_1);
}
spv::Id is_piece_at_least_3 = builder_->createBinOp(
@ -3170,19 +3133,16 @@ spv::Id SpirvShaderTranslator::LinearToPWLGamma(spv::Id linear,
offset_3_or_2, offset_1_or_0);
// gamma = trunc(linear * scale) * (1.0f / 255.0f) + offset
spv::Id gamma = builder_->createNoContractionBinOp(spv::OpFMul, value_type,
linear, scale);
id_vector_temp_.clear();
id_vector_temp_.push_back(gamma);
gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_,
GLSLstd450Trunc, id_vector_temp_);
gamma = builder_->createNoContractionBinOp(
return builder_->createNoContractionBinOp(
spv::OpFAdd, value_type,
builder_->createNoContractionBinOp(
is_vector ? spv::OpVectorTimesScalar : spv::OpFMul, value_type, gamma,
is_vector ? spv::OpVectorTimesScalar : spv::OpFMul, value_type,
builder_->createUnaryBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc,
builder_->createNoContractionBinOp(spv::OpFMul, value_type,
linear, scale)),
builder_->makeFloatConstant(1.0f / 255.0f)),
offset);
return gamma;
}
} // namespace gpu

View File

@ -28,16 +28,13 @@ spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value,
int num_components = builder_->getNumComponents(value);
assert_true(builder_->getNumComponents(operand_0_abs) == num_components);
assert_true(builder_->getNumComponents(operand_1_abs) == num_components);
id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(operand_0_abs);
id_vector_temp_util_.push_back(operand_1_abs);
return builder_->createTriOp(
spv::OpSelect, type_float_,
builder_->createBinOp(
spv::OpFOrdEqual, type_bool_vectors_[num_components - 1],
builder_->createBuiltinCall(type_float_vectors_[num_components - 1],
ext_inst_glsl_std_450_, GLSLstd450NMin,
id_vector_temp_util_),
builder_->createBinBuiltinCall(
type_float_vectors_[num_components - 1], ext_inst_glsl_std_450_,
GLSLstd450NMin, operand_0_abs, operand_1_abs),
const_float_vectors_0_[num_components - 1]),
const_float_vectors_0_[num_components - 1], value);
}
@ -252,15 +249,12 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
different_operands[i] = GetAbsoluteOperand(different_operands[i],
instr.vector_operands[i]);
}
id_vector_temp_.clear();
id_vector_temp_.push_back(different_operands[0]);
id_vector_temp_.push_back(different_operands[1]);
spv::Id different_abs_min =
builder_->createBuiltinCall(different_type, ext_inst_glsl_std_450_,
GLSLstd450NMin, id_vector_temp_);
spv::Id different_zero = builder_->createBinOp(
spv::OpFOrdEqual, type_bool_vectors_[different_count - 1],
different_abs_min, const_float_vectors_0_[different_count - 1]);
builder_->createBinBuiltinCall(
different_type, ext_inst_glsl_std_450_, GLSLstd450NMin,
different_operands[0], different_operands[1]),
const_float_vectors_0_[different_count - 1]);
// Replace with +0.
different_result = builder_->createTriOp(
spv::OpSelect, different_type, different_zero,
@ -325,23 +319,18 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
} else {
maxa_operand_0_w = operand_0;
}
spv::Id maxa_address = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, maxa_operand_0_w,
builder_->makeFloatConstant(0.5f));
id_vector_temp_.clear();
id_vector_temp_.push_back(maxa_address);
maxa_address =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(maxa_address);
id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f));
id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f));
builder_->createStore(
builder_->createUnaryOp(
spv::OpConvertFToS, type_int_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_)),
builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, maxa_operand_0_w,
builder_->makeFloatConstant(0.5f))),
builder_->makeFloatConstant(-256.0f),
builder_->makeFloatConstant(255.0f))),
var_main_address_register_);
}
if (!used_result_components) {
@ -455,13 +444,11 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
case ucode::AluVectorOpcode::kFrc:
case ucode::AluVectorOpcode::kTrunc:
case ucode::AluVectorOpcode::kFloor:
id_vector_temp_.clear();
id_vector_temp_.push_back(GetOperandComponents(operand_storage[0],
instr.vector_operands[0],
used_result_components));
return builder_->createBuiltinCall(
return builder_->createUnaryBuiltinCall(
result_type, ext_inst_glsl_std_450_,
GLSLstd450(kOps[size_t(instr.vector_opcode)]), id_vector_temp_);
GLSLstd450(kOps[size_t(instr.vector_opcode)]),
GetOperandComponents(operand_storage[0], instr.vector_operands[0],
used_result_components));
case ucode::AluVectorOpcode::kCndEq:
case ucode::AluVectorOpcode::kCndGe:
@ -553,11 +540,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
if (!instr.vector_operands[0].is_absolute_value ||
instr.vector_operands[0].is_negated) {
for (unsigned int i = 0; i < 3; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(operand[i]);
operand_abs[i] =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450FAbs, id_vector_temp_);
operand_abs[i] = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, operand[i]);
}
} else {
for (unsigned int i = 0; i < 3; ++i) {
@ -749,13 +733,10 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
operand, type_float_, static_cast<unsigned int>(component));
while (xe::bit_scan_forward(components_remaining, &component)) {
components_remaining &= ~(uint32_t(1) << component);
id_vector_temp_.clear();
id_vector_temp_.push_back(result);
id_vector_temp_.push_back(builder_->createCompositeExtract(
operand, type_float_, static_cast<unsigned int>(component)));
result =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NMax, id_vector_temp_);
result = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax, result,
builder_->createCompositeExtract(
operand, type_float_, static_cast<unsigned int>(component)));
}
return result;
}
@ -1014,12 +995,10 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
spv::Id result =
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, a, ps);
// Shader Model 3: +0 or denormal * anything = +-0.
id_vector_temp_.clear();
id_vector_temp_.push_back(ps);
return ZeroIfAnyOperandIsZero(
result, GetAbsoluteOperand(a, instr.scalar_operands[0]),
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450FAbs, id_vector_temp_));
builder_->createUnaryBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450FAbs, ps));
}
case ucode::AluScalarOpcode::kMulsPrev2: {
// Check if need to select the src0.a * ps case.
@ -1033,10 +1012,8 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
spv::OpFUnordNotEqual, type_bool_, ps, const_float_max_neg);
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX
// is already loaded to an SGPR, this is also false if it's NaN.
id_vector_temp_.clear();
id_vector_temp_.push_back(ps);
spv::Id ps_abs = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_);
spv::Id ps_abs = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, ps);
spv::Id ps_abs_neg = builder_->createNoContractionUnaryOp(
spv::OpFNegate, type_float_, ps_abs);
condition = builder_->createBinOp(
@ -1048,11 +1025,8 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
instr.scalar_operands[0], 0b0010);
spv::Id b_abs_neg = b;
if (!instr.scalar_operands[0].is_absolute_value) {
id_vector_temp_.clear();
id_vector_temp_.push_back(b_abs_neg);
b_abs_neg =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450FAbs, id_vector_temp_);
b_abs_neg = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, b_abs_neg);
}
if (!instr.scalar_operands[0].is_absolute_value ||
!instr.scalar_operands[0].is_negated) {
@ -1120,20 +1094,16 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
} else {
maxa_address = a;
}
id_vector_temp_.clear();
id_vector_temp_.push_back(maxa_address);
maxa_address =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(maxa_address);
id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f));
id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f));
builder_->createStore(
builder_->createUnaryOp(
spv::OpConvertFToS, type_int_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_)),
builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
maxa_address),
builder_->makeFloatConstant(-256.0f),
builder_->makeFloatConstant(255.0f))),
var_main_address_register_);
}
if (a == b) {
@ -1171,18 +1141,16 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
case ucode::AluScalarOpcode::kSqrt:
case ucode::AluScalarOpcode::kSin:
case ucode::AluScalarOpcode::kCos:
id_vector_temp_.clear();
id_vector_temp_.push_back(GetOperandComponents(
operand_storage[0], instr.scalar_operands[0], 0b0001));
return builder_->createBuiltinCall(
return builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_,
GLSLstd450(kOps[size_t(instr.scalar_opcode)]), id_vector_temp_);
GLSLstd450(kOps[size_t(instr.scalar_opcode)]),
GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
0b0001));
case ucode::AluScalarOpcode::kLogc: {
id_vector_temp_.clear();
id_vector_temp_.push_back(GetOperandComponents(
operand_storage[0], instr.scalar_operands[0], 0b0001));
spv::Id result = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Log2, id_vector_temp_);
spv::Id result = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Log2,
GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
0b0001));
return builder_->createTriOp(
spv::OpSelect, type_float_,
builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result,
@ -1232,12 +1200,10 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
0b0001));
}
case ucode::AluScalarOpcode::kRsqc: {
id_vector_temp_.clear();
id_vector_temp_.push_back(GetOperandComponents(
operand_storage[0], instr.scalar_operands[0], 0b0001));
spv::Id result =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450InverseSqrt, id_vector_temp_);
spv::Id result = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450InverseSqrt,
GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
0b0001));
result = builder_->createTriOp(
spv::OpSelect, type_float_,
builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result,
@ -1250,12 +1216,10 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
builder_->makeFloatConstant(FLT_MAX), result);
}
case ucode::AluScalarOpcode::kRsqf: {
id_vector_temp_.clear();
id_vector_temp_.push_back(GetOperandComponents(
operand_storage[0], instr.scalar_operands[0], 0b0001));
spv::Id result =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450InverseSqrt, id_vector_temp_);
spv::Id result = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450InverseSqrt,
GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
0b0001));
result = builder_->createTriOp(
spv::OpSelect, type_float_,
builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result,

View File

@ -83,12 +83,10 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
index = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, index, builder_->makeFloatConstant(0.5f));
}
id_vector_temp_.clear();
id_vector_temp_.push_back(index);
index = builder_->createUnaryOp(
spv::OpConvertFToS, type_int_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, id_vector_temp_));
builder_->createUnaryBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, index));
if (instr.attributes.stride > 1) {
index = builder_->createBinOp(
spv::OpIMul, type_int_, index,
@ -246,11 +244,9 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
} else {
word = words;
}
id_vector_temp_.clear();
id_vector_temp_.push_back(word);
word = builder_->createBuiltinCall(type_float2_, ext_inst_glsl_std_450_,
GLSLstd450UnpackHalf2x16,
id_vector_temp_);
word = builder_->createUnaryBuiltinCall(type_float2_,
ext_inst_glsl_std_450_,
GLSLstd450UnpackHalf2x16, word);
if (word_needed_components != 0b11) {
// If only one of two components is needed, extract it.
word = builder_->createCompositeExtract(
@ -454,18 +450,14 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
spv::Id const_minus_1 = builder_->makeFloatConstant(-1.0f);
if (used_format_component_count > 1) {
id_vector_temp_.clear();
id_vector_temp_.insert(id_vector_temp_.cend(),
used_format_component_count,
id_vector_temp_.resize(used_format_component_count,
const_minus_1);
const_minus_1 =
builder_->makeCompositeConstant(result_type, id_vector_temp_);
}
id_vector_temp_.clear();
id_vector_temp_.push_back(result);
id_vector_temp_.push_back(const_minus_1);
result =
builder_->createBuiltinCall(result_type, ext_inst_glsl_std_450_,
GLSLstd450FMax, id_vector_temp_);
result = builder_->createBinBuiltinCall(
result_type, ext_inst_glsl_std_450_, GLSLstd450FMax, result,
const_minus_1);
} break;
case xenos::SignedRepeatingFractionMode::kNoZero:
id_vector_temp_.clear();
@ -1104,11 +1096,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
builder_->makeFloatConstant(component_offset));
}
// 0.5 has already been subtracted via offsets previously.
id_vector_temp_.clear();
id_vector_temp_.push_back(result_component);
result_component =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Fract, id_vector_temp_);
result_component = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract,
result_component);
result[coordinate_component_index] = result_component;
}
} else {
@ -1256,14 +1246,11 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
spv::OpFAdd, type_float_, face,
builder_->makeFloatConstant(offset_values[2]));
}
id_vector_temp_.clear();
id_vector_temp_.push_back(face);
id_vector_temp_.push_back(const_float_0_);
id_vector_temp_.push_back(builder_->makeFloatConstant(5.0f));
face = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_));
builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp, face,
const_float_0_, builder_->makeFloatConstant(5.0f)));
// Split the face index into the axis and the sign.
spv::Id const_uint_1 = builder_->makeUintConstant(1);
spv::Id face_axis = builder_->createBinOp(
@ -1580,11 +1567,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
// in getCompTexLOD, so not doing it here too for now. Apply the
// gradient exponent biases from the word 4 of the fetch constant in
// the future when it's handled in getCompTexLOD somehow.
id_vector_temp_.clear();
id_vector_temp_.push_back(lod);
spv::Id lod_gradient_scale =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Exp2, id_vector_temp_);
spv::Id lod_gradient_scale = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Exp2, lod);
switch (instr.dimension) {
case xenos::FetchOpDimension::k1D: {
spv::Id gradient_h_1d, gradient_v_1d;
@ -1841,14 +1825,10 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
(vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const ||
vol_mag_filter_is_linear != vol_min_filter_is_linear)) {
// Check if minifying along layers (derivative > 1 along any axis).
id_vector_temp_.clear();
for (uint32_t i = 0; i < 2; ++i) {
id_vector_temp_.push_back(builder_->createCompositeExtract(
i ? gradients_v : gradients_h, type_float_, 2));
}
spv::Id layer_max_gradient =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NMax, id_vector_temp_);
spv::Id layer_max_gradient = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax,
builder_->createCompositeExtract(gradients_h, type_float_, 2),
builder_->createCompositeExtract(gradients_v, type_float_, 2));
if (!instr.attributes.unnormalized_coordinates) {
// Denormalize the gradient if provided as normalized.
assert_true(size[2] != spv::NoResult);
@ -1927,11 +1907,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
// floor even for the layer index, but on the Xenos, addressing is
// similar to that of 3D textures). This is needed for both point and
// linear filtering (with linear, 0.5 was subtracted previously).
id_vector_temp_.clear();
id_vector_temp_.push_back(layer_coordinate);
spv::Id layer_0_coordinate =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, id_vector_temp_);
spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
layer_coordinate);
id_vector_temp_.clear();
id_vector_temp_.push_back(coordinates[0]);
id_vector_temp_.push_back(coordinates[1]);
@ -1972,11 +1950,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
id_vector_temp_.push_back(layer_1_coordinate);
texture_parameters.coords = builder_->createCompositeConstruct(
type_float3_, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(layer_coordinate);
spv::Id layer_lerp_factor =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Fract, id_vector_temp_);
spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract,
layer_coordinate);
spv::Id sample_result_unsigned_stacked_filtered;
spv::Id sample_result_signed_stacked_filtered;
SampleTexture(
@ -2302,14 +2278,13 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
// Apply the exponent bias from the bits 13:18 of the fetch constant
// word 4.
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeFloatConstant(1.0f));
id_vector_temp_.push_back(builder_->createTriOp(
spv::OpBitFieldSExtract, type_int_, fetch_constant_word_4_signed,
builder_->makeUintConstant(13), builder_->makeUintConstant(6)));
spv::Id result_exponent_bias =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Ldexp, id_vector_temp_);
spv::Id result_exponent_bias = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Ldexp,
const_float_1_,
builder_->createTriOp(spv::OpBitFieldSExtract, type_int_,
fetch_constant_word_4_signed,
builder_->makeUintConstant(13),
builder_->makeUintConstant(6)));
{
uint32_t result_remaining_components = used_result_nonzero_components;
uint32_t result_component_index;

View File

@ -1531,15 +1531,12 @@ void SpirvShaderTranslator::FSI_LoadSampleMask(spv::Id msaa_samples) {
builder_->makeUintConstant(32 - 2));
} else {
// 0 and 3 to 0 and 1.
id_vector_temp_.clear();
id_vector_temp_.push_back(input_sample_mask_value);
id_vector_temp_.push_back(builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_, input_sample_mask_value,
const_uint_2, const_uint_1));
id_vector_temp_.push_back(const_uint_1);
id_vector_temp_.push_back(builder_->makeUintConstant(32 - 1));
sample_mask_2x =
builder_->createOp(spv::OpBitFieldInsert, type_uint_, id_vector_temp_);
sample_mask_2x = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, input_sample_mask_value,
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_,
input_sample_mask_value, const_uint_2,
const_uint_1),
const_uint_1, builder_->makeUintConstant(32 - 1));
}
builder_->createBranch(&block_msaa_merge);
@ -1547,17 +1544,14 @@ void SpirvShaderTranslator::FSI_LoadSampleMask(spv::Id msaa_samples) {
builder_->setBuildPoint(&block_msaa_4x);
// Flip samples in bits 0:1 by reversing the whole coverage mask and inserting
// the reversed bits.
id_vector_temp_.clear();
id_vector_temp_.push_back(input_sample_mask_value);
id_vector_temp_.push_back(builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_,
builder_->createUnaryOp(spv::OpBitReverse, type_uint_,
input_sample_mask_value),
builder_->makeUintConstant(32 - 1 - 2)));
id_vector_temp_.push_back(const_uint_1);
id_vector_temp_.push_back(const_uint_2);
spv::Id sample_mask_4x =
builder_->createOp(spv::OpBitFieldInsert, type_uint_, id_vector_temp_);
spv::Id sample_mask_4x = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, input_sample_mask_value,
builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_,
builder_->createUnaryOp(spv::OpBitReverse, type_uint_,
input_sample_mask_value),
builder_->makeUintConstant(32 - 1 - 2)),
const_uint_1, const_uint_2);
builder_->createBranch(&block_msaa_merge);
// Select the result depending on the MSAA sample count.
@ -1955,16 +1949,12 @@ void SpirvShaderTranslator::FSI_DepthStencilTest(
// https://docs.microsoft.com/en-us/windows/desktop/direct3d9/depth-bias
std::array<spv::Id, 2> depth_dxy_abs;
for (uint32_t i = 0; i < 2; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(depth_dxy[i]);
depth_dxy_abs[i] = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_);
depth_dxy_abs[i] = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, depth_dxy[i]);
}
id_vector_temp_.clear();
id_vector_temp_.push_back(depth_dxy_abs[0]);
id_vector_temp_.push_back(depth_dxy_abs[1]);
spv::Id depth_max_slope = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FMax, id_vector_temp_);
spv::Id depth_max_slope = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FMax, depth_dxy_abs[0],
depth_dxy_abs[1]);
// Calculate the polygon offset.
spv::Id slope_scaled_poly_offset = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, poly_offset_scale, depth_max_slope);
@ -2074,17 +2064,14 @@ void SpirvShaderTranslator::FSI_DepthStencilTest(
sample_depth_dxy[j] = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, sample_location[j], depth_dxy[j]);
}
spv::Id sample_depth32 = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, center_depth32_biased,
builder_->createNoContractionBinOp(spv::OpFAdd, type_float_,
sample_depth_dxy[0],
sample_depth_dxy[1]));
id_vector_temp_.clear();
id_vector_temp_.push_back(sample_depth32);
id_vector_temp_.push_back(const_float_0_);
id_vector_temp_.push_back(const_float_1_);
sample_depth32 = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp, id_vector_temp_);
spv::Id sample_depth32 = builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, center_depth32_biased,
builder_->createNoContractionBinOp(spv::OpFAdd, type_float_,
sample_depth_dxy[0],
sample_depth_dxy[1])),
const_float_0_, const_float_1_);
// Convert the new depth to 24-bit.
spv::Block& block_depth_format_float = builder_->makeNewBlock();
@ -2105,14 +2092,13 @@ void SpirvShaderTranslator::FSI_DepthStencilTest(
// Round to the nearest even integer. This seems to be the correct
// conversion: adding +0.5 and rounding towards zero results in red instead
// of black in the 4D5307E6 clear shader.
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, sample_depth32,
builder_->makeFloatConstant(float(0xFFFFFF))));
spv::Id sample_depth_unorm24 = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450RoundEven, id_vector_temp_));
builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450RoundEven,
builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, sample_depth32,
builder_->makeFloatConstant(float(0xFFFFFF)))));
builder_->createBranch(&block_depth_format_merge);
spv::Block& block_depth_format_unorm_end = *builder_->getBuildPoint();
// Merge between the two formats.
@ -2253,28 +2239,25 @@ void SpirvShaderTranslator::FSI_DepthStencilTest(
builder_->createBranch(&block_stencil_op_merge);
// Increment and clamp.
builder_->setBuildPoint(&block_stencil_op_increment_clamp);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeUintConstant(UINT8_MAX - 1));
id_vector_temp_.push_back(
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, old_depth_stencil,
builder_->makeUintConstant(UINT8_MAX)));
spv::Id new_stencil_in_low_bits_increment_clamp = builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createBuiltinCall(type_uint_, ext_inst_glsl_std_450_,
GLSLstd450UMin, id_vector_temp_),
builder_->createBinBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMin,
builder_->makeUintConstant(UINT8_MAX - 1),
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
old_depth_stencil,
builder_->makeUintConstant(UINT8_MAX))),
const_uint_1);
builder_->createBranch(&block_stencil_op_merge);
// Decrement and clamp.
builder_->setBuildPoint(&block_stencil_op_decrement_clamp);
id_vector_temp_.clear();
id_vector_temp_.push_back(const_uint_1);
id_vector_temp_.push_back(
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, old_depth_stencil,
builder_->makeUintConstant(UINT8_MAX)));
spv::Id new_stencil_in_low_bits_decrement_clamp = builder_->createBinOp(
spv::OpISub, type_uint_,
builder_->createBuiltinCall(type_uint_, ext_inst_glsl_std_450_,
GLSLstd450UMax, id_vector_temp_),
builder_->createBinBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMax, const_uint_1,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
old_depth_stencil,
builder_->makeUintConstant(UINT8_MAX))),
const_uint_1);
builder_->createBranch(&block_stencil_op_merge);
// Invert.
@ -2360,13 +2343,9 @@ void SpirvShaderTranslator::FSI_DepthStencilTest(
// Combine the new depth and the new stencil taking into account whether the
// new depth should be written.
id_vector_temp_.clear();
id_vector_temp_.push_back(new_stencil_and_old_depth);
id_vector_temp_.push_back(sample_depth24);
id_vector_temp_.push_back(const_uint_8);
id_vector_temp_.push_back(builder_->makeUintConstant(24));
spv::Id new_stencil_and_unconditional_new_depth =
builder_->createOp(spv::OpBitFieldInsert, type_uint_, id_vector_temp_);
spv::Id new_stencil_and_unconditional_new_depth = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, new_stencil_and_old_depth,
sample_depth24, const_uint_8, builder_->makeUintConstant(24));
spv::Id new_depth_stencil = builder_->createTriOp(
spv::OpSelect, type_uint_,
builder_->createBinOp(spv::OpLogicalAnd, type_bool_,
@ -2568,14 +2547,11 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
spv::Id packed_8_8_8_8;
{
builder_->setBuildPoint(&block_format_8_8_8_8);
id_vector_temp_.clear();
id_vector_temp_.push_back(color_float4);
id_vector_temp_.push_back(const_float4_0_);
id_vector_temp_.push_back(const_float4_1_);
spv::Id color_scaled = builder_->createNoContractionBinOp(
spv::OpVectorTimesScalar, type_float4_,
builder_->createBuiltinCall(type_float4_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_),
builder_->createTriBuiltinCall(type_float4_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, color_float4,
const_float4_0_, const_float4_1_),
builder_->makeFloatConstant(255.0f));
spv::Id color_offset = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float4_, color_scaled, unorm_round_offset_float4);
@ -2585,14 +2561,10 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
builder_->createCompositeExtract(color_uint4, type_uint_, 0);
spv::Id component_width = builder_->makeUintConstant(8);
for (uint32_t i = 1; i < 4; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(packed_8_8_8_8);
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_uint4, type_uint_, i));
id_vector_temp_.push_back(builder_->makeUintConstant(8 * i));
id_vector_temp_.push_back(component_width);
packed_8_8_8_8 = builder_->createOp(spv::OpBitFieldInsert, type_uint_,
id_vector_temp_);
packed_8_8_8_8 = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, packed_8_8_8_8,
builder_->createCompositeExtract(color_uint4, type_uint_, i),
builder_->makeUintConstant(8 * i), component_width);
}
builder_->createBranch(&block_format_merge);
}
@ -2614,13 +2586,10 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
builder_->createRvalueSwizzle(spv::NoPrecision, type_float3_,
color_float4, uint_vector_temp_),
false);
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_float4, type_float_, 3));
id_vector_temp_.push_back(const_float_0_);
id_vector_temp_.push_back(const_float_1_);
spv::Id alpha_clamped = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp, id_vector_temp_);
spv::Id alpha_clamped = builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createCompositeExtract(color_float4, type_float_, 3),
const_float_0_, const_float_1_);
// Bypass the `getNumTypeConstituents(typeId) == (int)constituents.size()`
// assertion in createCompositeConstruct, OpCompositeConstruct can
// construct vectors not only from scalars, but also from other vectors.
@ -2646,14 +2615,10 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
builder_->createCompositeExtract(color_uint4, type_uint_, 0);
spv::Id component_width = builder_->makeUintConstant(8);
for (uint32_t i = 1; i < 4; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(packed_8_8_8_8_gamma);
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_uint4, type_uint_, i));
id_vector_temp_.push_back(builder_->makeUintConstant(8 * i));
id_vector_temp_.push_back(component_width);
packed_8_8_8_8_gamma = builder_->createOp(spv::OpBitFieldInsert,
type_uint_, id_vector_temp_);
packed_8_8_8_8_gamma = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, packed_8_8_8_8_gamma,
builder_->createCompositeExtract(color_uint4, type_uint_, i),
builder_->makeUintConstant(8 * i), component_width);
}
builder_->createBranch(&block_format_merge);
}
@ -2666,13 +2631,9 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
spv::Id packed_2_10_10_10;
{
builder_->setBuildPoint(&block_format_2_10_10_10);
id_vector_temp_.clear();
id_vector_temp_.push_back(color_float4);
id_vector_temp_.push_back(const_float4_0_);
id_vector_temp_.push_back(const_float4_1_);
spv::Id color_clamped =
builder_->createBuiltinCall(type_float4_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_);
spv::Id color_clamped = builder_->createTriBuiltinCall(
type_float4_, ext_inst_glsl_std_450_, GLSLstd450NClamp, color_float4,
const_float4_0_, const_float4_1_);
id_vector_temp_.clear();
id_vector_temp_.resize(3, builder_->makeFloatConstant(1023.0f));
id_vector_temp_.push_back(builder_->makeFloatConstant(3.0f));
@ -2688,14 +2649,10 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
spv::Id rgb_width = builder_->makeUintConstant(10);
spv::Id alpha_width = builder_->makeUintConstant(2);
for (uint32_t i = 1; i < 4; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(packed_2_10_10_10);
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_uint4, type_uint_, i));
id_vector_temp_.push_back(builder_->makeUintConstant(10 * i));
id_vector_temp_.push_back(i == 3 ? alpha_width : rgb_width);
packed_2_10_10_10 = builder_->createOp(spv::OpBitFieldInsert, type_uint_,
id_vector_temp_);
packed_2_10_10_10 = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, packed_2_10_10_10,
builder_->createCompositeExtract(color_uint4, type_uint_, i),
builder_->makeUintConstant(10 * i), i == 3 ? alpha_width : rgb_width);
}
builder_->createBranch(&block_format_merge);
}
@ -2717,15 +2674,12 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
ext_inst_glsl_std_450_);
}
// Alpha.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_float4, type_float_, 3));
id_vector_temp_.push_back(const_float_0_);
id_vector_temp_.push_back(const_float_1_);
spv::Id alpha_scaled = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450NClamp, id_vector_temp_),
builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createCompositeExtract(color_float4, type_float_, 3),
const_float_0_, const_float_1_),
builder_->makeFloatConstant(3.0f));
spv::Id alpha_offset = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, alpha_scaled, unorm_round_offset_float);
@ -2735,21 +2689,14 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
packed_2_10_10_10_float = color_components[0];
spv::Id rgb_width = builder_->makeUintConstant(10);
for (uint32_t i = 1; i < 3; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(packed_2_10_10_10_float);
id_vector_temp_.push_back(color_components[i]);
id_vector_temp_.push_back(builder_->makeUintConstant(10 * i));
id_vector_temp_.push_back(rgb_width);
packed_2_10_10_10_float = builder_->createOp(spv::OpBitFieldInsert,
type_uint_, id_vector_temp_);
packed_2_10_10_10_float = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, packed_2_10_10_10_float,
color_components[i], builder_->makeUintConstant(10 * i), rgb_width);
}
id_vector_temp_.clear();
id_vector_temp_.push_back(packed_2_10_10_10_float);
id_vector_temp_.push_back(color_components[3]);
id_vector_temp_.push_back(builder_->makeUintConstant(30));
id_vector_temp_.push_back(builder_->makeUintConstant(2));
packed_2_10_10_10_float =
builder_->createOp(spv::OpBitFieldInsert, type_uint_, id_vector_temp_);
packed_2_10_10_10_float = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, packed_2_10_10_10_float,
color_components[3], builder_->makeUintConstant(30),
builder_->makeUintConstant(2));
builder_->createBranch(&block_format_merge);
}
spv::Block& block_format_2_10_10_10_float_end = *builder_->getBuildPoint();
@ -2771,16 +2718,15 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
id_vector_temp_.clear();
// NaN to 0, not to -32.
id_vector_temp_.push_back(builder_->createTriOp(
spv::OpSelect, type_float4_,
builder_->createUnaryOp(spv::OpIsNan, type_bool4_, color_float4),
const_float4_0_, color_float4));
id_vector_temp_.push_back(const_float4_minus_32);
id_vector_temp_.push_back(const_float4_32);
spv::Id color_scaled = builder_->createNoContractionBinOp(
spv::OpVectorTimesScalar, type_float4_,
builder_->createBuiltinCall(type_float4_, ext_inst_glsl_std_450_,
GLSLstd450FClamp, id_vector_temp_),
builder_->createTriBuiltinCall(
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
builder_->createTriOp(spv::OpSelect, type_float4_,
builder_->createUnaryOp(
spv::OpIsNan, type_bool4_, color_float4),
const_float4_0_, color_float4),
const_float4_minus_32, const_float4_32),
builder_->makeFloatConstant(32767.0f / 32.0f));
id_vector_temp_.clear();
id_vector_temp_.resize(4, builder_->makeFloatConstant(-0.5f));
@ -2798,15 +2744,11 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
builder_->createUnaryOp(spv::OpConvertFToS, type_int4_, color_offset));
spv::Id component_offset_width = builder_->makeUintConstant(16);
for (uint32_t i = 0; i < 2; ++i) {
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_uint4, type_uint_, 2 * i));
id_vector_temp_.push_back(
builder_->createCompositeExtract(color_uint4, type_uint_, 2 * i + 1));
id_vector_temp_.push_back(component_offset_width);
id_vector_temp_.push_back(component_offset_width);
packed_16[i] = builder_->createOp(spv::OpBitFieldInsert, type_uint_,
id_vector_temp_);
packed_16[i] = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_,
builder_->createCompositeExtract(color_uint4, type_uint_, 2 * i),
builder_->createCompositeExtract(color_uint4, type_uint_, 2 * i + 1),
component_offset_width, component_offset_width);
}
builder_->createBranch(&block_format_merge);
}
@ -2828,27 +2770,22 @@ std::array<spv::Id, 2> SpirvShaderTranslator::FSI_ClampAndPackColor(
id_vector_temp_.resize(4, builder_->makeFloatConstant(65504.0f));
spv::Id const_float4_float16_max =
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
id_vector_temp_.clear();
// NaN to 0, not to -max.
id_vector_temp_.push_back(builder_->createTriOp(
spv::OpSelect, type_float4_,
builder_->createUnaryOp(spv::OpIsNan, type_bool4_, color_float4),
const_float4_0_, color_float4));
id_vector_temp_.push_back(const_float4_minus_float16_max);
id_vector_temp_.push_back(const_float4_float16_max);
spv::Id color_clamped =
builder_->createBuiltinCall(type_float4_, ext_inst_glsl_std_450_,
GLSLstd450FClamp, id_vector_temp_);
spv::Id color_clamped = builder_->createTriBuiltinCall(
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
builder_->createTriOp(
spv::OpSelect, type_float4_,
builder_->createUnaryOp(spv::OpIsNan, type_bool4_, color_float4),
const_float4_0_, color_float4),
const_float4_minus_float16_max, const_float4_float16_max);
for (uint32_t i = 0; i < 2; ++i) {
uint_vector_temp_.clear();
uint_vector_temp_.push_back(2 * i);
uint_vector_temp_.push_back(2 * i + 1);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->createRvalueSwizzle(
spv::NoPrecision, type_float2_, color_clamped, uint_vector_temp_));
packed_16_float[i] =
builder_->createBuiltinCall(type_uint_, ext_inst_glsl_std_450_,
GLSLstd450PackHalf2x16, id_vector_temp_);
packed_16_float[i] = builder_->createUnaryBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
color_clamped, uint_vector_temp_));
}
builder_->createBranch(&block_format_merge);
}
@ -3113,12 +3050,9 @@ std::array<spv::Id, 4> SpirvShaderTranslator::FSI_UnpackColor(
builder_->makeUintConstant(16 * (j & 1)),
component_width)),
component_scale);
id_vector_temp_.clear();
id_vector_temp_.push_back(component_min);
id_vector_temp_.push_back(component);
component =
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450FMax, id_vector_temp_);
component = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450FMax, component_min,
component);
unpacked_16[i][j] = component;
}
builder_->createBranch(&block_format_merge);
@ -3141,11 +3075,9 @@ std::array<spv::Id, 4> SpirvShaderTranslator::FSI_UnpackColor(
: &block_format_16_16_float);
// TODO(Triang3l): Xenos extended-range float16.
for (uint32_t j = 0; j <= i; ++j) {
id_vector_temp_.clear();
id_vector_temp_.push_back(color_packed[j]);
spv::Id components_float2 = builder_->createBuiltinCall(
spv::Id components_float2 = builder_->createUnaryBuiltinCall(
type_float2_, ext_inst_glsl_std_450_, GLSLstd450UnpackHalf2x16,
id_vector_temp_);
color_packed[j]);
for (uint32_t k = 0; k < 2; ++k) {
unpacked_16_float[i][2 * j + k] = builder_->createCompositeExtract(
components_float2, type_float_, k);
@ -3236,19 +3168,16 @@ spv::Id SpirvShaderTranslator::FSI_FlushNaNClampAndInBlending(
builder_->createConditionalBranch(is_fixed_point, &block_is_fixed_point_if,
&block_is_fixed_point_merge);
builder_->setBuildPoint(&block_is_fixed_point_if);
id_vector_temp_.clear();
// Flush NaN to 0 even for signed (NMax would flush it to the minimum value).
id_vector_temp_.push_back(builder_->createTriOp(
spv::OpSelect, color_or_alpha_type,
builder_->createUnaryOp(spv::OpIsNan,
type_bool_vectors_[component_count - 1],
color_or_alpha),
const_float_vectors_0_[component_count - 1], color_or_alpha));
id_vector_temp_.push_back(min_value);
id_vector_temp_.push_back(max_value);
spv::Id color_or_alpha_clamped =
builder_->createBuiltinCall(color_or_alpha_type, ext_inst_glsl_std_450_,
GLSLstd450FClamp, id_vector_temp_);
spv::Id color_or_alpha_clamped = builder_->createTriBuiltinCall(
color_or_alpha_type, ext_inst_glsl_std_450_, GLSLstd450FClamp,
builder_->createTriOp(
spv::OpSelect, color_or_alpha_type,
builder_->createUnaryOp(spv::OpIsNan,
type_bool_vectors_[component_count - 1],
color_or_alpha),
const_float_vectors_0_[component_count - 1], color_or_alpha),
min_value, max_value);
builder_->createBranch(&block_is_fixed_point_merge);
builder_->setBuildPoint(&block_is_fixed_point_merge);
id_vector_temp_.clear();
@ -3426,16 +3355,12 @@ spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor(
spv::Id result_source_alpha_saturate;
{
builder_->setBuildPoint(&block_factor_source_alpha_saturate);
spv::Id one_minus_dest_alpha = builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, const_float_1_, dest_alpha);
id_vector_temp_.clear();
id_vector_temp_.push_back(source_alpha);
id_vector_temp_.push_back(one_minus_dest_alpha);
spv::Id factor_source_alpha_saturate = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMin, id_vector_temp_);
result_source_alpha_saturate = builder_->createNoContractionBinOp(
spv::OpVectorTimesScalar, type_float3_, value,
factor_source_alpha_saturate);
builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMin, source_alpha,
builder_->createNoContractionBinOp(spv::OpFSub, type_float_,
const_float_1_, dest_alpha)));
builder_->createBranch(&block_factor_merge);
}
@ -3605,15 +3530,12 @@ spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor(
spv::Id result_source_alpha_saturate;
{
builder_->setBuildPoint(&block_factor_source_alpha_saturate);
spv::Id one_minus_dest_alpha = builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, const_float_1_, dest_alpha);
id_vector_temp_.clear();
id_vector_temp_.push_back(source_alpha);
id_vector_temp_.push_back(one_minus_dest_alpha);
spv::Id factor_source_alpha_saturate = builder_->createBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMin, id_vector_temp_);
result_source_alpha_saturate = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, value, factor_source_alpha_saturate);
spv::OpFMul, type_float_, value,
builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMin, source_alpha,
builder_->createNoContractionBinOp(spv::OpFSub, type_float_,
const_float_1_, dest_alpha)));
builder_->createBranch(&block_factor_merge);
}
@ -3687,22 +3609,18 @@ spv::Id SpirvShaderTranslator::FSI_BlendColorOrAlphaWithUnclampedResult(
// Min case.
builder_->setBuildPoint(&block_min_max_min);
id_vector_temp_.clear();
id_vector_temp_.push_back(is_alpha ? source_alpha_clamped
: source_color_clamped);
id_vector_temp_.push_back(is_alpha ? dest_alpha : dest_color);
spv::Id result_min = builder_->createBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450FMin, id_vector_temp_);
spv::Id result_min = builder_->createBinBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450FMin,
is_alpha ? source_alpha_clamped : source_color_clamped,
is_alpha ? dest_alpha : dest_color);
builder_->createBranch(&block_min_max_merge);
// Max case.
builder_->setBuildPoint(&block_min_max_max);
id_vector_temp_.clear();
id_vector_temp_.push_back(is_alpha ? source_alpha_clamped
: source_color_clamped);
id_vector_temp_.push_back(is_alpha ? dest_alpha : dest_color);
spv::Id result_max = builder_->createBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450FMax, id_vector_temp_);
spv::Id result_max = builder_->createBinBuiltinCall(
value_type, ext_inst_glsl_std_450_, GLSLstd450FMax,
is_alpha ? source_alpha_clamped : source_color_clamped,
is_alpha ? dest_alpha : dest_color);
builder_->createBranch(&block_min_max_merge);
// Blending with factors.

File diff suppressed because it is too large Load Diff