Massively disgusting and incomplete shader translator.

This commit is contained in:
Ben Vanik 2013-10-12 22:14:23 -07:00
parent 0ef278325f
commit 96a857e892
10 changed files with 1145 additions and 60 deletions

View File

@ -83,15 +83,12 @@ void D3D11GraphicsDriver::SetShader(
type, p, length); type, p, length);
// Disassemble. // Disassemble.
char* source = shader->Disassemble(); const char* source = shader->disasm_src();
if (!source) { if (!source) {
source = "<failed to disassemble>"; source = "<failed to disassemble>";
} }
XELOGGPU("D3D11: set shader %d at %0.8X (%db):\n%s", XELOGGPU("D3D11: set shader %d at %0.8X (%db):\n%s",
type, address, length, source); type, address, length, source);
if (source) {
xe_free(source);
}
// Stash for later. // Stash for later.
switch (type) { switch (type) {
@ -289,7 +286,7 @@ int D3D11GraphicsDriver::BindShaders() {
if (ps) { if (ps) {
if (!ps->is_prepared()) { if (!ps->is_prepared()) {
// Prepare for use. // Prepare for use.
if (ps->Prepare(&program_cntl)) { if (ps->Prepare(&program_cntl, vs)) {
XELOGGPU("D3D11: failed to prepare pixel shader"); XELOGGPU("D3D11: failed to prepare pixel shader");
state_.pixel_shader = NULL; state_.pixel_shader = NULL;
return 1; return 1;

File diff suppressed because it is too large Load Diff

View File

@ -22,6 +22,13 @@ namespace xe {
namespace gpu { namespace gpu {
namespace d3d11 { namespace d3d11 {
struct Output;
// Context object passed by reference to D3D11Shader::TranslateExec.
typedef struct {
// Output is only forward-declared in this header.
// NOTE(review): presumably the sink that translated source text is written
// to - confirm against the translator implementation.
Output* output;
// Shader type value; NOTE(review): presumably the type of the shader
// currently being translated - confirm against the caller.
xenos::XE_GPU_SHADER_TYPE type;
} xe_gpu_translate_ctx_t;
class D3D11Shader : public Shader { class D3D11Shader : public Shader {
public: public:
@ -34,8 +41,18 @@ protected:
const uint8_t* src_ptr, size_t length, const uint8_t* src_ptr, size_t length,
uint64_t hash); uint64_t hash);
// Returns the stored translated source pointer (translated_src_) without
// transferring ownership.
const char* translated_src() const { return translated_src_; }
// Setter counterpart for translated_src_; the definition is not in this header.
// NOTE(review): takes a non-const char*, so it may adopt the buffer rather
// than copy it - confirm ownership rules in the implementation.
void set_translated_src(char* value);
// Judging by the name and parameters, processes one exec control-flow
// instruction using the supplied translation context.
// NOTE(review): meaning of the int return (0 == success?) is not visible
// here - confirm.
int TranslateExec(
xe_gpu_translate_ctx_t& ctx, const xenos::instr_cf_exec_t& cf);
// Takes shader source text and returns an ID3D10Blob*.
// NOTE(review): presumably returns NULL on compile failure and the caller
// releases the blob - confirm in the implementation.
ID3D10Blob* Compile(const char* shader_source);
protected: protected:
ID3D11Device* device_; ID3D11Device* device_;
char* translated_src_;
}; };
@ -52,6 +69,9 @@ public:
int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl); int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl);
private:
const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl);
private: private:
ID3D11VertexShader* handle_; ID3D11VertexShader* handle_;
ID3D11InputLayout* input_layout_; ID3D11InputLayout* input_layout_;
@ -68,7 +88,12 @@ public:
ID3D11PixelShader* handle() const { return handle_; } ID3D11PixelShader* handle() const { return handle_; }
int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl); int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl,
D3D11VertexShader* input_shader);
private:
const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl,
D3D11VertexShader* input_shader);
private: private:
ID3D11PixelShader* handle_; ID3D11PixelShader* handle_;

View File

@ -55,15 +55,12 @@ void NopGraphicsDriver::SetShader(
type, p, length); type, p, length);
// Disassemble. // Disassemble.
char* source = shader->Disassemble(); const char* source = shader->disasm_src();
if (!source) { if (!source) {
source = "<failed to disassemble>"; source = "<failed to disassemble>";
} }
XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s", XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s",
type, address, length, source); type, address, length, source);
if (source) {
xe_free(source);
}
} }
void NopGraphicsDriver::DrawIndexAuto( void NopGraphicsDriver::DrawIndexAuto(

View File

@ -21,7 +21,8 @@ Shader::Shader(
XE_GPU_SHADER_TYPE type, XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length, const uint8_t* src_ptr, size_t length,
uint64_t hash) : uint64_t hash) :
type_(type), hash_(hash), is_prepared_(false) { type_(type), hash_(hash), is_prepared_(false), disasm_src_(NULL) {
xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
xe_zero_struct(fetch_vtx_slots_, sizeof(fetch_vtx_slots_)); xe_zero_struct(fetch_vtx_slots_, sizeof(fetch_vtx_slots_));
// Verify. // Verify.
@ -37,9 +38,15 @@ Shader::Shader(
// Gather input/output registers/etc. // Gather input/output registers/etc.
GatherIO(); GatherIO();
// Disassemble, for debugging.
disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
} }
Shader::~Shader() { Shader::~Shader() {
if (disasm_src_) {
xe_free(disasm_src_);
}
xe_free(dwords_); xe_free(dwords_);
} }
@ -73,9 +80,26 @@ void Shader::GatherIO() {
void Shader::GatherAlloc(const instr_cf_alloc_t* cf) { void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
allocs_.push_back(*cf); allocs_.push_back(*cf);
switch (cf->buffer_select) {
case SQ_POSITION:
// Position (SV_POSITION).
alloc_counts_.positions += cf->size + 1;
break;
case SQ_PARAMETER_PIXEL:
// Output to PS (if VS), or frag output (if PS).
alloc_counts_.params += cf->size + 1;
break;
case SQ_MEMORY:
// MEMEXPORT?
alloc_counts_.memories += cf->size + 1;
break;
}
} }
void Shader::GatherExec(const instr_cf_exec_t* cf) { void Shader::GatherExec(const instr_cf_exec_t* cf) {
execs_.push_back(*cf);
uint32_t sequence = cf->serialize; uint32_t sequence = cf->serialize;
for (uint32_t i = 0; i < cf->count; i++) { for (uint32_t i = 0; i < cf->count; i++) {
uint32_t alu_off = (cf->address + i); uint32_t alu_off = (cf->address + i);
@ -129,7 +153,3 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
const instr_fetch_vtx_t* Shader::GetFetchVtxBySlot(uint32_t fetch_slot) { const instr_fetch_vtx_t* Shader::GetFetchVtxBySlot(uint32_t fetch_slot) {
return &fetch_vtx_slots_[fetch_slot]; return &fetch_vtx_slots_[fetch_slot];
} }
char* Shader::Disassemble() {
return DisassembleShader(type_, dwords_, dword_count_);
}

View File

@ -32,10 +32,16 @@ public:
uint64_t hash() const { return hash_; } uint64_t hash() const { return hash_; }
bool is_prepared() const { return is_prepared_; } bool is_prepared() const { return is_prepared_; }
const char* disasm_src() const { return disasm_src_; }
const xenos::instr_fetch_vtx_t* GetFetchVtxBySlot(uint32_t fetch_slot); const xenos::instr_fetch_vtx_t* GetFetchVtxBySlot(uint32_t fetch_slot);
// NOTE: xe_free() the returned string! typedef struct {
char* Disassemble(); uint32_t positions;
uint32_t params;
uint32_t memories;
} alloc_counts_t;
const alloc_counts_t& alloc_counts() const { return alloc_counts_; }
private: private:
void GatherIO(); void GatherIO();
@ -50,6 +56,10 @@ protected:
uint64_t hash_; uint64_t hash_;
bool is_prepared_; bool is_prepared_;
// Disassembly text for debugging: initialized to NULL, then assigned from
// DisassembleShader() in the constructor and released with xe_free() in the
// destructor. Exposed read-only through disasm_src().
char* disasm_src_;
// Per-buffer allocation totals (positions/params/memories): zeroed in the
// constructor and accumulated by GatherAlloc().
alloc_counts_t alloc_counts_;
// Copy of each exec control-flow instruction, appended by GatherExec().
std::vector<xenos::instr_cf_exec_t> execs_;
std::vector<xenos::instr_cf_alloc_t> allocs_; std::vector<xenos::instr_cf_alloc_t> allocs_;
std::vector<xenos::instr_fetch_vtx_t> fetch_vtxs_; std::vector<xenos::instr_fetch_vtx_t> fetch_vtxs_;
xenos::instr_fetch_vtx_t fetch_vtx_slots_[96]; xenos::instr_fetch_vtx_t fetch_vtx_slots_[96];

View File

@ -297,6 +297,14 @@ XEPACKEDSTRUCT(instr_cf_exec_t, {
uint32_t address_mode : 1; // instr_addr_mode_t uint32_t address_mode : 1; // instr_addr_mode_t
uint32_t opc : 4; // instr_cf_opc_t uint32_t opc : 4; // instr_cf_opc_t
}); });
// Reports whether this exec instruction's opcode is one of the conditional
// variants (COND_EXEC, COND_PRED_EXEC, COND_EXEC_PRED_CLEAN, or their _END
// forms). Every other opcode yields false.
bool is_cond_exec() const {
  switch (this->opc) {
    case COND_EXEC:
    case COND_EXEC_END:
    case COND_PRED_EXEC:
    case COND_PRED_EXEC_END:
    case COND_EXEC_PRED_CLEAN:
    case COND_EXEC_PRED_CLEAN_END:
      return true;
    default:
      return false;
  }
}
}); });
XEPACKEDSTRUCT(instr_cf_loop_t, { XEPACKEDSTRUCT(instr_cf_loop_t, {

View File

@ -255,44 +255,50 @@ int disasm_alu(
output->append(" %sALU:\t", sync ? "(S)" : " "); output->append(" %sALU:\t", sync ? "(S)" : " ");
output->append("%s", vector_instructions[alu->vector_opc].name); if (!alu->scalar_write_mask && !alu->vector_write_mask) {
output->append(" <nop>\n");
if (alu->pred_select & 0x2) {
// seems to work similar to conditional execution in ARM instruction
// set, so let's use a similar syntax for now:
output->append((alu->pred_select & 0x1) ? "EQ" : "NE");
} }
output->append("\t"); if (alu->vector_write_mask) {
output->append("%s", vector_instructions[alu->vector_opc].name);
print_dstreg(output, if (alu->pred_select & 0x2) {
alu->vector_dest, alu->vector_write_mask, alu->export_data); // seems to work similar to conditional execution in ARM instruction
output->append(" = "); // set, so let's use a similar syntax for now:
if (vector_instructions[alu->vector_opc].num_srcs == 3) { output->append((alu->pred_select & 0x1) ? "EQ" : "NE");
}
output->append("\t");
print_dstreg(output,
alu->vector_dest, alu->vector_write_mask, alu->export_data);
output->append(" = ");
if (vector_instructions[alu->vector_opc].num_srcs == 3) {
print_srcreg(output,
alu->src3_reg, alu->src3_sel, alu->src3_swiz,
alu->src3_reg_negate, alu->src3_reg_abs);
output->append(", ");
}
print_srcreg(output, print_srcreg(output,
alu->src3_reg, alu->src3_sel, alu->src3_swiz, alu->src1_reg, alu->src1_sel, alu->src1_swiz,
alu->src3_reg_negate, alu->src3_reg_abs); alu->src1_reg_negate, alu->src1_reg_abs);
output->append(", "); if (vector_instructions[alu->vector_opc].num_srcs > 1) {
} output->append(", ");
print_srcreg(output, print_srcreg(output,
alu->src1_reg, alu->src1_sel, alu->src1_swiz, alu->src2_reg, alu->src2_sel, alu->src2_swiz,
alu->src1_reg_negate, alu->src1_reg_abs); alu->src2_reg_negate, alu->src2_reg_abs);
if (vector_instructions[alu->vector_opc].num_srcs > 1) { }
output->append(", ");
print_srcreg(output,
alu->src2_reg, alu->src2_sel, alu->src2_swiz,
alu->src2_reg_negate, alu->src2_reg_abs);
}
if (alu->vector_clamp) { if (alu->vector_clamp) {
output->append(" CLAMP"); output->append(" CLAMP");
} }
if (alu->export_data) { if (alu->export_data) {
print_export_comment(output, alu->vector_dest, type); print_export_comment(output, alu->vector_dest, type);
} }
output->append("\n"); output->append("\n");
}
if (alu->scalar_write_mask || !alu->vector_write_mask) { if (alu->scalar_write_mask || !alu->vector_write_mask) {
// 2nd optional scalar op: // 2nd optional scalar op:

View File

@ -45,8 +45,10 @@ typedef enum {
// XE_GPU_REG_SQ_PROGRAM_CNTL // XE_GPU_REG_SQ_PROGRAM_CNTL
typedef union { typedef union {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t vs_regs : 8; uint32_t vs_regs : 6;
uint32_t ps_regs : 8; uint32_t : 2;
uint32_t ps_regs : 6;
uint32_t : 2;
uint32_t vs_resource : 1; uint32_t vs_resource : 1;
uint32_t ps_resource : 1; uint32_t ps_resource : 1;
uint32_t param_gen : 1; uint32_t param_gen : 1;