{ FPPS4/chip/shader_dump.pas

  356 lines
  7.9 KiB
  Plaintext }

unit shader_dump;
{$mode objfpc}{$H+}
interface
uses
Classes,
SysUtils,
md_systm,
kern_authinfo,
kern_proc,
murmurhash,
si_ci_vi_merged_offset,
ps4_shader,
vShader,
vRegs2Vulkan;
type
 //Header written in front of every payload by DUMP_BLOCK.
 TDUMP_WORD=packed record
  REG  :WORD; //register index passed to DUMP_BLOCK
  COUNT:WORD; //payload length in dwords, minus one
 end;
Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD);
Function get_dev_progname:RawByteString;
function DumpCS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
function DumpPS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
function DumpVS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
implementation
uses
kern_dmem;
//Appends one block to the dump file F: a TDUMP_WORD header followed by the
//payload, padded to a whole number of dwords.
//
// F    - destination file handle; nothing is written if it is feInvalidHandle
// REG  - register index stored in the header
// P    - payload start; nothing is written if it is nil
// Size - payload size in bytes; nothing is written if it is 0, and anything
//        above MAX_SIZE is truncated because COUNT is a 16-bit "dwords-1" field
Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD);
const
 MAX_SIZE=($FFFF+1)*4;
var
 W:TDUMP_WORD;
 Padded:DWORD;
 Zero:DWORD;
begin
 if (F=feInvalidHandle) then Exit;
 if (Size=0) or (P=nil) then Exit;

 if (Size>MAX_SIZE) then Size:=MAX_SIZE;

 //size rounded up to the next multiple of 4
 Padded:=((Size+3) div 4)*4;

 W.REG  :=REG;
 W.COUNT:=(Padded div 4)-1;

 FileWrite(F,W,SizeOf(W));

 //Write only the bytes the caller owns; the original wrote Align(Size,4)
 //bytes straight from P and so could read up to 3 bytes past the buffer.
 FileWrite(F,P^,Size);

 if (Padded>Size) then
 begin
  Zero:=0;
  FileWrite(F,Zero,Padded-Size); //explicit zero padding to a dword boundary
 end;
end;
//Reads register REG through GPU_REGS.get_reg and appends its value
//to F as a single-dword block.
Procedure DUMP_REG(F:THandle;REG:WORD;var GPU_REGS:TGPU_REGS);
var
 Value:DWORD;
begin
 Value:=GPU_REGS.get_reg(REG);
 DUMP_BLOCK(F,REG,@Value,SizeOf(Value));
end;
type
//One action code per user-data slot (16 slots), filled by _calc_usage and
//consumed by DUMP_USER_DATA:
// 0 - dump the slot's raw dword
// 1 - skip the slot
// 2 - slots i,i+1 are resolved with getFetchAddress and the shader code is dumped
// 3 - slots i,i+1 are resolved with getBufferAddress and the buffer is dumped
TUSER_DATA_USEAGE=array[0..15] of Byte;
//Marks slot r with `kind` and slot r+1 with 1 (skip), i.e. one two-dword
//pointer. A pair that does not fit completely inside the table is ignored
//so that nothing is ever written outside the array.
procedure _mark_ptr_pair(var U:TUSER_DATA_USEAGE;r:Integer;kind:Byte);
begin
 if (r<Low(U)) or (r+1>High(U)) then Exit;
 U[r+0]:=kind;
 U[r+1]:=1; //skip
end;

//Builds the per-slot action table for the 16 user-data registers.
//
// info      - shader binary info; may be nil, then no slot is treated as a pointer
// USER_DATA - the 16 user-data dwords (must not be nil, it is read at the end)
//
//Result codes: 0 = dump raw dword, 1 = skip, 2 = fetch-shader pointer
//(getFetchAddress), 3 = buffer pointer (getBufferAddress).
//Slots that no input-usage entry claims and whose value is 0 are marked skip.
function _calc_usage(info:PShaderBinaryInfo;USER_DATA:PDWORD):TUSER_DATA_USEAGE;
var
 i,w,pairs:Integer;
 Slots:PInputUsageSlot;
 r,c:Byte;
begin
 Result:=Default(TUSER_DATA_USEAGE);

 if (info<>nil) then
 begin
  Slots:=_calc_shader_slot(info);
  if (Slots<>nil) then
  For i:=0 to info^.numInputUsageSlots-1 do
  Case Slots[i].m_usageType of

   kShaderInputUsageSubPtrFetchShader:
    begin
     r:=Slots[i].m_startRegister;
     Assert(r<15);
     _mark_ptr_pair(Result,r,2); //getFetchAddress
    end;

   kShaderInputUsagePtrResourceTable,
   kShaderInputUsagePtrInternalResourceTable,
   kShaderInputUsagePtrSamplerTable,
   kShaderInputUsagePtrConstBufferTable,
   kShaderInputUsagePtrVertexBufferTable,
   kShaderInputUsagePtrSoBufferTable,
   kShaderInputUsagePtrRwResourceTable,
   kShaderInputUsagePtrInternalGlobalTable,
   kShaderInputUsagePtrExtendedUserData,
   kShaderInputUsagePtrIndirectResourceTable,
   kShaderInputUsagePtrIndirectInternalResourceTable,
   kShaderInputUsagePtrIndirectRwResourceTable:
    begin
     r:=Slots[i].m_startRegister;
     Assert(r<15);
     _mark_ptr_pair(Result,r,3); //getBufferAddress
    end;

   kShaderInputUsageImmShaderResourceTable:
    begin
     r:=Slots[i].m_startRegister;
     Assert(r<15);
     c:=Slots[i].m_srtSizeInDWordMinusOne+1;
     Assert(c<=8);
     //Integer counter: when the table is a single dword, pairs is 0 and the
     //loop must not run at all (a Byte counter with bound 0-1 is fragile).
     pairs:=c div 2;
     For w:=0 to pairs-1 do
     begin
      //r may be as high as 14, so later pairs can fall past slot 15;
      //_mark_ptr_pair drops those instead of overrunning Result.
      _mark_ptr_pair(Result,r+w*2,3); //getBufferAddress
     end;
    end;

  end;
 end;

 //unclaimed slots holding zero carry no information -> skip them
 For i:=Low(Result) to High(Result) do
  if (Result[i]=0) and (USER_DATA[i]=0) then
  begin
   Result[i]:=1;
  end;
end;
//Dumps the 16 user-data registers of one shader stage to F.
//
// F         - destination file handle (passed on to DUMP_BLOCK)
// base      - shader code pointer, used to look up the shader info
// REG       - register index of user-data slot 0; slot i is written as REG+i
// USER_DATA - the 16 user-data dwords
//
//What is written for each slot is decided by _calc_usage:
// 0 - the raw dword
// 1 - nothing
// 2 - the code found at getFetchAddress(slot i, slot i+1)
// 3 - InputUsageSize bytes copied from getBufferAddress(slot i, slot i+1)
Procedure DUMP_USER_DATA(F:THandle;base:Pointer;REG:WORD;USER_DATA:PDWORD);
const
 InputUsageSize=1024; //real size is unknown, a fixed window is dumped
var
 i:Integer;
 kind:Byte;
 buf:Pointer;
 InputUsagePtr:Pointer;
 size:DWORD;
 USEAGE_DATA:TUSER_DATA_USEAGE;
begin
 USEAGE_DATA:=_calc_usage(_calc_shader_info(base),USER_DATA);
 //
 InputUsagePtr:=nil;
 //
 try
  For i:=Low(USEAGE_DATA) to High(USEAGE_DATA) do
  begin
   kind:=USEAGE_DATA[i];

   //A pointer occupies slots i and i+1. The last slot has no partner, so
   //reading USER_DATA[i+1] there would run past the register array;
   //fall back to dumping the raw dword.
   if (kind in [2,3]) and (i>=High(USEAGE_DATA)) then
   begin
    kind:=0;
   end;

   Case kind of
    0:DUMP_BLOCK(F,REG+i,@USER_DATA[i],SizeOf(DWORD));
    1:; //skip
    2:
     begin
      buf:=getFetchAddress(USER_DATA[i],USER_DATA[i+1]);
      if (buf<>nil) then
      begin
       size:=_calc_shader_size(buf,0,True);
       DUMP_BLOCK(F,REG+i,buf,size);
      end;
     end;
    3:
     begin
      buf:=getBufferAddress(USER_DATA[i],USER_DATA[i+1]);
      if (buf<>nil) then
      begin
       //scratch buffer is allocated once and reused for every pointer slot
       if (InputUsagePtr=nil) then
       begin
        InputUsagePtr:=GetMem(InputUsageSize);
       end;
       //cleared first so that whatever md_copyin does not fill is zero
       FillChar(InputUsagePtr^,InputUsageSize,0);
       md_copyin(buf,InputUsagePtr,InputUsageSize,nil);
       DUMP_BLOCK(F,REG+i,InputUsagePtr,InputUsageSize);
      end;
     end;
   end;
  end;
 finally
  //released even if one of the helpers above raises
  if (InputUsagePtr<>nil) then
  begin
   FreeMem(InputUsagePtr);
  end;
 end;
end;
//Returns S without leading and trailing characters that are either
//<= ' ' (space and control characters) or '?'.
//Characters in the middle of the string are left untouched.
function Trim(const S: RawByteString): RawByteString;

 //True for a character that is stripped from either end.
 function Droppable(c:AnsiChar):Boolean;
 begin
  Result:=(c<=' ') or (c='?');
 end;

var
 First,Last:SizeInt;
begin
 First:=1;
 Last :=Length(S);

 //advance past the unwanted prefix
 while (First<=Last) and Droppable(S[First]) do
  Inc(First);

 //retreat past the unwanted suffix
 while (Last>=First) and Droppable(S[Last]) do
  Dec(Last);

 Result:=Copy(S,First,Last-First+1);
end;
//Returns the name used as prefix of the dump files: the trimmed
//g_appinfo.CUSANAME, or the trimmed p_proc.p_comm when the former is empty.
Function get_dev_progname:RawByteString;
begin
 Result:=Trim(g_appinfo.CUSANAME);
 if (Result<>'') then Exit;
 //no title id available -> use the process name instead
 Result:=Trim(p_proc.p_comm);
end;
//Dumps the compute shader currently set in GPU_REGS, together with the
//compute registers listed below, to
// shader_dump\<progname>_cs_<hash as 16 hex digits>_<alias>.dump
//
// GPU_REGS - register state the code address and register values are read from
// hash     - hash used in the file name; 0 means "compute it from the code"
// alias    - number appended to the file name
//
//Returns the dump file name, or '' when no compute shader address is set.
//The name is returned as well when the file already exists (it is not
//rewritten) and when the file could not be created.
function DumpCS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
var
 size:DWORD;
 base:Pointer;
 F:THandle;
 fname:RawByteString;
begin
 Result:='';
 base:=GPU_REGS.get_code_addr(vShaderStageCs);
 if (base=nil) then Exit;

 base:=get_dmem_ptr(base);
 size:=_calc_shader_size(base);

 if (hash=0) then
 begin
  hash:=MurmurHash64A(base,size,0);
 end;

 fname:='shader_dump\'+get_dev_progname+'_cs_'+HexStr(hash,16)+'_'+IntToStr(alias)+'.dump';
 Result:=fname;

 if FileExists(fname) then Exit;

 CreateDir('shader_dump');

 F:=FileCreate(fname);
 //NOTE(review): the name is still returned on failure, as before; callers
 //that open the file must cope with it being absent.
 if (F=feInvalidHandle) then Exit;

 try
  DUMP_BLOCK(F,mmCOMPUTE_PGM_LO,base,size);

  DUMP_REG(F,mmCOMPUTE_PGM_RSRC1   ,GPU_REGS);
  DUMP_REG(F,mmCOMPUTE_PGM_RSRC2   ,GPU_REGS);

  DUMP_REG(F,mmCOMPUTE_NUM_THREAD_X,GPU_REGS);
  DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Y,GPU_REGS);
  DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Z,GPU_REGS);

  DUMP_USER_DATA(F,base,mmCOMPUTE_USER_DATA_0,@GPU_REGS.SC_REG^.COMPUTE_USER_DATA);

  DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE0,GPU_REGS);
  DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE1,GPU_REGS);
  DUMP_REG(F,mmCOMPUTE_RESOURCE_LIMITS       ,GPU_REGS);
 finally
  //close the handle even if one of the dump helpers raises
  FileClose(F);
 end;
end;
//Dumps the pixel shader currently set in GPU_REGS, together with the
//pixel-stage registers listed below, to
// shader_dump\<progname>_ps_<hash as 16 hex digits>_<alias>.dump
//
// GPU_REGS - register state the code address and register values are read from
// hash     - hash used in the file name; 0 means "compute it from the code"
// alias    - number appended to the file name
//
//Returns the dump file name, or '' when no pixel shader address is set.
//The name is returned as well when the file already exists (it is not
//rewritten) and when the file could not be created.
function DumpPS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
var
 i:Integer;
 size:DWORD;
 base:Pointer;
 F:THandle;
 fname:RawByteString;
begin
 Result:='';
 base:=GPU_REGS.get_code_addr(vShaderStagePs);
 if (base=nil) then Exit;

 base:=get_dmem_ptr(base);
 size:=_calc_shader_size(base);

 if (hash=0) then
 begin
  hash:=MurmurHash64A(base,size,0);
 end;

 fname:='shader_dump\'+get_dev_progname+'_ps_'+HexStr(hash,16)+'_'+IntToStr(alias)+'.dump';
 Result:=fname;

 if FileExists(fname) then Exit;

 CreateDir('shader_dump');

 F:=FileCreate(fname);
 //NOTE(review): the name is still returned on failure, as before; callers
 //that open the file must cope with it being absent.
 if (F=feInvalidHandle) then Exit;

 try
  DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_PS,base,size);

  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_PS,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_PS,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_PS,GPU_REGS);

  DUMP_REG(F,mmSPI_SHADER_Z_FORMAT    ,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_COL_FORMAT  ,GPU_REGS);

  DUMP_REG(F,mmSPI_PS_INPUT_ENA       ,GPU_REGS);
  DUMP_REG(F,mmSPI_PS_INPUT_ADDR      ,GPU_REGS);
  DUMP_REG(F,mmSPI_PS_IN_CONTROL      ,GPU_REGS);
  DUMP_REG(F,mmSPI_BARYC_CNTL         ,GPU_REGS);

  DUMP_REG(F,mmDB_SHADER_CONTROL      ,GPU_REGS);
  DUMP_REG(F,mmCB_SHADER_MASK         ,GPU_REGS);

  DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_PS_0,@GPU_REGS.SG_REG^.SPI_SHADER_USER_DATA_PS);

  //the 32 consecutive registers starting at mmSPI_PS_INPUT_CNTL_0
  For i:=0 to 31 do
  begin
   DUMP_REG(F,mmSPI_PS_INPUT_CNTL_0+i,GPU_REGS);
  end;
 finally
  //close the handle even if one of the dump helpers raises
  FileClose(F);
 end;
end;
//Dumps the vertex shader currently set in GPU_REGS, together with the
//vertex-stage registers listed below, to
// shader_dump\<progname>_vs_<hash as 16 hex digits>_<alias>.dump
//
// GPU_REGS - register state the code address and register values are read from
// hash     - hash used in the file name; 0 means "compute it from the code"
// alias    - number appended to the file name
//
//Returns the dump file name, or '' when no vertex shader address is set.
//The name is returned as well when the file already exists (it is not
//rewritten) and when the file could not be created.
function DumpVS(var GPU_REGS:TGPU_REGS;hash,alias:QWORD):RawByteString;
var
 size:DWORD;
 base:Pointer;
 F:THandle;
 fname:RawByteString;
begin
 Result:='';
 base:=GPU_REGS.get_code_addr(vShaderStageVs);
 if (base=nil) then Exit;

 base:=get_dmem_ptr(base);
 size:=_calc_shader_size(base);

 if (hash=0) then
 begin
  hash:=MurmurHash64A(base,size,0);
 end;

 fname:='shader_dump\'+get_dev_progname+'_vs_'+HexStr(hash,16)+'_'+IntToStr(alias)+'.dump';
 Result:=fname;

 if FileExists(fname) then Exit;

 CreateDir('shader_dump');

 F:=FileCreate(fname);
 //NOTE(review): the name is still returned on failure, as before; callers
 //that open the file must cope with it being absent.
 if (F=feInvalidHandle) then Exit;

 try
  DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_VS,base,size);

  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_VS,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_VS,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_VS,GPU_REGS);

  DUMP_REG(F,mmSPI_VS_OUT_CONFIG      ,GPU_REGS);
  DUMP_REG(F,mmSPI_SHADER_POS_FORMAT  ,GPU_REGS);
  DUMP_REG(F,mmPA_CL_VS_OUT_CNTL      ,GPU_REGS);

  DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_VS_0,@GPU_REGS.SG_REG^.SPI_SHADER_USER_DATA_VS);

  DUMP_REG(F,mmVGT_DMA_NUM_INSTANCES  ,GPU_REGS);
  DUMP_REG(F,mmVGT_NUM_INSTANCES      ,GPU_REGS);
 finally
  //close the handle even if one of the dump helpers raises
  FileClose(F);
 end;
end;
end.