From 90cf5fe6fff10befd6d1fad9476d32793c236ffa Mon Sep 17 00:00:00 2001 From: Pavel <68122101+red-prig@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:19:49 +0300 Subject: [PATCH] + --- chip/pm4_pfp.pas | 215 +++++++++++++++++++++++++++++++++++----- chip/pm4_stream.pas | 136 ++++++++++++++++++++++--- chip/pm4defs.pas | 89 +++++++++-------- rtl/bittype.pas | 48 +++++++-- sys/dev/dev_gc.pas | 4 +- vulkan/vImage.pas | 6 ++ vulkan/vRegs2Vulkan.pas | 65 +++++++++--- 7 files changed, 458 insertions(+), 105 deletions(-) diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index c68ae18b..e8dbe4bb 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -110,12 +110,6 @@ begin ib_base:=QWORD(buf^.ibBase); ib_size:=QWORD(buf^.ibSize)*sizeof(DWORD); - case op of - $c0023300:Writeln('INDIRECT_BUFFER (ccb) 0x',HexStr(ib_base,10)); - $c0023f00:Writeln('INDIRECT_BUFFER (dcb) 0x',HexStr(ib_base,10)); - else; - end; - addr:=nil; size:=0; @@ -150,6 +144,8 @@ var begin Result:=0; + pctx^.LastSetReg:=0; + i:=ibuf^.bpos; buff:=ibuf^.buff+i; i:=ibuf^.size-i; @@ -841,6 +837,9 @@ begin end; end; +const + ShdrType:array[0..1] of Pchar=('(GX)','(CS)'); + function pm4_parse_ccb(pctx:p_pfp_ctx;token:DWORD;buff:Pointer):Integer; begin Result:=0; @@ -858,7 +857,9 @@ begin if (PM4_TYPE_3_HEADER(token).opcode<>IT_NOP) or (not pctx^.print_hint) then begin - Writeln('IT_',get_op_name(PM4_TYPE_3_HEADER(token).opcode),' len:',PM4_LENGTH(token)); + Writeln('IT_',get_op_name(PM4_TYPE_3_HEADER(token).opcode), + ' ',ShdrType[PM4_TYPE_3_HEADER(token).shaderType], + ' len:',PM4_LENGTH(token)); end; case PM4_TYPE_3_HEADER(token).opcode of @@ -902,11 +903,21 @@ end; procedure onEventWriteEop(pctx:p_pfp_ctx;Body:PPM4CMDEVENTWRITEEOP); var addr:Pointer; - size:QWORD; begin - DWORD(pctx^.CX_REG.VGT_EVENT_INITIATOR):=Body^.eventType; + Case Body^.eventType of + kEopFlushCbDbCaches:; + kEopFlushAndInvalidateCbDbCaches:; + kEopCbDbReadsDone:; + else + Assert(False,'EventWriteEop: eventType=0x'+HexStr(Body^.eventType,1)); + end; - Assert(Body^.EVENT_INDEX=EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP); + if (Body^.eventIndex<>EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP) then + begin + Assert(False,'EventWriteEop: eventIndex=0x'+HexStr(Body^.eventIndex,1)); + end; + + DWORD(pctx^.CX_REG.VGT_EVENT_INITIATOR):=Body^.eventType; Writeln(' eventType =0x',HexStr(Body^.eventType,2)); Writeln(' interrupt =0x',HexStr(Body^.intSel shr 1,2)); @@ -929,11 +940,10 @@ begin } addr:=nil; - size:=0; if (Body^.dataSel in [1..4]) then begin - if get_dmem_ptr(Pointer(Body^.address),@addr,@size) then + if get_dmem_ptr(Pointer(Body^.address),@addr,nil) then begin // end else @@ -942,7 +952,162 @@ begin end; end; - pctx^.stream_dcb.EventWriteEop(addr,Body^.DATA,Body^.dataSel,(Body^.intSel shr 1)); + pctx^.stream_dcb.EventWriteEop(addr,Body^.DATA,Body^.eventType,Body^.dataSel,(Body^.intSel shr 1)); + +end; + +procedure onEventWriteEos(pctx:p_pfp_ctx;Body:PPM4CMDEVENTWRITEEOS); +var + addr:Pointer; +begin + Case Body^.eventType of + CS_DONE:; + PS_DONE:; + else + Assert(False,'EventWriteEos: eventType=0x'+HexStr(Body^.eventType,1)); + end; + + if (Body^.eventIndex<>EVENT_WRITE_INDEX_ANY_EOS_TIMESTAMP) then + begin + Assert(False,'EventWriteEos: eventIndex=0x'+HexStr(Body^.eventIndex,1)); + end; + + DWORD(pctx^.CX_REG.VGT_EVENT_INITIATOR):=Body^.eventType; + + if get_dmem_ptr(Pointer(Body^.address),@addr,nil) then + begin + // + end else + begin + Assert(false,'addr:0x'+HexStr(Body^.address,16)+' not in dmem!'); + end; + + pctx^.stream_dcb.EventWriteEos(addr,Body^.data,Body^.eventType,Body^.command); + +end; + +procedure onDmaData(pctx:p_pfp_ctx;Body:PPM4DMADATA); +var + adrSrc,adrDst:QWORD; + byteCount:DWORD; + srcSel,dstSel:Byte; +begin + srcSel:=((PDWORD(Body)[1] shr $1d) and 3) or ((PDWORD(Body)[6] shr $19) and 8) or ((PDWORD(Body)[6] shr $18) and 4); + dstSel:=((PDWORD(Body)[1] shr $14) and 1) or ((PDWORD(Body)[6] shr $1a) and 8) or ((PDWORD(Body)[6] shr $19) and 4); + + adrSrc:=Body^.srcAddr; + adrDst:=Body^.dstAddr; + byteCount:=Body^.Flags2.byteCount; + + case dstSel of + kDmaDataDstRegister, + kDmaDataDstRegisterNoIncrement: + if (DWORD(adrDst)=$3022C) then + begin + //prefetchIntoL2 + Exit; + end; + else; + end; + + Case Body^.Flags1.engine of + CP_DMA_ENGINE_ME: + begin + pctx^.stream_dcb.DmaData(dstSel,adrDst,srcSel,adrSrc,byteCount,Body^.Flags1.cpSync); + end; + CP_DMA_ENGINE_PFP: + begin + //Execute on the parser side + + case (srcSel or (dstSel shl 4)) of + (kDmaDataSrcMemory or (kDmaDataDstMemory shl 4)), + (kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemory shl 4)), + (kDmaDataSrcMemory or (kDmaDataDstMemoryUsingL2 shl 4)), + (kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemoryUsingL2 shl 4)): + begin + Move(Pointer(adrSrc)^,Pointer(adrDst)^,byteCount); + end; + (kDmaDataSrcData or (kDmaDataDstMemory shl 4)), + (kDmaDataSrcData or (kDmaDataDstMemoryUsingL2 shl 4)): + begin + FillDWORD(Pointer(adrDst)^,(byteCount div 4),DWORD(adrSrc)); + end; + else + Assert(false,'DmaData: srcSel=0x'+HexStr(srcSel,1)+' dstSel=0x'+HexStr(dstSel,1)); + end; + + end; + else + Assert(false,'DmaData: engine=0x'+HexStr(Body^.Flags1.engine,1)); + end; + +end; + +procedure onWriteData(pctx:p_pfp_ctx;Body:PPM4CMDWRITEDATA); +var + addr:PDWORD; + count:Word; + engineSel:Byte; + dstSel:Byte; +begin + + Assert(Body^.CONTROL.wrOneAddr=0,'WriteData: wrOneAddr<>0'); + + count:=Body^.header.count; + if (count<3) then Exit; + + engineSel:=Body^.CONTROL.engineSel; + dstSel:=Body^.CONTROL.dstSel; + + Case engineSel of + WRITE_DATA_ENGINE_ME: + begin + pctx^.stream_dcb.WriteData(dstSel,QWORD(addr),QWORD(@Body^.DATA),count); + end; + WRITE_DATA_ENGINE_PFP: + begin + + case dstSel of + WRITE_DATA_DST_SEL_MEMORY_SYNC, //writeDataInline + WRITE_DATA_DST_SEL_TCL2, //writeDataInlineThroughL2 + WRITE_DATA_DST_SEL_MEMORY_ASYNC: + begin + count:=count-2; + addr:=Pointer(Body^.dstAddr); + Move(Body^.DATA,addr^,count*SizeOf(DWORD)); + end; + else + Assert(false,'WriteData: dstSel=0x'+HexStr(dstSel,1)); + end; + + end; + else + Assert(false,'WriteData: engineSel=0x'+HexStr(engineSel,1)); + end; + +end; + +procedure onWaitRegMem(pctx:p_pfp_ctx;Body:PPM4CMDWAITREGMEM); +begin + + Case Body^.memSpace of + WAIT_REG_MEM_SPACE_MEMORY:; + else + Assert(False,'WaitRegMem: memSpace=0x'+HexStr(Body^.memSpace,1)); + end; + + Case Body^.engine of + WAIT_REG_MEM_ENGINE_ME: + begin + pctx^.stream_dcb.WaitRegMem(Body^.pollAddress,Body^.reference,Body^.mask,Body^.compareFunc); + end; + WAIT_REG_MEM_ENGINE_PFP: + begin + Assert(false,'WaitRegMem: engine=0x'+HexStr(Body^.engine,1)); + end; + else + Assert(false,'WaitRegMem: engine=0x'+HexStr(Body^.engine,1)); + end; end; @@ -1260,13 +1425,13 @@ begin OP_HINT_PUSH_MARKER: if pctx^.print_hint then begin - onPushMarker(@Body[1]); + onPushMarker(@Body[2]); end; OP_HINT_SET_MARKER: if pctx^.print_hint then begin - onSetMarker(@Body[1]); + onSetMarker(@Body[2]); end; OP_HINT_PREPARE_FLIP_LABEL: @@ -1313,15 +1478,19 @@ begin if (PM4_TYPE_3_HEADER(token).opcode<>IT_NOP) or (not pctx^.print_hint) then begin - Writeln('IT_',get_op_name(PM4_TYPE_3_HEADER(token).opcode),' len:',PM4_LENGTH(token)); + Writeln('IT_',get_op_name(PM4_TYPE_3_HEADER(token).opcode), + ' ',ShdrType[PM4_TYPE_3_HEADER(token).shaderType], + ' len:',PM4_LENGTH(token)); end; case PM4_TYPE_3_HEADER(token).opcode of IT_NOP :onNop(pctx,buff); + IT_WRITE_DATA :onWriteData (pctx,buff); IT_EVENT_WRITE :onEventWrite (pctx,buff); IT_EVENT_WRITE_EOP :onEventWriteEop (pctx,buff); - IT_EVENT_WRITE_EOS :Assert(false); - IT_DMA_DATA :Assert(false); + IT_EVENT_WRITE_EOS :onEventWriteEos (pctx,buff); + IT_DMA_DATA :onDmaData (pctx,buff); + IT_WAIT_REG_MEM :onWaitRegMem (pctx,buff); IT_ACQUIRE_MEM :onAcquireMem (pctx,buff); IT_CONTEXT_CONTROL :onContextControl (buff); IT_DRAW_PREAMBLE :onDrawPreamble (pctx,buff); @@ -1335,12 +1504,10 @@ begin IT_INDEX_BASE :onIndexBase (pctx,buff); IT_NUM_INSTANCES :onNumInstances (pctx,buff); IT_DRAW_INDEX_2 :onDrawIndex2 (pctx,buff); - IT_DRAW_INDEX_AUTO :Assert(false); - IT_DRAW_INDEX_OFFSET_2:Assert(false); - IT_DISPATCH_DIRECT :Assert(false); - IT_WAIT_REG_MEM :Assert(false); - IT_WRITE_DATA :Assert(false); - IT_PFP_SYNC_ME :Assert(false); + IT_DRAW_INDEX_AUTO :Assert(false,'IT_DRAW_INDEX_AUTO'); + IT_DRAW_INDEX_OFFSET_2:Assert(false,'IT_DRAW_INDEX_OFFSET_2'); + IT_DISPATCH_DIRECT :Assert(false,'IT_DISPATCH_DIRECT'); + IT_PFP_SYNC_ME :Assert(false,'IT_PFP_SYNC_ME'); IT_SET_BASE :onSetBase(buff); IT_SET_PREDICATION :onSetPredication(buff); diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index af3bd0eb..956a6760 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -54,6 +54,10 @@ type ntLoadConstRam, ntEventWrite, ntEventWriteEop, + ntEventWriteEos, + ntDmaData, + ntWriteData, + ntWaitRegMem, ntFastClear, ntResolve, ntDrawIndex2 @@ -66,23 +70,58 @@ type end; p_pm4_node_LoadConstRam=^t_pm4_node_LoadConstRam; - t_pm4_node_LoadConstRam=object(t_pm4_node) + t_pm4_node_LoadConstRam=packed object(t_pm4_node) addr :Pointer; num_dw:Word; offset:Word; end; p_pm4_node_EventWrite=^t_pm4_node_EventWrite; - t_pm4_node_EventWrite=object(t_pm4_node) + t_pm4_node_EventWrite=packed object(t_pm4_node) eventType:Byte; end; p_pm4_node_EventWriteEop=^t_pm4_node_EventWriteEop; - t_pm4_node_EventWriteEop=object(t_pm4_node) - addr :Pointer; - data :QWORD; - dataSel:Byte; - intSel :Byte; + t_pm4_node_EventWriteEop=packed object(t_pm4_node) + addr :Pointer; + data :QWORD; + eventType:Byte; + dataSel :Byte; + intSel :Byte; + end; + + p_pm4_node_EventWriteEos=^t_pm4_node_EventWriteEos; + t_pm4_node_EventWriteEos=packed object(t_pm4_node) + addr :Pointer; + data :DWORD; + eventType:Byte; + command :Byte; + end; + + p_pm4_node_DmaData=^t_pm4_node_DmaData; + t_pm4_node_DmaData=packed object(t_pm4_node) + dst :QWORD; + src :QWORD; + numBytes:DWORD; + srcSel :Byte; + dstSel :Byte; + cpSync :Byte; + end; + + p_pm4_node_WriteData=^t_pm4_node_WriteData; + t_pm4_node_WriteData=packed object(t_pm4_node) + dst :QWORD; + src :QWORD; + num_dw:Word; + dstSel:Byte; + end; + + p_pm4_node_WaitRegMem=^t_pm4_node_WaitRegMem; + t_pm4_node_WaitRegMem=packed object(t_pm4_node) + pollAddr :QWORD; + refValue :DWORD; + mask :DWORD; + compareFunc :Byte; end; p_pm4_node_FastClear=^t_pm4_node_FastClear; @@ -119,7 +158,11 @@ type // procedure LoadConstRam (addr:Pointer;num_dw,offset:Word); procedure EventWrite (eventType:Byte); - procedure EventWriteEop(addr:Pointer;data:QWORD;dataSel,intSel:Byte); + procedure EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte); + procedure EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte); + procedure DmaData (dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte); + procedure WriteData (dstSel:Byte;dst,src:QWORD;num_dw:Word); + procedure WaitRegMem (pollAddr:QWORD;refValue,mask:DWORD;compareFunc:Byte); procedure FastClear (var CX_REG:TCONTEXT_REG_GROUP); procedure Resolve (var CX_REG:TCONTEXT_REG_GROUP); function ColorControl (var CX_REG:TCONTEXT_REG_GROUP):Boolean; @@ -181,23 +224,86 @@ var begin node:=allocator.Alloc(SizeOf(t_pm4_node_EventWrite)); - node^.ntype :=ntEventWrite; + node^.ntype :=ntEventWrite; node^.eventType:=eventType; add_node(node); end; -procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;dataSel,intSel:Byte); +procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte); var node:p_pm4_node_EventWriteEop; begin node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEop)); - node^.ntype :=ntEventWriteEop; - node^.addr :=addr; - node^.data :=data; - node^.dataSel:=dataSel; - node^.intSel :=intSel; + node^.ntype :=ntEventWriteEop; + node^.addr :=addr; + node^.data :=data; + node^.eventType:=eventType; + node^.dataSel :=dataSel; + node^.intSel :=intSel; + + add_node(node); +end; + +procedure t_pm4_stream.EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte); +var + node:p_pm4_node_EventWriteEos; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEos)); + + node^.ntype :=ntEventWriteEos; + node^.addr :=addr; + node^.data :=data; + node^.eventType:=eventType; + node^.command :=command; + + add_node(node); +end; + +procedure t_pm4_stream.DmaData(dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte); +var + node:p_pm4_node_DmaData; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_DmaData)); + + node^.ntype :=ntDmaData; + node^.dst :=dst; + node^.src :=srcOrData; + node^.numBytes:=numBytes; + node^.srcSel :=srcSel; + node^.dstSel :=dstSel; + node^.cpSync :=isBlocking; + + add_node(node); +end; + +procedure t_pm4_stream.WriteData(dstSel:Byte;dst,src:QWORD;num_dw:Word); +var + node:p_pm4_node_WriteData; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_WriteData)); + + node^.ntype :=ntWriteData; + node^.dst :=dst; + node^.src :=src; + node^.num_dw:=num_dw; + node^.dstSel:=dstSel; + + add_node(node); +end; + +procedure t_pm4_stream.WaitRegMem(pollAddr:QWORD;refValue,mask:DWORD;compareFunc:Byte); +var + node:p_pm4_node_WaitRegMem; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_WaitRegMem)); + + node^.ntype :=ntWaitRegMem; + node^.pollAddr :=pollAddr; + node^.refValue :=refValue; + node^.mask :=mask; + node^.compareFunc:=compareFunc; add_node(node); end; diff --git a/chip/pm4defs.pas b/chip/pm4defs.pas index 2112b703..e0c028f3 100644 --- a/chip/pm4defs.pas +++ b/chip/pm4defs.pas @@ -265,7 +265,7 @@ type PPM4_TYPE_3_HEADER=^PM4_TYPE_3_HEADER; PM4_TYPE_3_HEADER=bitpacked record predicate :bit1; //1 - shaderType:bit1; //1 + shaderType:bit1; //1 < 0: Graphics, 1: Compute Shader reserved :bit6; //6 opcode :Byte; //8 count :bit14; //14 @@ -339,10 +339,10 @@ type PPM4CMDDRAWPREAMBLE=^TPM4CMDDRAWPREAMBLE; TPM4CMDDRAWPREAMBLE=bitpacked record - header :PM4_TYPE_3_HEADER; - control1 :TVGT_PRIMITIVE_TYPE; //< writes to VGT_PRIMITIVE_TYPE reg - control2 :TIA_MULTI_VGT_PARAM; //< writes to IA_MULTI_VGT_PARAM reg - control3 :TVGT_LS_HS_CONFIG; //< writes to VGT_LS_HS_CONFIG reg + header :PM4_TYPE_3_HEADER; + control1:TVGT_PRIMITIVE_TYPE; //< writes to VGT_PRIMITIVE_TYPE reg + control2:TIA_MULTI_VGT_PARAM; //< writes to IA_MULTI_VGT_PARAM reg + control3:TVGT_LS_HS_CONFIG; //< writes to VGT_LS_HS_CONFIG reg end; // WRITE_DATA DST_SEL and ENGINE definitions @@ -362,8 +362,8 @@ const WRITE_DATA_ENGINE_CE =2; type - PTPM4CMDWRITEDATA=^TPM4CMDWRITEDATA; - TPM4CMDWRITEDATA=packed record + PPM4CMDWRITEDATA=^PM4CMDWRITEDATA; + PM4CMDWRITEDATA=packed record header :PM4_TYPE_3_HEADER; CONTROL:bitpacked record reserved1 :bit8; @@ -379,8 +379,7 @@ type reserved5 :bit2; engineSel :bit2; ///< engine select end; - dstAddrLo:DWORD; - dstAddrHi:DWORD; + dstAddr:QWORD; data:packed record end; end; @@ -410,7 +409,7 @@ type eventType :bit6; //00 // < event type written to VGT_EVENT_INITIATOR Reserved1 :bit2; //06 - EVENT_INDEX :bit4; //08 // < event index [0x5] + eventIndex :bit4; //08 // < event index [0x5] tcL1VolActionEna:bit1; //12 //(cacheAction & 0x3f) [0x00,0x10,0x33,0x38,0x3B] tcVolActionEna :bit1; //13 //(cacheAction & 0x3f) @@ -442,23 +441,20 @@ type end; const - EVENT_WRITE_EOS_INDEX_CSDONE_PSDONE=6; - EVENT_WRITE_EOS_CMD_STORE_APPEND_COUNT_TO_MEMORY=0; EVENT_WRITE_EOS_CMD_STORE_GDS_DATA_TO_MEMORY =1; EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY =2; type - PTPM4CMDEVENTWRITEEOS=^TPM4CMDEVENTWRITEEOS; - TPM4CMDEVENTWRITEEOS=bitpacked record + PPM4CMDEVENTWRITEEOS=^PM4CMDEVENTWRITEEOS; + PM4CMDEVENTWRITEEOS=bitpacked record header :PM4_TYPE_3_HEADER; - eventType :bit6; ///< event type written to VGT_EVENT_INITIATOR + eventType :bit6; ///< event type written to VGT_EVENT_INITIATOR (CS_DONE, PS_DONE) reserved1 :bit2; ///< reserved - eventIndex :bit4; ///< event index + eventIndex :bit4; ///< event index (EVENT_WRITE_INDEX_ANY_EOS_TIMESTAMP) reserved2 :bit20; ///< reserved - addressLo :DWORD; ///< low bits of address, must be 4 byte aligned - addressHi :bit29; ///< high bits of address - command :bit3; ///< command + address :bit61; ///< bits of address, must be 4 byte aligned + command :bit3; ///< command (EVENT_WRITE_EOS_CMD_*) Case byte of 0:( gdsIndex:Word; ///< indexed offset into GDS partition @@ -516,6 +512,7 @@ const //DmaDataDst kDmaDataDstMemory = $0; ///< Destination is a GPU-visible memory address. kDmaDataDstGds = $1; ///< Destination is an offset into Global Data Store (GDS). + kDmaDataDstMemoryUsingL2 = $3; kDmaDataDstRegister = $4; ///< Destination is a GPU register offset (auto-increment enabled for multi-register DMAs). kDmaDataDstRegisterNoIncrement = $C; ///< Destination is a GPU register offset (auto-increment disabled for multi-register DMAs). @@ -523,14 +520,27 @@ const CP_DMA_ENGINE_ME = 0; CP_DMA_ENGINE_PFP = 1; + CPDMA_ADDR_SPACE_MEM = 0; + CPDMA_ADDR_SPACE_REG = 1; + + //CPDMA_SRC_SEL + CPDMA_SRC_SEL_SRC_ADDR = 0; + CPDMA_SRC_SEL_GDS = 1; + CPDMA_SRC_SEL_DATA = 2; + CPDMA_SRC_SEL_SRC_ADDR_USING_L2 = 3; + + //CPDMA_DST_SEL + CPDMA_DST_SEL_DST_ADDR = 0; + CPDMA_DST_SEL_GDS = 1; + CPDMA_DST_SEL_DST_ADDR_USING_L2 = 3; + type - PTPM4DMADATA=^TPM4DMADATA; - TPM4DMADATA=packed record + PPM4DMADATA=^PM4DMADATA; + PM4DMADATA=packed record header:PM4_TYPE_3_HEADER; - Flags1:bitpacked record - engine :bit1; + engine :bit1; //CP_DMA_ENGINE_PFP, CP_DMA_ENGINE_ME reserved1 :bit11; srcATC :bit1; srcCachePolicy :bit2; @@ -543,21 +553,19 @@ type dstVolatile :bit1; reserved4 :bit1; srcSel :bit2; - cpSync :bit1; + cpSync :bit1; //Synchronize the transfer (isBlocking) end; - srcAddrLo:DWORD; - srcAddrHi:DWORD; - dstAddrLo:DWORD; - dstAddrHi:DWORD; + srcAddr:QWORD; + dstAddr:QWORD; Flags2:bitpacked record - byteCount :bit21; - disWC :bit1; + byteCount :bit21; //Number of bytes to copy + disWC :bit1; //disable write-confirm srcSwap :bit2; dstSwap :bit2; - sas :bit1; - das :bit1; + sas :bit1; //CPDMA_ADDR_SPACE_MEM, CPDMA_ADDR_SPACE_REG + das :bit1; //CPDMA_ADDR_SPACE_MEM, CPDMA_ADDR_SPACE_REG saic :bit1; daic :bit1; rawWait :bit1; @@ -610,30 +618,29 @@ const } type - PPM4CMDWAITREGMEM=^TPM4CMDWAITREGMEM; - TPM4CMDWAITREGMEM=bitpacked record + PPM4CMDWAITREGMEM=^PM4CMDWAITREGMEM; + PM4CMDWAITREGMEM=bitpacked record header :PM4_TYPE_3_HEADER; compareFunc :bit3; ///< function. WAIT_REG_MEM_FUNC_XXXX reserved1 :bit1; ///< reserved memSpace :bit2; ///< memory space (0 = register, 1 = memory, 2=TC/L2, 3 = reserved) - operation__CI :bit2; ///< operation: + operation :bit2; ///< operation: ///< 00: WAIT_REG_MEM - Wait on Masked Register/Memory value to equal reference value. ///< 01: WR_WAIT_WR_REG (PFP only) ///< Writes REFERENCE value to POLL_ADDRESS_LO ///< Waits for REFERENCE = POLL_ADDRESS_HI ///< Write REFERENCE to POLL_ADDRESS_HI. engine :bit2; ///< 0 = ME, 1 = PFP, 2 = CE - uncached__VI :bit1; ///< When set the memory read will always use MTYPE 3 (uncached) + uncached :bit1; ///< When set the memory read will always use MTYPE 3 (uncached) /// Only applies when executed on MEC (ACE). /// WAIT_REG_MEM on PFP or ME are always uncached. reserved2 :bit13; ///< reserved - atc__CI :bit1; ///< ATC steting for MC read transactions - cachePolicy__CI :bit2; ///< Reserved for future use of CACHE_POLICY - volatile__CI :bit1; ///< Reserved for future use of VOLATILE + atc :bit1; ///< ATC steting for MC read transactions + cachePolicy :bit2; ///< Reserved for future use of CACHE_POLICY + volatile :bit1; ///< Reserved for future use of VOLATILE reserved3 :bit4; ///< reserved - pollAddressLo :DWORD; ///< lower portion of Address to poll or register offset - pollAddressHi :DWORD; ///< high portion of Address to poll, dont care for regs + pollAddress :QWORD; ///< Address to poll or register offset reference :DWORD; ///< reference value mask :DWORD; ///< mask for comparison pollInterval :DWORD; ///< interval to wait when issuing new poll requests diff --git a/rtl/bittype.pas b/rtl/bittype.pas index 5b20ce1f..b4b34570 100644 --- a/rtl/bittype.pas +++ b/rtl/bittype.pas @@ -5,15 +5,15 @@ unit bittype; interface type - bit1=0..1; - bit2=0..3; - bit3=0..7; - bit4=0..15; - bit5=0..31; - bit6=0..63; - bit7=0..127; - bit8=Byte; - bit9=0..511; + bit1 =0..1; + bit2 =0..3; + bit3 =0..7; + bit4 =0..15; + bit5 =0..31; + bit6 =0..63; + bit7 =0..127; + bit8 =Byte; + bit9 =0..511; bit10=0..1023; bit11=0..2047; bit12=0..4095; @@ -36,13 +36,39 @@ type bit29=0..536870911; bit30=0..1073741823; bit31=0..2147483647; - bit32=DWORD; + bit32=DWord; + bit33=0..8589934591; + bit34=0..17179869183; + bit35=0..34359738367; + bit36=0..68719476735; + bit37=0..137438953471; bit38=0..274877906943; + bit39=0..549755813887; bit40=0..1099511627775; + bit41=0..2199023255551; + bit42=0..4398046511103; bit43=0..8796093022207; bit44=0..17592186044415; + bit45=0..35184372088831; + bit46=0..70368744177663; + bit47=0..140737488355327; bit48=0..281474976710655; - bit64=QWORD; + bit49=0..562949953421311; + bit50=0..1125899906842623; + bit51=0..2251799813685247; + bit52=0..4503599627370495; + bit53=0..9007199254740991; + bit54=0..18014398509481983; + bit55=0..36028797018963967; + bit56=0..72057594037927935; + bit57=0..144115188075855871; + bit58=0..288230376151711743; + bit59=0..576460752303423487; + bit60=0..1152921504606846975; + bit61=0..2305843009213693951; + bit62=0..4611686018427387903; + bit63=0..9223372036854775807; + bit64=QWord; implementation diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index 2d59501d..5ef55807 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -136,7 +136,7 @@ begin begin if pctx^.print_ops then begin - Writeln('INDIRECT_BUFFER_CNST (ccb)'); + Writeln('INDIRECT_BUFFER (ccb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; if pm4_ibuf_init(@ibuf,buff,@pm4_parse_ccb) then begin @@ -151,7 +151,7 @@ begin begin if pctx^.print_ops then begin - Writeln('INDIRECT_BUFFER (dcb)'); + Writeln('INDIRECT_BUFFER (dcb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; if pm4_ibuf_init(@ibuf,buff,@pm4_parse_dcb) then begin diff --git a/vulkan/vImage.pas b/vulkan/vImage.pas index 91e37a23..3bf783a7 100644 --- a/vulkan/vImage.pas +++ b/vulkan/vImage.pas @@ -266,6 +266,12 @@ begin VK_FORMAT_R8G8B8A8_UINT :Result:=4; VK_FORMAT_R8G8B8A8_SINT :Result:=4; + VK_FORMAT_B8G8R8A8_UNORM :Result:=4; + VK_FORMAT_B8G8R8A8_SRGB :Result:=4; + VK_FORMAT_B8G8R8A8_SNORM :Result:=4; + VK_FORMAT_B8G8R8A8_UINT :Result:=4; + VK_FORMAT_B8G8R8A8_SINT :Result:=4; + VK_FORMAT_R16_UNORM :Result:=2; VK_FORMAT_R16_SNORM :Result:=2; VK_FORMAT_R16_UINT :Result:=2; diff --git a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index bab06859..392689f5 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -407,20 +407,65 @@ begin end; + { + Match the physical representation of the final pixel (RGBA) + to the output component number in shader export (0123) + } + //SWAP_STD (R=>0) //SWAP_ALT (G=>0) //SWAP_STD_REV (B=>0) //SWAP_ALT_REV (A=>0) + //SWAP_STD (R=>0, G=>1) //SWAP_ALT (R=>0, A=>1) + //SWAP_STD_REV (G=>0, R=>1) + //SWAP_ALT_REV (A=>0, R=>1) + + //SWAP_STD (R=>0, G=>1, B=>2) //SWAP_ALT (R=>0, G=>1, A=>2) + //SWAP_STD_REV (B=>0, G=>1, R=>2) + //SWAP_ALT_REV (A=>0, G=>1, R=>2) //SWAP_STD (R=>0, G=>1, B=>2, A=>3) - //SWAP_ALT (B=>0, G=>1, R=>2, A=>3). + //SWAP_ALT (B=>0, G=>1, R=>2, A=>3) //SWAP_STD_REV (A=>0, B=>1, G=>2, R=>3) //SWAP_ALT_REV (A=>0, R=>1, G=>2, B=>3) end; +const + VK_SWIZZLE_I=ord(VK_COMPONENT_SWIZZLE_IDENTITY); + VK_SWIZZLE_Z=ord(VK_COMPONENT_SWIZZLE_ZERO ); + VK_SWIZZLE_O=ord(VK_COMPONENT_SWIZZLE_ONE ); + VK_SWIZZLE_R=ord(VK_COMPONENT_SWIZZLE_R ); + VK_SWIZZLE_G=ord(VK_COMPONENT_SWIZZLE_G ); + VK_SWIZZLE_B=ord(VK_COMPONENT_SWIZZLE_B ); + VK_SWIZZLE_A=ord(VK_COMPONENT_SWIZZLE_A ); + + shader_swizzle_map:array[1..4,SWAP_STD..SWAP_ALT_REV] of TvDstSel=( + ( + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_O;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_G;g:VK_SWIZZLE_O;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_B;g:VK_SWIZZLE_O;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_A;g:VK_SWIZZLE_O;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O) + ),( + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_G;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_A;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_G;g:VK_SWIZZLE_R;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_A;g:VK_SWIZZLE_R;b:VK_SWIZZLE_O;a:VK_SWIZZLE_O) + ),( + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_G;b:VK_SWIZZLE_B;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_G;b:VK_SWIZZLE_A;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_B;g:VK_SWIZZLE_G;b:VK_SWIZZLE_R;a:VK_SWIZZLE_O), + (r:VK_SWIZZLE_A;g:VK_SWIZZLE_G;b:VK_SWIZZLE_R;a:VK_SWIZZLE_O) + ),( + (r:VK_SWIZZLE_R;g:VK_SWIZZLE_G;b:VK_SWIZZLE_B;a:VK_SWIZZLE_A), + (r:VK_SWIZZLE_B;g:VK_SWIZZLE_G;b:VK_SWIZZLE_R;a:VK_SWIZZLE_A), + (r:VK_SWIZZLE_A;g:VK_SWIZZLE_B;b:VK_SWIZZLE_G;a:VK_SWIZZLE_R), + (r:VK_SWIZZLE_A;g:VK_SWIZZLE_R;b:VK_SWIZZLE_G;a:VK_SWIZZLE_B) + ) + ); + Function TGPU_REGS.GET_RT_BLEND(i:Byte):TVkPipelineColorBlendAttachmentState; //0..7 var RENDER_TARGET:TRENDER_TARGET; @@ -749,10 +794,6 @@ begin end; -// -//FORMAT :=RENDER_TARGET[i].INFO.FORMAT; -//NUMBER_TYPE:=RENDER_TARGET[i].INFO.NUMBER_TYPE; - Function TGPU_REGS.GET_RT_INFO(i:Byte):TRT_INFO; //0..7 var RENDER_TARGET:TRENDER_TARGET; @@ -1708,14 +1749,14 @@ end; function _get_dst_sel_swizzle(b:Byte):Byte; begin Case b of - 0:Result:=ord(VK_COMPONENT_SWIZZLE_ZERO); - 1:Result:=ord(VK_COMPONENT_SWIZZLE_ONE); - 4:Result:=ord(VK_COMPONENT_SWIZZLE_R); - 5:Result:=ord(VK_COMPONENT_SWIZZLE_G); - 6:Result:=ord(VK_COMPONENT_SWIZZLE_B); - 7:Result:=ord(VK_COMPONENT_SWIZZLE_A); + 0:Result:=VK_SWIZZLE_Z; + 1:Result:=VK_SWIZZLE_O; + 4:Result:=VK_SWIZZLE_R; + 5:Result:=VK_SWIZZLE_G; + 6:Result:=VK_SWIZZLE_B; + 7:Result:=VK_SWIZZLE_A; else - Result:=ord(VK_COMPONENT_SWIZZLE_IDENTITY); + Result:=VK_SWIZZLE_I; end; end;