[D3D12] DXT1 decompression shader
This commit is contained in:
parent
1f248572ac
commit
17e3f09c1e
Binary file not shown.
|
@ -1,8 +1,8 @@
|
||||||
// generated from `xb buildhlsl`
|
// generated from `xb buildhlsl`
|
||||||
// source: texture_load_ctx1.cs.hlsl
|
// source: texture_load_ctx1.cs.hlsl
|
||||||
const uint8_t texture_load_ctx1_cs[] = {
|
const uint8_t texture_load_ctx1_cs[] = {
|
||||||
0x44, 0x58, 0x42, 0x43, 0x36, 0x1B, 0x8D, 0x80, 0x5E, 0x82, 0x06, 0x8F,
|
0x44, 0x58, 0x42, 0x43, 0x7E, 0xC2, 0x29, 0xDA, 0xF8, 0x25, 0x11, 0x52,
|
||||||
0xC2, 0xE4, 0xED, 0xF5, 0xC4, 0x87, 0x3F, 0xF9, 0x01, 0x00, 0x00, 0x00,
|
0xE1, 0xBC, 0xD4, 0xC7, 0xF9, 0x11, 0xB6, 0x3E, 0x01, 0x00, 0x00, 0x00,
|
||||||
0x20, 0x26, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
0x20, 0x26, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||||
0x54, 0x04, 0x00, 0x00, 0x64, 0x04, 0x00, 0x00, 0x74, 0x04, 0x00, 0x00,
|
0x54, 0x04, 0x00, 0x00, 0x64, 0x04, 0x00, 0x00, 0x74, 0x04, 0x00, 0x00,
|
||||||
0x84, 0x25, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x18, 0x04, 0x00, 0x00,
|
0x84, 0x25, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x18, 0x04, 0x00, 0x00,
|
||||||
|
@ -528,57 +528,57 @@ const uint8_t texture_load_ctx1_cs[] = {
|
||||||
0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0xC2, 0x00, 0x10, 0x00,
|
0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0xC2, 0x00, 0x10, 0x00,
|
||||||
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||||
0x15, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
0x15, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||||
|
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||||
|
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
|
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00,
|
||||||
|
0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00,
|
||||||
|
0x8C, 0x00, 0x00, 0x11, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||||
|
0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||||
|
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F,
|
||||||
|
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||||
|
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||||
|
0x08, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||||
|
0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||||
|
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A,
|
||||||
|
0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||||
|
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||||
|
0x8C, 0x00, 0x00, 0x11, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x02, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||||
|
0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A,
|
||||||
|
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
|
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||||
|
0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA,
|
||||||
|
0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||||
|
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
|
||||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0xAA, 0xAA, 0xAA, 0xAA,
|
|
||||||
0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA,
|
|
||||||
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
|
||||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x02, 0x40, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
|
|
||||||
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x1E, 0x00, 0x00, 0x07,
|
|
||||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
|
||||||
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
|
||||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
0x04, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
0x02, 0x40, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
|
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x57, 0x00, 0x00, 0x07,
|
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
|
||||||
0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
|
||||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
0x57, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||||
0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
0x04, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00,
|
||||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
|
||||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00,
|
|
||||||
0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x8C, 0x00, 0x00, 0x11,
|
|
||||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
|
||||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
|
||||||
0x08, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
|
||||||
0x03, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
|
|
||||||
0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
|
||||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
|
||||||
0x02, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
|
||||||
0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
|
||||||
0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
|
||||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x11,
|
|
||||||
0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
|
||||||
0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
|
||||||
0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x02, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
0x02, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||||
|
|
|
@ -178,20 +178,20 @@ else
|
||||||
mov r1.zw, r3.xxxz
|
mov r1.zw, r3.xxxz
|
||||||
mov r2.zw, r3.yyyw
|
mov r2.zw, r3.yyyw
|
||||||
endif
|
endif
|
||||||
ishl r3.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
|
||||||
and r3.xyzw, r3.xyzw, l(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa)
|
|
||||||
ushr r2.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
|
||||||
and r2.xyzw, r2.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
|
||||||
iadd r2.xyzw, r2.xyzw, r3.xyzw
|
|
||||||
ushr r3.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
|
||||||
and r3.xyzw, r3.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
|
||||||
xor r2.xyzw, r2.xyzw, r3.xyzw
|
|
||||||
ishl r3.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
ishl r3.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
||||||
and r3.xyzw, r3.xyzw, l(0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000)
|
and r3.xyzw, r3.xyzw, l(0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000)
|
||||||
bfi r3.xyzw, l(8, 8, 8, 8), l(0, 0, 0, 0), r1.xyzw, r3.xyzw
|
bfi r3.xyzw, l(8, 8, 8, 8), l(0, 0, 0, 0), r1.xyzw, r3.xyzw
|
||||||
ubfe r4.xyzw, l(8, 8, 8, 8), l(16, 16, 16, 16), r1.xyzw
|
ubfe r4.xyzw, l(8, 8, 8, 8), l(16, 16, 16, 16), r1.xyzw
|
||||||
ushr r1.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
ushr r1.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
||||||
bfi r1.xyzw, l(16, 16, 16, 16), l(0, 0, 0, 0), r4.xyzw, r1.xyzw
|
bfi r1.xyzw, l(16, 16, 16, 16), l(0, 0, 0, 0), r4.xyzw, r1.xyzw
|
||||||
|
ishl r4.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa)
|
||||||
|
ushr r2.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r2.xyzw, r2.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
iadd r2.xyzw, r2.xyzw, r4.xyzw
|
||||||
|
ushr r4.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
xor r2.xyzw, r2.xyzw, r4.xyzw
|
||||||
ishl r0.xy, r0.xyxx, l(2, 2, 0, 0)
|
ishl r0.xy, r0.xyxx, l(2, 2, 0, 0)
|
||||||
ishl r0.x, r0.x, l(1)
|
ishl r0.x, r0.x, l(1)
|
||||||
imad r0.z, vThreadID.z, CB0[0][1].y, r0.y
|
imad r0.z, vThreadID.z, CB0[0][1].y, r0.y
|
||||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,412 @@
|
||||||
|
//
|
||||||
|
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Buffer Definitions:
|
||||||
|
//
|
||||||
|
// cbuffer XeTextureCopyConstants
|
||||||
|
// {
|
||||||
|
//
|
||||||
|
// uint xe_texture_copy_guest_base; // Offset: 0 Size: 4
|
||||||
|
// uint xe_texture_copy_guest_pitch; // Offset: 4 Size: 4
|
||||||
|
// uint xe_texture_copy_host_base; // Offset: 8 Size: 4
|
||||||
|
// uint xe_texture_copy_host_pitch; // Offset: 12 Size: 4
|
||||||
|
// uint3 xe_texture_copy_size_texels; // Offset: 16 Size: 12
|
||||||
|
// bool xe_texture_copy_is_3d; // Offset: 28 Size: 4
|
||||||
|
// uint3 xe_texture_copy_size_blocks; // Offset: 32 Size: 12
|
||||||
|
// uint xe_texture_copy_endianness; // Offset: 44 Size: 4
|
||||||
|
// uint3 xe_texture_copy_guest_mip_offset;// Offset: 48 Size: 12
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Resource Bindings:
|
||||||
|
//
|
||||||
|
// Name Type Format Dim ID HLSL Bind Count
|
||||||
|
// ------------------------------ ---------- ------- ----------- ------- -------------- ------
|
||||||
|
// xe_texture_copy_source texture byte r/o T0 t0 1
|
||||||
|
// xe_texture_copy_dest UAV byte r/w U0 u0 1
|
||||||
|
// XeTextureCopyConstants cbuffer NA NA CB0 cb0 1
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Input signature:
|
||||||
|
//
|
||||||
|
// Name Index Mask Register SysValue Format Used
|
||||||
|
// -------------------- ----- ------ -------- -------- ------- ------
|
||||||
|
// no Input
|
||||||
|
//
|
||||||
|
// Output signature:
|
||||||
|
//
|
||||||
|
// Name Index Mask Register SysValue Format Used
|
||||||
|
// -------------------- ----- ------ -------- -------- ------- ------
|
||||||
|
// no Output
|
||||||
|
cs_5_1
|
||||||
|
dcl_globalFlags refactoringAllowed
|
||||||
|
dcl_constantbuffer CB0[0:0][4], immediateIndexed, space=0
|
||||||
|
dcl_resource_raw T0[0:0], space=0
|
||||||
|
dcl_uav_raw U0[0:0], space=0
|
||||||
|
dcl_input vThreadID.xyz
|
||||||
|
dcl_temps 22
|
||||||
|
dcl_thread_group 8, 32, 1
|
||||||
|
ishl r0.x, vThreadID.x, l(2)
|
||||||
|
mov r0.yz, vThreadID.yyzy
|
||||||
|
uge r1.xyz, r0.xyzx, CB0[0][2].xyzx
|
||||||
|
or r0.w, r1.y, r1.x
|
||||||
|
or r0.w, r1.z, r0.w
|
||||||
|
if_nz r0.w
|
||||||
|
ret
|
||||||
|
endif
|
||||||
|
iadd r1.xyz, r0.xyzx, CB0[0][3].xyzx
|
||||||
|
ieq r0.z, CB0[0][0].y, l(-1)
|
||||||
|
if_nz r0.z
|
||||||
|
if_nz CB0[0][1].w
|
||||||
|
iadd r2.xyzw, r1.xxxx, l(0, 1, 2, 3)
|
||||||
|
iadd r0.zw, CB0[0][2].yyyx, l(0, 0, 31, 31)
|
||||||
|
ushr r3.xyz, r1.zyyz, l(2, 4, 3, 0)
|
||||||
|
ushr r0.zw, r0.zzzw, l(0, 0, 4, 5)
|
||||||
|
and r0.z, r0.z, l(0x0ffffffe)
|
||||||
|
imad r0.z, r3.x, r0.z, r3.y
|
||||||
|
iadd r1.w, r3.z, r3.x
|
||||||
|
bfi r3.x, l(1), l(1), r1.w, l(0)
|
||||||
|
ushr r4.xyzw, r2.xyzw, l(3, 3, 3, 3)
|
||||||
|
iadd r3.xyzw, r3.xxxx, r4.xyzw
|
||||||
|
bfi r3.xyzw, l(2, 2, 2, 2), l(1, 1, 1, 1), r3.xyzw, l(0, 0, 0, 0)
|
||||||
|
bfi r3.xyzw, l(1, 1, 1, 1), l(0, 0, 0, 0), r1.wwww, r3.xyzw
|
||||||
|
ishl r1.w, r1.y, l(11)
|
||||||
|
and r1.w, r1.w, l(0x00003000)
|
||||||
|
bfi r4.xyzw, l(3, 3, 3, 3), l(9, 9, 9, 9), r2.xyzw, r1.wwww
|
||||||
|
ushr r4.xyzw, r4.xyzw, l(6, 6, 6, 6)
|
||||||
|
ushr r2.xyzw, r2.xyzw, l(5, 5, 5, 5)
|
||||||
|
imad r2.xyzw, r0.zzzz, r0.wwww, r2.xyzw
|
||||||
|
and r5.xyzw, r4.xyzw, l(240, 240, 240, 240)
|
||||||
|
bfi r6.xyzw, l(19, 19, 19, 19), l(11, 11, 11, 11), r2.xyzw, l(0, 0, 0, 0)
|
||||||
|
imad r6.xyzw, r5.xyzw, l(2, 2, 2, 2), r6.xyzw
|
||||||
|
bfi r6.xyzw, l(4, 4, 4, 4), l(0, 0, 0, 0), r4.xyzw, r6.xyzw
|
||||||
|
bfi r6.xyzw, l(2, 2, 2, 2), l(9, 9, 9, 9), r1.zzzz, r6.xyzw
|
||||||
|
bfi r7.xyzw, l(1, 1, 1, 1), l(4, 4, 4, 4), r1.yyyy, r6.xyzw
|
||||||
|
ubfe r6.xyzw, l(3, 3, 3, 3), l(6, 6, 6, 6), r6.xyzw
|
||||||
|
and r8.xyzw, r3.xyzw, l(6, 6, 6, 6)
|
||||||
|
bfi r3.xyzw, l(1, 1, 1, 1), l(8, 8, 8, 8), r3.xyzw, l(0, 0, 0, 0)
|
||||||
|
imad r3.xyzw, r6.xyzw, l(32, 32, 32, 32), r3.xyzw
|
||||||
|
imad r3.xyzw, r8.xyzw, l(4, 4, 4, 4), r3.xyzw
|
||||||
|
bfi r2.xyzw, l(19, 19, 19, 19), l(14, 14, 14, 14), r2.xyzw, l(0, 0, 0, 0)
|
||||||
|
imad r2.xyzw, r5.xyzw, l(16, 16, 16, 16), r2.xyzw
|
||||||
|
bfi r2.xyzw, l(4, 4, 4, 4), l(3, 3, 3, 3), r4.xyzw, r2.xyzw
|
||||||
|
bfi r2.xyzw, l(2, 2, 2, 2), l(12, 12, 12, 12), r1.zzzz, r2.xyzw
|
||||||
|
bfi r2.xyzw, l(1, 1, 1, 1), l(7, 7, 7, 7), r1.yyyy, r2.xyzw
|
||||||
|
bfi r2.xyzw, l(9, 9, 9, 9), l(3, 3, 3, 3), r3.xyzw, r2.xyzw
|
||||||
|
bfi r2.xyzw, l(6, 6, 6, 6), l(0, 0, 0, 0), r7.xyzw, r2.xyzw
|
||||||
|
else
|
||||||
|
iadd r3.xyzw, r1.xxxx, l(0, 1, 2, 3)
|
||||||
|
ushr r4.xyzw, r3.xyzw, l(5, 5, 5, 5)
|
||||||
|
ushr r0.zw, r1.yyyy, l(0, 0, 5, 2)
|
||||||
|
iadd r1.w, CB0[0][2].x, l(31)
|
||||||
|
ushr r1.w, r1.w, l(5)
|
||||||
|
imad r4.xyzw, r0.zzzz, r1.wwww, r4.xyzw
|
||||||
|
ishl r5.xy, r1.yyyy, l(5, 7, 0, 0)
|
||||||
|
and r5.xy, r5.xyxx, l(448, 2048, 0, 0)
|
||||||
|
bfi r6.xyzw, l(3, 3, 3, 3), l(3, 3, 3, 3), r3.xyzw, r5.xxxx
|
||||||
|
ishl r0.z, r5.x, l(1)
|
||||||
|
bfi r7.xyzw, l(3, 3, 3, 3), l(4, 4, 4, 4), r3.xyzw, r0.zzzz
|
||||||
|
and r7.xyzw, r7.xyzw, l(992, 992, 992, 992)
|
||||||
|
bfi r8.xyzw, l(22, 22, 22, 22), l(10, 10, 10, 10), r4.xyzw, r7.xyzw
|
||||||
|
bfi r8.xyzw, l(4, 4, 4, 4), l(0, 0, 0, 0), r6.xyzw, r8.xyzw
|
||||||
|
bfi r8.xyzw, l(1, 1, 1, 1), l(4, 4, 4, 4), r1.yyyy, r8.xyzw
|
||||||
|
ishl r9.xyzw, r7.xyzw, l(3, 3, 3, 3)
|
||||||
|
bfi r9.xyzw, l(22, 22, 22, 22), l(13, 13, 13, 13), r4.xyzw, r9.xyzw
|
||||||
|
bfi r9.xyzw, l(4, 4, 4, 4), l(3, 3, 3, 3), r6.xyzw, r9.xyzw
|
||||||
|
bfi r9.xyzw, l(1, 1, 1, 1), l(7, 7, 7, 7), r1.yyyy, r9.xyzw
|
||||||
|
bfi r5.xyzw, l(12, 12, 12, 12), l(0, 0, 0, 0), r5.yyyy, r9.xyzw
|
||||||
|
ishl r7.xyzw, r7.xyzw, l(2, 2, 2, 2)
|
||||||
|
bfi r4.xyzw, l(22, 22, 22, 22), l(12, 12, 12, 12), r4.xyzw, r7.xyzw
|
||||||
|
bfi r4.xyzw, l(4, 4, 4, 4), l(2, 2, 2, 2), r6.xyzw, r4.xyzw
|
||||||
|
bfi r4.xyzw, l(1, 1, 1, 1), l(6, 6, 6, 6), r1.yyyy, r4.xyzw
|
||||||
|
and r4.xyzw, r4.xyzw, l(1792, 1792, 1792, 1792)
|
||||||
|
iadd r4.xyzw, r5.xyzw, r4.xyzw
|
||||||
|
ushr r3.xyzw, r3.xyzw, l(3, 3, 3, 3)
|
||||||
|
and r0.z, r0.w, l(2)
|
||||||
|
iadd r3.xyzw, r0.zzzz, r3.xyzw
|
||||||
|
bfi r3.xyzw, l(2, 2, 2, 2), l(6, 6, 6, 6), r3.xyzw, l(0, 0, 0, 0)
|
||||||
|
iadd r3.xyzw, r4.xyzw, r3.xyzw
|
||||||
|
bfi r2.xyzw, l(6, 6, 6, 6), l(0, 0, 0, 0), r8.xyzw, r3.xyzw
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
ishl r0.z, r1.x, l(3)
|
||||||
|
iadd r0.w, CB0[0][2].y, l(31)
|
||||||
|
and r0.w, r0.w, l(-32)
|
||||||
|
imad r0.w, r1.z, r0.w, r1.y
|
||||||
|
imad r0.z, r0.w, CB0[0][0].y, r0.z
|
||||||
|
iadd r2.xyzw, r0.zzzz, l(0, 8, 16, 24)
|
||||||
|
endif
|
||||||
|
iadd r1.xyzw, r2.xyzw, CB0[0][0].xxxx
|
||||||
|
ld_raw r2.xz, r1.x, T0[0].yxxx
|
||||||
|
ld_raw r2.yw, r1.y, T0[0].xyxx
|
||||||
|
ld_raw r3.xy, r1.z, T0[0].xyxx
|
||||||
|
ld_raw r3.zw, r1.w, T0[0].xxxy
|
||||||
|
ushr r0.z, CB0[0][2].w, l(1)
|
||||||
|
xor r0.z, r0.z, CB0[0][2].w
|
||||||
|
and r0.z, r0.z, l(1)
|
||||||
|
if_nz r0.z
|
||||||
|
ishl r1.xyzw, r2.zxwy, l(8, 8, 8, 8)
|
||||||
|
and r1.xyzw, r1.xyzw, l(0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00)
|
||||||
|
ushr r4.xyzw, r2.zxwy, l(8, 8, 8, 8)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff)
|
||||||
|
iadd r2.xyzw, r1.ywxz, r4.ywxz
|
||||||
|
endif
|
||||||
|
and r0.w, CB0[0][2].w, l(2)
|
||||||
|
if_nz r0.w
|
||||||
|
ushr r1.xyzw, r2.zxwy, l(16, 16, 16, 16)
|
||||||
|
bfi r2.xyzw, l(16, 16, 16, 16), l(16, 16, 16, 16), r2.xyzw, r1.ywxz
|
||||||
|
mov r1.xy, r2.zwzz
|
||||||
|
else
|
||||||
|
mov r1.xy, r2.zwzz
|
||||||
|
endif
|
||||||
|
if_nz r0.z
|
||||||
|
ishl r4.xyzw, r3.xyzw, l(8, 8, 8, 8)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00)
|
||||||
|
ushr r5.xyzw, r3.xyzw, l(8, 8, 8, 8)
|
||||||
|
and r5.xyzw, r5.xyzw, l(0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff)
|
||||||
|
iadd r3.xyzw, r4.xyzw, r5.xyzw
|
||||||
|
endif
|
||||||
|
if_nz r0.w
|
||||||
|
ushr r4.xyzw, r3.xyzw, l(16, 16, 16, 16)
|
||||||
|
bfi r4.xyzw, l(16, 16, 16, 16), l(16, 16, 16, 16), r3.xyzw, r4.xyzw
|
||||||
|
mov r1.zw, r4.xxxz
|
||||||
|
mov r2.zw, r4.yyyw
|
||||||
|
else
|
||||||
|
mov r1.zw, r3.xxxz
|
||||||
|
mov r2.zw, r3.yyyw
|
||||||
|
endif
|
||||||
|
ishl r3.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r3.xyzw, r3.xyzw, l(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa)
|
||||||
|
ushr r4.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
iadd r3.xyzw, r3.xyzw, r4.xyzw
|
||||||
|
ushr r4.xyzw, r3.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
xor r3.xyzw, r3.xyzw, r4.xyzw
|
||||||
|
not r2.xyzw, r2.xyzw
|
||||||
|
ishl r4.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r4.xyzw, r4.xyzw, l(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa)
|
||||||
|
xor r2.xyzw, r2.xyzw, r4.xyzw
|
||||||
|
bfi r4.xyzw, l(5, 5, 5, 5), l(23, 23, 23, 23), r1.xyzw, l(0, 0, 0, 0)
|
||||||
|
ishl r5.xyzw, r1.xyzw, l(18, 18, 18, 18)
|
||||||
|
and r5.xyzw, r5.xyzw, l(0x00700000, 0x00700000, 0x00700000, 0x00700000)
|
||||||
|
iadd r4.xyzw, r4.xyzw, r5.xyzw
|
||||||
|
ishl r5.xyzw, r1.xyzw, l(7, 7, 7, 7)
|
||||||
|
and r6.xyzw, r5.xyzw, l(0x0003f000, 0x0003f000, 0x0003f000, 0x0003f000)
|
||||||
|
iadd r4.xyzw, r4.xyzw, r6.xyzw
|
||||||
|
ishl r6.xyzw, r1.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r6.xyzw, r6.xyzw, l(3072, 3072, 3072, 3072)
|
||||||
|
iadd r4.xyzw, r4.xyzw, r6.xyzw
|
||||||
|
ushr r6.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
||||||
|
and r6.xyzw, r6.xyzw, l(248, 248, 248, 248)
|
||||||
|
iadd r4.xyzw, r4.xyzw, r6.xyzw
|
||||||
|
ubfe r6.xyzw, l(3, 3, 3, 3), l(13, 13, 13, 13), r1.xyzw
|
||||||
|
iadd r4.xyzw, r4.xyzw, r6.xyzw
|
||||||
|
and r5.xyzw, r5.xyzw, l(0x0f800000, 0x0f800000, 0x0f800000, 0x0f800000)
|
||||||
|
ishl r6.xyzw, r1.xyzw, l(2, 2, 2, 2)
|
||||||
|
and r6.xyzw, r6.xyzw, l(0x00700000, 0x00700000, 0x00700000, 0x00700000)
|
||||||
|
iadd r5.xyzw, r5.xyzw, r6.xyzw
|
||||||
|
ushr r6.xyzw, r1.xyzw, l(9, 9, 9, 9)
|
||||||
|
and r6.xyzw, r6.xyzw, l(0x0003f000, 0x0003f000, 0x0003f000, 0x0003f000)
|
||||||
|
iadd r5.xyzw, r5.xyzw, r6.xyzw
|
||||||
|
ushr r6.xyzw, r1.xyzw, l(15, 15, 15, 15)
|
||||||
|
and r6.xyzw, r6.xyzw, l(3072, 3072, 3072, 3072)
|
||||||
|
iadd r5.xyzw, r5.xyzw, r6.xyzw
|
||||||
|
ushr r6.xyzw, r1.xyzw, l(24, 24, 24, 24)
|
||||||
|
and r6.xyzw, r6.xyzw, l(248, 248, 248, 248)
|
||||||
|
iadd r5.xyzw, r5.xyzw, r6.xyzw
|
||||||
|
ushr r6.xyzw, r1.xyzw, l(29, 29, 29, 29)
|
||||||
|
iadd r5.xyzw, r5.xyzw, r6.xyzw
|
||||||
|
and r6.xyzw, r1.xyzw, l(0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff)
|
||||||
|
ushr r1.xyzw, r1.xyzw, l(16, 16, 16, 16)
|
||||||
|
uge r1.xyzw, r1.xyzw, r6.xyzw
|
||||||
|
ishl r0.xy, r0.xyxx, l(2, 2, 0, 0)
|
||||||
|
ishl r0.x, r0.x, l(2)
|
||||||
|
imad r0.z, vThreadID.z, CB0[0][1].y, r0.y
|
||||||
|
imad r0.x, r0.z, CB0[0][0].w, r0.x
|
||||||
|
iadd r0.x, r0.x, CB0[0][0].z
|
||||||
|
not r6.xyzw, r3.xyzw
|
||||||
|
ushr r7.xyzw, r2.xyzw, l(1, 1, 1, 1)
|
||||||
|
and r8.xyzw, r2.xyzw, r7.xyzw
|
||||||
|
and r8.xyzw, r8.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
or r7.xyzw, r2.xyzw, r7.xyzw
|
||||||
|
and r7.xyzw, r7.xyzw, l(0x55555555, 0x55555555, 0x55555555, 0x55555555)
|
||||||
|
mov r0.z, CB0[0][1].y
|
||||||
|
mov r0.w, r0.y
|
||||||
|
mov r9.x, r0.x
|
||||||
|
mov r9.y, l(0)
|
||||||
|
loop
|
||||||
|
uge r9.z, r9.y, l(4)
|
||||||
|
breakc_nz r9.z
|
||||||
|
bfi r10.xyzw, l(29, 29, 29, 29), l(3, 3, 3, 3), r9.yyyy, l(0, 2, 4, 6)
|
||||||
|
ushr r11.xyzw, r6.xxxx, r10.xyzw
|
||||||
|
and r11.xyzw, r11.xyzw, l(3, 3, 3, 3)
|
||||||
|
ushr r12.xyzw, r3.xxxx, r10.xyzw
|
||||||
|
and r12.xyzw, r12.xyzw, l(3, 3, 3, 3)
|
||||||
|
imul null, r12.xyzw, r5.xxxx, r12.xyzw
|
||||||
|
imad r11.xyzw, r11.xyzw, r4.xxxx, r12.xyzw
|
||||||
|
and r12.xyzw, r11.xyzw, l(1023, 1023, 1023, 1023)
|
||||||
|
udiv r12.xyzw, null, r12.xyzw, l(3, 3, 3, 3)
|
||||||
|
ubfe r13.xyzw, l(10, 10, 10, 10), l(10, 10, 10, 10), r11.xyzw
|
||||||
|
udiv r13.xyzw, null, r13.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r13.xyzw, r13.xyzw, l(8, 8, 8, 8)
|
||||||
|
or r12.xyzw, r12.xyzw, r13.xyzw
|
||||||
|
ushr r11.xyzw, r11.xyzw, l(20, 20, 20, 20)
|
||||||
|
udiv r11.xyzw, null, r11.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r11.xyzw, r11.xyzw, l(16, 16, 16, 16)
|
||||||
|
or r11.xyzw, r11.xyzw, r12.xyzw
|
||||||
|
ushr r12.xyzw, r6.yyyy, r10.xyzw
|
||||||
|
and r12.xyzw, r12.xyzw, l(3, 3, 3, 3)
|
||||||
|
ushr r13.xyzw, r3.yyyy, r10.xyzw
|
||||||
|
and r13.xyzw, r13.xyzw, l(3, 3, 3, 3)
|
||||||
|
imul null, r13.xyzw, r5.yyyy, r13.xyzw
|
||||||
|
imad r12.xyzw, r12.xyzw, r4.yyyy, r13.xyzw
|
||||||
|
and r13.xyzw, r12.xyzw, l(1023, 1023, 1023, 1023)
|
||||||
|
udiv r13.xyzw, null, r13.xyzw, l(3, 3, 3, 3)
|
||||||
|
ubfe r14.xyzw, l(10, 10, 10, 10), l(10, 10, 10, 10), r12.xyzw
|
||||||
|
udiv r14.xyzw, null, r14.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r14.xyzw, r14.xyzw, l(8, 8, 8, 8)
|
||||||
|
or r13.xyzw, r13.xyzw, r14.xyzw
|
||||||
|
ushr r12.xyzw, r12.xyzw, l(20, 20, 20, 20)
|
||||||
|
udiv r12.xyzw, null, r12.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r12.xyzw, r12.xyzw, l(16, 16, 16, 16)
|
||||||
|
or r12.xyzw, r12.xyzw, r13.xyzw
|
||||||
|
ushr r13.xyzw, r6.zzzz, r10.xyzw
|
||||||
|
and r13.xyzw, r13.xyzw, l(3, 3, 3, 3)
|
||||||
|
ushr r14.xyzw, r3.zzzz, r10.xyzw
|
||||||
|
and r14.xyzw, r14.xyzw, l(3, 3, 3, 3)
|
||||||
|
imul null, r14.xyzw, r5.zzzz, r14.xyzw
|
||||||
|
imad r13.xyzw, r13.xyzw, r4.zzzz, r14.xyzw
|
||||||
|
and r14.xyzw, r13.xyzw, l(1023, 1023, 1023, 1023)
|
||||||
|
udiv r14.xyzw, null, r14.xyzw, l(3, 3, 3, 3)
|
||||||
|
ubfe r15.xyzw, l(10, 10, 10, 10), l(10, 10, 10, 10), r13.xyzw
|
||||||
|
udiv r15.xyzw, null, r15.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r15.xyzw, r15.xyzw, l(8, 8, 8, 8)
|
||||||
|
or r14.xyzw, r14.xyzw, r15.xyzw
|
||||||
|
ushr r13.xyzw, r13.xyzw, l(20, 20, 20, 20)
|
||||||
|
udiv r13.xyzw, null, r13.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r13.xyzw, r13.xyzw, l(16, 16, 16, 16)
|
||||||
|
or r13.xyzw, r13.xyzw, r14.xyzw
|
||||||
|
ushr r14.xyzw, r6.wwww, r10.xyzw
|
||||||
|
and r14.xyzw, r14.xyzw, l(3, 3, 3, 3)
|
||||||
|
ushr r15.xyzw, r3.wwww, r10.xyzw
|
||||||
|
and r15.xyzw, r15.xyzw, l(3, 3, 3, 3)
|
||||||
|
imul null, r15.xyzw, r5.wwww, r15.xyzw
|
||||||
|
imad r14.xyzw, r14.xyzw, r4.wwww, r15.xyzw
|
||||||
|
and r15.xyzw, r14.xyzw, l(1023, 1023, 1023, 1023)
|
||||||
|
udiv r15.xyzw, null, r15.xyzw, l(3, 3, 3, 3)
|
||||||
|
ubfe r16.xyzw, l(10, 10, 10, 10), l(10, 10, 10, 10), r14.xyzw
|
||||||
|
udiv r16.xyzw, null, r16.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r16.xyzw, r16.xyzw, l(8, 8, 8, 8)
|
||||||
|
or r15.xyzw, r15.xyzw, r16.xyzw
|
||||||
|
ushr r14.xyzw, r14.xyzw, l(20, 20, 20, 20)
|
||||||
|
udiv r14.xyzw, null, r14.xyzw, l(3, 3, 3, 3)
|
||||||
|
ishl r14.xyzw, r14.xyzw, l(16, 16, 16, 16)
|
||||||
|
or r14.xyzw, r14.xyzw, r15.xyzw
|
||||||
|
or r11.xyzw, r11.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000)
|
||||||
|
or r12.xyzw, r12.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000)
|
||||||
|
or r13.xyzw, r13.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000)
|
||||||
|
or r14.xyzw, r14.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000)
|
||||||
|
iadd r15.xyzw, r10.xyzw, l(1, 1, 1, 1)
|
||||||
|
ushr r16.xyzw, r2.xxxx, r10.xyzw
|
||||||
|
and r16.xyzw, r16.xyzw, l(1, 1, 1, 1)
|
||||||
|
ushr r17.xyzw, r2.xxxx, r15.xyzw
|
||||||
|
and r17.xyzw, r17.xyzw, l(1, 1, 1, 1)
|
||||||
|
imul null, r17.xyzw, r5.xxxx, r17.xyzw
|
||||||
|
imad r16.xyzw, r16.xyzw, r4.xxxx, r17.xyzw
|
||||||
|
ushr r17.xyzw, r8.xxxx, r10.xyzw
|
||||||
|
and r18.xyzw, r17.xyzw, l(1, 1, 1, 1)
|
||||||
|
ubfe r18.xyzw, l(9, 9, 9, 9), r18.xyzw, r16.xyzw
|
||||||
|
bfi r19.xyzw, l(1, 1, 1, 1), l(0, 0, 0, 0), r17.xyzw, l(10, 10, 10, 10)
|
||||||
|
ubfe r19.xyzw, l(9, 9, 9, 9), r19.xyzw, r16.xyzw
|
||||||
|
ishl r19.xyzw, r19.xyzw, l(8, 8, 8, 8)
|
||||||
|
iadd r18.xyzw, r18.xyzw, r19.xyzw
|
||||||
|
bfi r17.xyzw, l(2, 2, 2, 2), l(0, 0, 0, 0), r17.xyzw, l(20, 20, 20, 20)
|
||||||
|
ushr r16.xyzw, r16.xyzw, r17.xyzw
|
||||||
|
ishl r16.xyzw, r16.xyzw, l(16, 16, 16, 16)
|
||||||
|
iadd r16.xyzw, r16.xyzw, r18.xyzw
|
||||||
|
ushr r17.xyzw, r7.xxxx, r10.xyzw
|
||||||
|
and r17.xyzw, r17.xyzw, l(1, 1, 1, 1)
|
||||||
|
imad r16.xyzw, r17.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000), r16.xyzw
|
||||||
|
ushr r17.xyzw, r2.yyyy, r10.xyzw
|
||||||
|
and r17.xyzw, r17.xyzw, l(1, 1, 1, 1)
|
||||||
|
ushr r18.xyzw, r2.yyyy, r15.xyzw
|
||||||
|
and r18.xyzw, r18.xyzw, l(1, 1, 1, 1)
|
||||||
|
imul null, r18.xyzw, r5.yyyy, r18.xyzw
|
||||||
|
imad r17.xyzw, r17.xyzw, r4.yyyy, r18.xyzw
|
||||||
|
ushr r18.xyzw, r8.yyyy, r10.xyzw
|
||||||
|
and r19.xyzw, r18.xyzw, l(1, 1, 1, 1)
|
||||||
|
ubfe r19.xyzw, l(9, 9, 9, 9), r19.xyzw, r17.xyzw
|
||||||
|
bfi r20.xyzw, l(1, 1, 1, 1), l(0, 0, 0, 0), r18.xyzw, l(10, 10, 10, 10)
|
||||||
|
ubfe r20.xyzw, l(9, 9, 9, 9), r20.xyzw, r17.xyzw
|
||||||
|
ishl r20.xyzw, r20.xyzw, l(8, 8, 8, 8)
|
||||||
|
iadd r19.xyzw, r19.xyzw, r20.xyzw
|
||||||
|
bfi r18.xyzw, l(2, 2, 2, 2), l(0, 0, 0, 0), r18.xyzw, l(20, 20, 20, 20)
|
||||||
|
ushr r17.xyzw, r17.xyzw, r18.xyzw
|
||||||
|
ishl r17.xyzw, r17.xyzw, l(16, 16, 16, 16)
|
||||||
|
iadd r17.xyzw, r17.xyzw, r19.xyzw
|
||||||
|
ushr r18.xyzw, r7.yyyy, r10.xyzw
|
||||||
|
and r18.xyzw, r18.xyzw, l(1, 1, 1, 1)
|
||||||
|
imad r17.xyzw, r18.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000), r17.xyzw
|
||||||
|
ushr r18.xyzw, r2.zzzz, r10.xyzw
|
||||||
|
and r18.xyzw, r18.xyzw, l(1, 1, 1, 1)
|
||||||
|
ushr r19.xyzw, r2.zzzz, r15.xyzw
|
||||||
|
and r19.xyzw, r19.xyzw, l(1, 1, 1, 1)
|
||||||
|
imul null, r19.xyzw, r5.zzzz, r19.xyzw
|
||||||
|
imad r18.xyzw, r18.xyzw, r4.zzzz, r19.xyzw
|
||||||
|
ushr r19.xyzw, r8.zzzz, r10.xyzw
|
||||||
|
and r20.xyzw, r19.xyzw, l(1, 1, 1, 1)
|
||||||
|
ubfe r20.xyzw, l(9, 9, 9, 9), r20.xyzw, r18.xyzw
|
||||||
|
bfi r21.xyzw, l(1, 1, 1, 1), l(0, 0, 0, 0), r19.xyzw, l(10, 10, 10, 10)
|
||||||
|
ubfe r21.xyzw, l(9, 9, 9, 9), r21.xyzw, r18.xyzw
|
||||||
|
ishl r21.xyzw, r21.xyzw, l(8, 8, 8, 8)
|
||||||
|
iadd r20.xyzw, r20.xyzw, r21.xyzw
|
||||||
|
bfi r19.xyzw, l(2, 2, 2, 2), l(0, 0, 0, 0), r19.xyzw, l(20, 20, 20, 20)
|
||||||
|
ushr r18.xyzw, r18.xyzw, r19.xyzw
|
||||||
|
ishl r18.xyzw, r18.xyzw, l(16, 16, 16, 16)
|
||||||
|
iadd r18.xyzw, r18.xyzw, r20.xyzw
|
||||||
|
ushr r19.xyzw, r7.zzzz, r10.xyzw
|
||||||
|
and r19.xyzw, r19.xyzw, l(1, 1, 1, 1)
|
||||||
|
imad r18.xyzw, r19.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000), r18.xyzw
|
||||||
|
ushr r19.xyzw, r2.wwww, r10.xyzw
|
||||||
|
and r19.xyzw, r19.xyzw, l(1, 1, 1, 1)
|
||||||
|
ushr r15.xyzw, r2.wwww, r15.xyzw
|
||||||
|
and r15.xyzw, r15.xyzw, l(1, 1, 1, 1)
|
||||||
|
imul null, r15.xyzw, r5.wwww, r15.xyzw
|
||||||
|
imad r15.xyzw, r19.xyzw, r4.wwww, r15.xyzw
|
||||||
|
ushr r19.xyzw, r8.wwww, r10.xyzw
|
||||||
|
and r20.xyzw, r19.xyzw, l(1, 1, 1, 1)
|
||||||
|
ubfe r20.xyzw, l(9, 9, 9, 9), r20.xyzw, r15.xyzw
|
||||||
|
bfi r21.xyzw, l(1, 1, 1, 1), l(0, 0, 0, 0), r19.xyzw, l(10, 10, 10, 10)
|
||||||
|
ubfe r21.xyzw, l(9, 9, 9, 9), r21.xyzw, r15.xyzw
|
||||||
|
ishl r21.xyzw, r21.xyzw, l(8, 8, 8, 8)
|
||||||
|
iadd r20.xyzw, r20.xyzw, r21.xyzw
|
||||||
|
bfi r19.xyzw, l(2, 2, 2, 2), l(0, 0, 0, 0), r19.xyzw, l(20, 20, 20, 20)
|
||||||
|
ushr r15.xyzw, r15.xyzw, r19.xyzw
|
||||||
|
ishl r15.xyzw, r15.xyzw, l(16, 16, 16, 16)
|
||||||
|
iadd r15.xyzw, r15.xyzw, r20.xyzw
|
||||||
|
ushr r10.xyzw, r7.wwww, r10.xyzw
|
||||||
|
and r10.xyzw, r10.xyzw, l(1, 1, 1, 1)
|
||||||
|
imad r10.xyzw, r10.xyzw, l(0xff000000, 0xff000000, 0xff000000, 0xff000000), r15.xyzw
|
||||||
|
movc r11.xyzw, r1.xxxx, r16.xyzw, r11.xyzw
|
||||||
|
store_raw U0[0].xyzw, r9.x, r11.xyzw
|
||||||
|
iadd r11.xyz, r9.xxxx, l(16, 32, 48, 0)
|
||||||
|
movc r12.xyzw, r1.yyyy, r17.xyzw, r12.xyzw
|
||||||
|
store_raw U0[0].xyzw, r11.x, r12.xyzw
|
||||||
|
movc r12.xyzw, r1.zzzz, r18.xyzw, r13.xyzw
|
||||||
|
store_raw U0[0].xyzw, r11.y, r12.xyzw
|
||||||
|
movc r10.xyzw, r1.wwww, r10.xyzw, r14.xyzw
|
||||||
|
store_raw U0[0].xyzw, r11.z, r10.xyzw
|
||||||
|
iadd r0.w, r0.w, l(1)
|
||||||
|
uge r9.z, r0.w, r0.z
|
||||||
|
if_nz r9.z
|
||||||
|
ret
|
||||||
|
endif
|
||||||
|
iadd r9.x, r9.x, CB0[0][0].w
|
||||||
|
iadd r9.y, r9.y, l(1)
|
||||||
|
endloop
|
||||||
|
ret
|
||||||
|
// Approximately 360 instruction slots used
|
|
@ -73,4 +73,74 @@ uint4 XeFloat20e4To32(uint4 f24u32) {
|
||||||
return (((exponent + 112u) << 23u) | (mantissa << 3u)) * uint4(f24u32 != 0u);
|
return (((exponent + 112u) << 23u) | (mantissa << 3u)) * uint4(f24u32 != 0u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remaps the 2-bit color indices of four DXT3/DXT5 or opaque DXT1 blocks (one
// block per component) into the weight, from 0 to 3, of the second endpoint
// for every texel. The weights of the first endpoint are the bitwise NOT of
// the returned value.
uint4 XeDXTHighColorWeights(uint4 codes) {
  // Source encoding (first:second endpoint weights):
  // 00 = 3:0, 01 = 0:3, 10 = 2:1, 11 = 1:2.
  uint4 even_bits = codes & 0x55555555u;
  uint4 odd_bits = codes & 0xAAAAAAAAu;
  // Exchange the two bits of every index:
  // 00 = 3:0, 01 = 2:1, 10 = 0:3, 11 = 1:2.
  uint4 swapped = (even_bits << 1u) | (odd_bits >> 1u);
  // Flip the low bit wherever the high bit is set, which exchanges 10 and 11:
  // 00 = 3:0, 01 = 2:1, 10 = 1:2, 11 = 0:3.
  uint4 low_bit_flip = (swapped & 0xAAAAAAAAu) >> 1u;
  return swapped ^ low_bit_flip;
}
|
||||||
|
|
||||||
|
// Expands the 5:6:5 endpoint colors of four DXT blocks (first endpoint in the
// low 16 bits of each component, second endpoint in the high 16 bits) to 8 bits
// per channel. Each 8-bit channel is placed in its own 10-bit field (bits 0-7,
// 10-17 and 20-27), leaving 2 spare bits above it so that all three channels
// can be multiplied by a weight of up to 3 at once without overflowing into
// the neighboring channel.
void XeDXTColorEndpointsTo8In10(uint4 rgb_565, out uint4 rgb_10b_low,
                                out uint4 rgb_10b_high) {
  // 5:6:5 to 8:8:8 by replicating the top bits of each channel into the newly
  // added low bits, similar to how Compressonator does that.
  // https://github.com/GPUOpen-Tools/Compressonator/blob/master/Compressonator/Source/Codec/DXTC/Codec_DXTC_RGBA.cpp#L429

  // First endpoint: bits 0-4, 5-10 and 11-15 of the packed value.
  uint4 low_field_20 = rgb_565 & 31u;
  uint4 low_field_10 = (rgb_565 >> 5u) & 63u;
  uint4 low_field_0 = (rgb_565 >> 11u) & 31u;
  rgb_10b_low = ((low_field_0 << 3u) | (low_field_0 >> 2u)) |
                (((low_field_10 << 2u) | (low_field_10 >> 4u)) << 10u) |
                (((low_field_20 << 3u) | (low_field_20 >> 2u)) << 20u);

  // Second endpoint: bits 16-20, 21-26 and 27-31 of the packed value.
  uint4 high_field_20 = (rgb_565 >> 16u) & 31u;
  uint4 high_field_10 = (rgb_565 >> 21u) & 63u;
  uint4 high_field_0 = rgb_565 >> 27u;
  rgb_10b_high = ((high_field_0 << 3u) | (high_field_0 >> 2u)) |
                 (((high_field_10 << 2u) | (high_field_10 >> 4u)) << 10u) |
                 (((high_field_20 << 3u) | (high_field_20 >> 2u)) << 20u);
}
|
||||||
|
|
||||||
|
// Decodes one row of texels from four opaque DXT blocks into RGB8, leaving the
// alpha bits of the result as 0. rgb_10b_low and rgb_10b_high are the endpoint
// colors in the layout produced by XeDXTColorEndpointsTo8In10 (8-bit channels
// in 10-bit fields), weights_high are the second endpoint weights in the form
// returned by XeDXTHighColorWeights. weights_shift selects the row: 0 for the
// first, 8 for the second, 16 for the third, and 24 for the fourth. row_0,
// row_1, row_2 and row_3 receive the four texels of that row for the blocks in
// the x, y, z and w components of the inputs respectively.
void XeDXTFourBlocksRowToRGB8(uint4 rgb_10b_low, uint4 rgb_10b_high,
                              uint4 weights_high, uint weights_shift,
                              out uint4 row_0, out uint4 row_1,
                              out uint4 row_2, out uint4 row_3) {
  // Bit offsets of the 2-bit weights of the four texels in the row.
  uint4 texel_shifts = weights_shift + uint4(0u, 2u, 4u, 6u);
  // The first endpoint weights are the complement of the second endpoint ones.
  uint4 weights_low = ~weights_high;
  uint4 texel_weights_low, texel_weights_high, sum_3x;
  uint4 field_0, field_10, field_20;

  // Block in X. The weighted sum is 3 times the interpolated color, so each
  // 10-bit field is divided by 3 before being packed into 8 bits.
  texel_weights_low = (weights_low.xxxx >> texel_shifts) & 3u;
  texel_weights_high = (weights_high.xxxx >> texel_shifts) & 3u;
  sum_3x = texel_weights_low * rgb_10b_low.x +
           texel_weights_high * rgb_10b_high.x;
  field_0 = (sum_3x & 1023u) / 3u;
  field_10 = ((sum_3x >> 10u) & 1023u) / 3u;
  field_20 = (sum_3x >> 20u) / 3u;
  row_0 = field_0 | (field_10 << 8u) | (field_20 << 16u);

  // Block in Y.
  texel_weights_low = (weights_low.yyyy >> texel_shifts) & 3u;
  texel_weights_high = (weights_high.yyyy >> texel_shifts) & 3u;
  sum_3x = texel_weights_low * rgb_10b_low.y +
           texel_weights_high * rgb_10b_high.y;
  field_0 = (sum_3x & 1023u) / 3u;
  field_10 = ((sum_3x >> 10u) & 1023u) / 3u;
  field_20 = (sum_3x >> 20u) / 3u;
  row_1 = field_0 | (field_10 << 8u) | (field_20 << 16u);

  // Block in Z.
  texel_weights_low = (weights_low.zzzz >> texel_shifts) & 3u;
  texel_weights_high = (weights_high.zzzz >> texel_shifts) & 3u;
  sum_3x = texel_weights_low * rgb_10b_low.z +
           texel_weights_high * rgb_10b_high.z;
  field_0 = (sum_3x & 1023u) / 3u;
  field_10 = ((sum_3x >> 10u) & 1023u) / 3u;
  field_20 = (sum_3x >> 20u) / 3u;
  row_2 = field_0 | (field_10 << 8u) | (field_20 << 16u);

  // Block in W.
  texel_weights_low = (weights_low.wwww >> texel_shifts) & 3u;
  texel_weights_high = (weights_high.wwww >> texel_shifts) & 3u;
  sum_3x = texel_weights_low * rgb_10b_low.w +
           texel_weights_high * rgb_10b_high.w;
  field_0 = (sum_3x & 1023u) / 3u;
  field_10 = ((sum_3x >> 10u) & 1023u) / 3u;
  field_20 = (sum_3x >> 20u) / 3u;
  row_3 = field_0 | (field_10 << 8u) | (field_20 << 16u);
}
|
||||||
|
|
||||||
#endif // XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_
|
#endif // XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include "pixel_formats.hlsli"
|
||||||
#include "texture_copy.hlsli"
|
#include "texture_copy.hlsli"
|
||||||
|
|
||||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||||
|
@ -13,11 +14,11 @@
|
||||||
// II JJ KK LL
|
// II JJ KK LL
|
||||||
// MM NN OO PP
|
// MM NN OO PP
|
||||||
|
|
||||||
void XeCTX1FourBlocksRowToR8G8(uint4 weights_high, uint weights_shift,
|
void XeCTX1FourBlocksRowToR8G8(uint4 end_low_rr00gg00, uint4 end_high_rr00gg00,
|
||||||
uint4 end_low_rr00gg00, uint4 end_high_rr00gg00,
|
uint4 weights_high, uint weights_shift,
|
||||||
out uint4 row_01, out uint4 row_23) {
|
out uint4 row_01, out uint4 row_23) {
|
||||||
uint4 weights_low = ~weights_high;
|
uint4 weights_low = ~weights_high;
|
||||||
uint4 weights_shifts = uint4(0u, 2u, 4u, 6u) + weights_shift;
|
uint4 weights_shifts = weights_shift + uint4(0u, 2u, 4u, 6u);
|
||||||
uint4 row_3aaaa =
|
uint4 row_3aaaa =
|
||||||
((weights_low >> weights_shifts.x) & 3u) * end_low_rr00gg00 +
|
((weights_low >> weights_shifts.x) & 3u) * end_low_rr00gg00 +
|
||||||
((weights_high >> weights_shifts.x) & 3u) * end_high_rr00gg00;
|
((weights_high >> weights_shifts.x) & 3u) * end_high_rr00gg00;
|
||||||
|
@ -63,15 +64,6 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||||
blocks_01 = XeByteSwap(blocks_01, xe_texture_copy_endianness);
|
blocks_01 = XeByteSwap(blocks_01, xe_texture_copy_endianness);
|
||||||
blocks_23 = XeByteSwap(blocks_23, xe_texture_copy_endianness);
|
blocks_23 = XeByteSwap(blocks_23, xe_texture_copy_endianness);
|
||||||
|
|
||||||
// Sort the color indices so they can be used as weights for the second
|
|
||||||
// endpoint. Initially 00 = 3:0, 01 = 0:3, 10 = 2:1, 11 = 1:2.
|
|
||||||
uint4 weights_high = uint4(blocks_01.yw, blocks_23.yw);
|
|
||||||
// Swap bits. 00 = 3:0, 01 = 2:1, 10 = 0:3, 11 = 1:2.
|
|
||||||
weights_high = ((weights_high & 0x55555555u) << 1u) |
|
|
||||||
((weights_high & 0xAAAAAAAAu) >> 1u);
|
|
||||||
// Swap 10 and 11. 00 = 3:0, 01 = 2:1, 10 = 1:2, 11 = 0:3.
|
|
||||||
weights_high ^= ((weights_high & 0xAAAAAAAAu) >> 1u);
|
|
||||||
|
|
||||||
// Unpack the endpoints as:
|
// Unpack the endpoints as:
|
||||||
// 0x00g000r0 0x00g100r1 0x00g200r2 0x00g300r3
|
// 0x00g000r0 0x00g100r1 0x00g200r2 0x00g300r3
|
||||||
// 0x00G000R0 0x00G100R1 0x00G200R2 0x00G300R3
|
// 0x00G000R0 0x00G100R1 0x00G200R2 0x00G300R3
|
||||||
|
@ -82,6 +74,10 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||||
uint4 end_high_rr00gg00 =
|
uint4 end_high_rr00gg00 =
|
||||||
((end_packed & 0xFF0000u) >> 16u) | ((end_packed & 0xFF000000u) >> 8u);
|
((end_packed & 0xFF0000u) >> 16u) | ((end_packed & 0xFF000000u) >> 8u);
|
||||||
|
|
||||||
|
// Sort the color indices so they can be used as weights for the second
|
||||||
|
// endpoint.
|
||||||
|
uint4 weights_high = XeDXTHighColorWeights(uint4(blocks_01.yw, blocks_23.yw));
|
||||||
|
|
||||||
// Uncompress and write the rows.
|
// Uncompress and write the rows.
|
||||||
uint3 texel_index_host = block_index << uint3(2u, 2u, 0u);
|
uint3 texel_index_host = block_index << uint3(2u, 2u, 0u);
|
||||||
uint texel_offset_host = XeTextureHostLinearOffset(
|
uint texel_offset_host = XeTextureHostLinearOffset(
|
||||||
|
@ -89,8 +85,8 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||||
xe_texture_copy_host_pitch, 2u) + xe_texture_copy_host_base;
|
xe_texture_copy_host_pitch, 2u) + xe_texture_copy_host_base;
|
||||||
for (uint i = 0u; i < 4u; ++i) {
|
for (uint i = 0u; i < 4u; ++i) {
|
||||||
uint4 row_01, row_23;
|
uint4 row_01, row_23;
|
||||||
XeCTX1FourBlocksRowToR8G8(weights_high, i * 8u, end_low_rr00gg00,
|
XeCTX1FourBlocksRowToR8G8(end_low_rr00gg00, end_high_rr00gg00, weights_high,
|
||||||
end_high_rr00gg00, row_01, row_23);
|
i * 8u, row_01, row_23);
|
||||||
xe_texture_copy_dest.Store4(texel_offset_host, row_01);
|
xe_texture_copy_dest.Store4(texel_offset_host, row_01);
|
||||||
xe_texture_copy_dest.Store4(texel_offset_host + 16u, row_23);
|
xe_texture_copy_dest.Store4(texel_offset_host + 16u, row_23);
|
||||||
if (++texel_index_host.y >= xe_texture_copy_size_texels.y) {
|
if (++texel_index_host.y >= xe_texture_copy_size_texels.y) {
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
#include "pixel_formats.hlsli"
|
||||||
|
#include "texture_copy.hlsli"
|
||||||
|
|
||||||
|
// Decodes one row of texels from four DXT1 blocks into RGBA8 using per-texel
// 1-bit endpoint weights. rgb_10b_low and rgb_10b_high are the endpoint colors
// with 8-bit channels in 10-bit fields. In every 2-bit texel entry of weights,
// the lower bit is the weight of rgb_10b_low and the upper bit is the weight
// of rgb_10b_high. When both bits are set, the sum of the two endpoints is
// halved; when neither is set, the texel is written as 0 including alpha,
// otherwise alpha is 0xFF. weights_shift is the bit offset of the first texel
// of the row in weights. row_0, row_1, row_2 and row_3 receive the four texels
// of the row for the blocks in the x, y, z and w components respectively.
void XeDXT1FourTransBlocksRowToRGBA8(uint4 rgb_10b_low, uint4 rgb_10b_high,
                                     uint4 weights, uint4 weights_shift,
                                     out uint4 row_0, out uint4 row_1,
                                     out uint4 row_2, out uint4 row_3) {
  // Bit offsets of the lower and the upper bit of each texel's entry.
  uint4 low_bit_shifts = weights_shift + uint4(0u, 2u, 4u, 6u);
  uint4 high_bit_shifts = low_bit_shifts + 1u;
  // Upper bit of every entry moved down to the lower bit position.
  uint4 weights_upper = (weights & 0xAAAAAAAAu) >> 1u;
  // Lower bit set where both endpoints contribute, so the sum must be divided
  // by 2 (shifted right by 1).
  uint4 halve_bits = weights & weights_upper;
  // Lower bit set where at least one endpoint contributes (opaque texel).
  uint4 opaque_bits = (weights & 0x55555555u) | weights_upper;
  uint4 rgb_sum, halve, alpha;

  // Block in X.
  rgb_sum = ((weights.xxxx >> low_bit_shifts) & 1u) * rgb_10b_low.x +
            ((weights.xxxx >> high_bit_shifts) & 1u) * rgb_10b_high.x;
  halve = (halve_bits.xxxx >> low_bit_shifts) & 1u;
  alpha = ((opaque_bits.xxxx >> low_bit_shifts) & 1u) * 0xFF000000u;
  row_0 = ((rgb_sum & 1023u) >> halve) +
          ((((rgb_sum >> 10u) & 1023u) >> halve) << 8u) +
          (((rgb_sum >> 20u) >> halve) << 16u) +
          alpha;

  // Block in Y.
  rgb_sum = ((weights.yyyy >> low_bit_shifts) & 1u) * rgb_10b_low.y +
            ((weights.yyyy >> high_bit_shifts) & 1u) * rgb_10b_high.y;
  halve = (halve_bits.yyyy >> low_bit_shifts) & 1u;
  alpha = ((opaque_bits.yyyy >> low_bit_shifts) & 1u) * 0xFF000000u;
  row_1 = ((rgb_sum & 1023u) >> halve) +
          ((((rgb_sum >> 10u) & 1023u) >> halve) << 8u) +
          (((rgb_sum >> 20u) >> halve) << 16u) +
          alpha;

  // Block in Z.
  rgb_sum = ((weights.zzzz >> low_bit_shifts) & 1u) * rgb_10b_low.z +
            ((weights.zzzz >> high_bit_shifts) & 1u) * rgb_10b_high.z;
  halve = (halve_bits.zzzz >> low_bit_shifts) & 1u;
  alpha = ((opaque_bits.zzzz >> low_bit_shifts) & 1u) * 0xFF000000u;
  row_2 = ((rgb_sum & 1023u) >> halve) +
          ((((rgb_sum >> 10u) & 1023u) >> halve) << 8u) +
          (((rgb_sum >> 20u) >> halve) << 16u) +
          alpha;

  // Block in W.
  rgb_sum = ((weights.wwww >> low_bit_shifts) & 1u) * rgb_10b_low.w +
            ((weights.wwww >> high_bit_shifts) & 1u) * rgb_10b_high.w;
  halve = (halve_bits.wwww >> low_bit_shifts) & 1u;
  alpha = ((opaque_bits.wwww >> low_bit_shifts) & 1u) * 0xFF000000u;
  row_3 = ((rgb_sum & 1023u) >> halve) +
          ((((rgb_sum >> 10u) & 1023u) >> halve) << 8u) +
          (((rgb_sum >> 20u) >> halve) << 16u) +
          alpha;
}
|
||||||
|
|
||||||
|
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // Every thread converts 4 horizontally adjacent DXT1 (8bpb) blocks into
  // 16x4 R8G8B8A8 texels.
  uint3 block_index = uint3(xe_thread_id.x << 2u, xe_thread_id.yz);
  [branch] if (any(block_index >= xe_texture_copy_size_blocks)) {
    return;
  }

  // Fetch the four blocks (two dwords each) and apply the endian swap.
  uint4 guest_offsets = XeTextureCopyGuestBlockOffsets(block_index, 8u, 3u);
  uint4 blocks_01 = XeByteSwap(
      uint4(xe_texture_copy_source.Load2(guest_offsets.x),
            xe_texture_copy_source.Load2(guest_offsets.y)),
      xe_texture_copy_endianness);
  uint4 blocks_23 = XeByteSwap(
      uint4(xe_texture_copy_source.Load2(guest_offsets.z),
            xe_texture_copy_source.Load2(guest_offsets.w)),
      xe_texture_copy_endianness);

  // First dword of each block - two 5:6:5 endpoints, second dword - 2-bit
  // color indices.
  uint4 rgb_565 = uint4(blocks_01.xz, blocks_23.xz);
  uint4 codes = uint4(blocks_01.yw, blocks_23.yw);

  // Endpoint RGB for mixing, as 8-bit components in 10-bit sequences.
  uint4 rgb_10b_low, rgb_10b_high;
  XeDXTColorEndpointsTo8In10(rgb_565, rgb_10b_low, rgb_10b_high);

  // Per-block mode: punchthrough if the first endpoint is not greater than
  // the second one, opaque otherwise.
  bool4 is_trans = (rgb_565 & 0xFFFFu) <= (rgb_565 >> 16u);

  // Opaque mode: indices remapped to the weights of the second endpoint.
  uint4 weights_opaque_high = XeDXTHighColorWeights(codes);

  // Punchthrough mode: indices remapped so that the bits of each of them are
  // the endpoint weights, and AND of those bits is the right shift amount for
  // mixing the two colors.
  // Initially 00 = 1:0, 01 = 0:1, 10 = 1:1, 11 = 0:0.
  // After inverting: 00 = 0:0, 01 = 1:1, 10 = 0:1, 11 = 1:0.
  uint4 codes_inverted = ~codes;
  // After flipping the upper bit where the lower bit is set:
  // 00 = 0:0, 01 = 1:0, 10 = 0:1, 11 = 1:1.
  uint4 weights_trans =
      codes_inverted ^ ((codes_inverted & 0x55555555u) << 1u);

  // Uncompress and write the rows.
  uint3 texel_index_host = block_index << uint3(2u, 2u, 0u);
  uint texel_offset_host = XeTextureHostLinearOffset(
      texel_index_host, xe_texture_copy_size_texels.y,
      xe_texture_copy_host_pitch, 4u) + xe_texture_copy_host_base;
  for (uint row = 0u; row < 4u; ++row) {
    // The indices of one row of a block occupy 8 bits.
    uint row_weights_shift = row * 8u;

    // Both modes are decoded for all four blocks, the result is selected per
    // block when storing. The opaque rows are returned with alpha 0, so 0xFF
    // is inserted into the alpha of those.
    uint4 opaque_0, opaque_1, opaque_2, opaque_3;
    XeDXTFourBlocksRowToRGB8(rgb_10b_low, rgb_10b_high, weights_opaque_high,
                             row_weights_shift, opaque_0, opaque_1, opaque_2,
                             opaque_3);
    uint4 trans_0, trans_1, trans_2, trans_3;
    XeDXT1FourTransBlocksRowToRGBA8(rgb_10b_low, rgb_10b_high, weights_trans,
                                    row_weights_shift, trans_0, trans_1,
                                    trans_2, trans_3);

    // 4 texels * 4 bytes = 16 bytes per block in the row.
    xe_texture_copy_dest.Store4(
        texel_offset_host,
        is_trans.x ? trans_0 : (opaque_0 | 0xFF000000u));
    xe_texture_copy_dest.Store4(
        texel_offset_host + 16u,
        is_trans.y ? trans_1 : (opaque_1 | 0xFF000000u));
    xe_texture_copy_dest.Store4(
        texel_offset_host + 32u,
        is_trans.z ? trans_2 : (opaque_2 | 0xFF000000u));
    xe_texture_copy_dest.Store4(
        texel_offset_host + 48u,
        is_trans.w ? trans_3 : (opaque_3 | 0xFF000000u));

    // Stop once the next row would be outside the texture.
    texel_index_host.y += 1u;
    if (texel_index_host.y >= xe_texture_copy_size_texels.y) {
      return;
    }
    texel_offset_host += xe_texture_copy_host_pitch;
  }
}
|
|
@ -34,6 +34,7 @@ namespace d3d12 {
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_32bpp_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_32bpp_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_64bpp_cs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_64bpp_cs.h"
|
||||||
|
@ -180,6 +181,7 @@ const TextureCache::LoadModeInfo TextureCache::load_mode_info_[] = {
|
||||||
{texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)},
|
{texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)},
|
||||||
{texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)},
|
{texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)},
|
||||||
{texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
|
{texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
|
||||||
|
{texture_load_dxt1_rgba8_cs, sizeof(texture_load_dxt1_rgba8_cs)},
|
||||||
{texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
|
{texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
|
||||||
{texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
|
{texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
|
||||||
{texture_load_depth_unorm_cs, sizeof(texture_load_depth_unorm_cs)},
|
{texture_load_depth_unorm_cs, sizeof(texture_load_depth_unorm_cs)},
|
||||||
|
|
|
@ -99,6 +99,7 @@ class TextureCache {
|
||||||
k32bpb,
|
k32bpb,
|
||||||
k64bpb,
|
k64bpb,
|
||||||
k128bpb,
|
k128bpb,
|
||||||
|
kDXT1AsRGBA8,
|
||||||
kDXT3A,
|
kDXT3A,
|
||||||
kCTX1,
|
kCTX1,
|
||||||
kDepthUnorm,
|
kDepthUnorm,
|
||||||
|
|
Loading…
Reference in New Issue