diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index a734f3421..c5da88c0c 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -1,6 +1,7 @@ #include "_vmem.h" #include "hw/aica/aica_if.h" #include "hw/pvr/pvr_mem.h" +#include "hw/pvr/elan.h" #include "hw/sh4/dyna/blockmanager.h" #include "hw/sh4/sh4_mem.h" #include "oslib/oslib.h" @@ -24,6 +25,11 @@ static _vmem_WriteMem32FP* _vmem_WF32[HANDLER_COUNT]; //upper 8b of the address static void* _vmem_MemInfo_ptr[0x100]; +#define MAP_RAM_START_OFFSET 0 +#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) +#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) +#define MAP_ERAM_START_OFFSET (MAP_ARAM_START_OFFSET+ARAM_SIZE) + void* _vmem_read_const(u32 addr,bool& ismem,u32 sz) { u32 page=addr>>24; @@ -417,7 +423,8 @@ static void _vmem_set_p0_mappings() {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x08000000, 0x0C000000, 0, 0, false}, // Area 2 + {0x08000000, 0x0A000000, 0, 0, false}, // Area 2 + {0x0A000000, 0x0C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) {0x10000000, 0x80000000, 0, 0, false}, // Area 4-7 (unused) }; @@ -434,7 +441,7 @@ bool _vmem_reserve() // Use vmem only if settings mandate so, and if we have proper exception handlers. 
#if !defined(TARGET_NO_EXCEPTIONS) if (!settings.dynarec.disable_nvmem) - vmemstatus = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb); + vmemstatus = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX + elan::ELAN_RAM_SIZE); #endif return true; } @@ -451,6 +458,8 @@ static void _vmem_term_mappings() vram.data = nullptr; free_pages(aica_ram.data); aica_ram.data = nullptr; + free_pages(elan::RAM); + elan::RAM = nullptr; } } @@ -478,6 +487,8 @@ void _vmem_init_mappings() aica_ram.size = ARAM_SIZE; aica_ram.data = (u8*)malloc_pages(ARAM_SIZE); + + elan::RAM = (u8*)malloc_pages(elan::ELAN_RAM_SIZE); } else { NOTICE_LOG(VMEM, "Info: nvmem is enabled, with addr space of size %s", vmemstatus == MemType4GB ? "4GB" : "512MB"); @@ -493,7 +504,8 @@ void _vmem_init_mappings() {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x08000000, 0x0C000000, 0, 0, false}, // Area 2 + {0x08000000, 0x0A000000, 0, 0, false}, // Area 2 + {0x0A000000, 0x0C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) {0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused) // This is outside of the 512MB addr space. 
We map 8MB in all cases to help some games read past the end of aica ram @@ -505,6 +517,7 @@ void _vmem_init_mappings() aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writable AICA addrspace vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writable and lockable) mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror + elan::RAM = &virt_ram_base[0x0A000000]; } else { @@ -520,7 +533,8 @@ void _vmem_init_mappings() {0x85000000, 0x86000000, 0, 0, false}, // 32 bit path (unused) {0x86000000, 0x87000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0x87000000, 0x88000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x88000000, 0x8C000000, 0, 0, false}, // Area 2 + {0x88000000, 0x8A000000, 0, 0, false}, // Area 2 + {0x8A000000, 0x8C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) {0x8C000000, 0x90000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) {0x90000000, 0xA0000000, 0, 0, false}, // Area 4-7 (unused) // P2 @@ -533,7 +547,8 @@ void _vmem_init_mappings() {0xA5000000, 0xA6000000, 0, 0, false}, // 32 bit path (unused) {0xA6000000, 0xA7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0xA7000000, 0xA8000000, 0, 0, false}, // 32 bit path (unused) mirror - {0xA8000000, 0xAC000000, 0, 0, false}, // Area 2 + {0xA8000000, 0xAA000000, 0, 0, false}, // Area 2 + {0xAA000000, 0xAC000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) {0xAC000000, 0xB0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) {0xB0000000, 0xC0000000, 0, 0, false}, // Area 4-7 (unused) // P3 @@ -546,7 +561,8 @@ void _vmem_init_mappings() {0xC5000000, 0xC6000000, 0, 0, false}, // 32 bit path (unused) {0xC6000000, 0xC7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0xC7000000, 0xC8000000, 0, 0, false}, // 32 bit path (unused) mirror - {0xC8000000, 0xCC000000, 0, 0, false}, // Area 2 + {0xC8000000, 
0xCA000000, 0, 0, false}, // Area 2 + {0xCA000000, 0xCC000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) {0xCC000000, 0xD0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) {0xD0000000, 0x100000000L, 0, 0, false}, // Area 4-7 (unused) }; @@ -556,6 +572,7 @@ void _vmem_init_mappings() aica_ram.data = &virt_ram_base[0x80800000]; // Points to the first AICA addrspace in P1 vram.data = &virt_ram_base[0x84000000]; // Points to first vram mirror (writable and lockable) in P1 mem_b.data = &virt_ram_base[0x8C000000]; // Main memory, first mirror in P1 + elan::RAM = &virt_ram_base[0x8A000000]; vmem_4gb_space = true; } diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index a6c7d8fc3..bccb03f9f 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -15,7 +15,7 @@ struct vmem_mapping { // Platform specific vmemory API // To initialize (maybe) the vmem subsystem -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr); +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize); // To reset the on-demand allocated pages. void vmem_platform_reset_mem(void *ptr, unsigned size_bytes); // To handle a fault&allocate an ondemand page. 
@@ -109,10 +109,6 @@ static inline bool _nvmem_4gb_space() { } void _vmem_bm_reset(); -#define MAP_RAM_START_OFFSET 0 -#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) -#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) - void _vmem_protect_vram(u32 addr, u32 size); void _vmem_unprotect_vram(u32 addr, u32 size); u32 _vmem_get_vram_offset(void *addr); diff --git a/core/hw/mem/mem_watch.cpp b/core/hw/mem/mem_watch.cpp index bbc47f5a9..321906f58 100644 --- a/core/hw/mem/mem_watch.cpp +++ b/core/hw/mem/mem_watch.cpp @@ -24,6 +24,7 @@ namespace memwatch VramWatcher vramWatcher; RamWatcher ramWatcher; AicaRamWatcher aramWatcher; +ElanRamWatcher elanWatcher; void AicaRamWatcher::protectMem(u32 addr, u32 size) { @@ -107,5 +108,63 @@ u32 AicaRamWatcher::getMemOffset(void *p) return addr; } +void ElanRamWatcher::protectMem(u32 addr, u32 size) +{ + using namespace elan; + size = std::min(ELAN_RAM_SIZE - addr, size) & ~PAGE_MASK; + if (_nvmem_enabled()) + { + mem_region_lock(virt_ram_base + 0x0a000000 + addr, size); // P0 + if (_nvmem_4gb_space()) + { + mem_region_lock(virt_ram_base + 0x8a000000 + addr, size); // P1 + mem_region_lock(virt_ram_base + 0xaa000000 + addr, size); // P2 + } + } else { + mem_region_lock(RAM + addr, size); + } +} + +void ElanRamWatcher::unprotectMem(u32 addr, u32 size) +{ + using namespace elan; + size = std::min(ELAN_RAM_SIZE - addr, size) & ~PAGE_MASK; + if (_nvmem_enabled()) + { + mem_region_unlock(virt_ram_base + 0x0a000000 + addr, size); // P0 + if (_nvmem_4gb_space()) + { + mem_region_unlock(virt_ram_base + 0x8a000000 + addr, size); // P1 + mem_region_unlock(virt_ram_base + 0xaa000000 + addr, size); // P2 + } + } else { + mem_region_unlock(RAM + addr, size); + } +} + +u32 ElanRamWatcher::getMemOffset(void *p) +{ + using namespace elan; + u32 addr; + if (_nvmem_enabled()) + { + if ((u8 *)p < virt_ram_base || (u8 *)p >= virt_ram_base + 0x100000000L) + return -1; + addr = (u32)((u8 *)p - virt_ram_base); + u32 area = (addr
>> 29) & 7; + if (area != 0 && area != 4 && area != 5) // P0, P1 or P2 only + return -1; + addr &= 0x1fffffff; + if (addr < 0x0a000000 || addr >= 0x0a000000 + ELAN_RAM_SIZE) + return -1; + addr &= ELAN_RAM_SIZE - 1; // keep only the offset inside Elan RAM (the 0x0a000000 base is ELAN_RAM_SIZE-aligned) + } else { + if ((u8 *)p < RAM || (u8 *)p >= &RAM[ELAN_RAM_SIZE]) + return -1; + addr = (u32)((u8 *)p - RAM); + } + return addr; +} + } diff --git a/core/hw/mem/mem_watch.h b/core/hw/mem/mem_watch.h index c7c22c89c..c83010fd0 100644 --- a/core/hw/mem/mem_watch.h +++ b/core/hw/mem/mem_watch.h @@ -22,6 +22,7 @@ #include "hw/sh4/dyna/blockmanager.h" #include "hw/sh4/sh4_mem.h" #include "hw/pvr/pvr_mem.h" +#include "hw/pvr/elan.h" #include "rend/TexCache.h" #include #include @@ -148,9 +149,26 @@ public: } }; +class ElanRamWatcher : public Watcher +{ + friend class Watcher; + +protected: + void protectMem(u32 addr, u32 size); + void unprotectMem(u32 addr, u32 size); + u32 getMemOffset(void *p); + +public: + void *getMemPage(u32 addr) + { + return &elan::RAM[addr]; + } +}; + extern VramWatcher vramWatcher; extern RamWatcher ramWatcher; extern AicaRamWatcher aramWatcher; +extern ElanRamWatcher elanWatcher; inline static bool writeAccess(void *p) { @@ -166,6 +184,8 @@ inline static bool writeAccess(void *p) VramLockedWrite((u8 *)p); return true; } + if (settings.platform.isNaomi2() && elanWatcher.hit(p)) + return true; return aramWatcher.hit(p); } @@ -176,6 +196,7 @@ inline static void protect() vramWatcher.protect(); ramWatcher.protect(); aramWatcher.protect(); + elanWatcher.protect(); } inline static void reset() @@ -183,6 +204,7 @@ inline static void reset() vramWatcher.reset(); ramWatcher.reset(); aramWatcher.reset(); + elanWatcher.reset(); } } diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 899daaefd..0ec8af409 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -71,8 +71,7 @@ static _vmem_handler elanRegHandler; static _vmem_handler elanCmdHandler; static _vmem_handler elanRamHandler; -static u8 *elanRAM; -constexpr u32
ELAN_RAM_SIZE = 32 * 1024 * 1024; +u8 *RAM; static u32 reg10; static u32 reg74; @@ -244,7 +243,7 @@ T DYNACALL read_elancmd(u32 addr) static GMP *curGmp; static glm::mat4x4 curMatrix; static float *taMVMatrix; -static glm::mat4x4 lightMatrix; +static float *taNormalMatrix; static glm::mat4 projectionMatrix; static float *taProjMatrix; static LightModel *curLightModel; @@ -295,35 +294,41 @@ struct State if (instance == Null) { taMVMatrix = nullptr; + taNormalMatrix = nullptr; envMapUOffset = 0.f; envMapVOffset = 0.f; return; } - InstanceMatrix *mat = (InstanceMatrix *)&elanRAM[instance]; - DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f", - -mat->tm00, -mat->tm01, -mat->tm02, -mat->mat03, - mat->tm10, mat->tm11, mat->tm12, mat->mat13, - mat->tm20, mat->tm21, mat->tm22, -mat->mat23, - mat->lm00, mat->lm01, mat->lm02, - mat->lm10, mat->lm11, mat->lm12); + InstanceMatrix *mat = (InstanceMatrix *)&RAM[instance]; + DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f\n %f %f %f", + -mat->tm00, -mat->tm10, -mat->tm20, -mat->tm30, + mat->tm01, mat->tm11, mat->tm21, mat->tm31, + -mat->tm02, -mat->tm12, -mat->tm22, -mat->tm32, + mat->lm00, mat->lm10, mat->lm20, + mat->lm01, mat->lm11, mat->lm21, + mat->lm02, mat->lm12, mat->lm22); curMatrix = glm::mat4x4{ - -mat->tm00, mat->tm10, mat->tm20, 0, - -mat->tm01, mat->tm11, mat->tm21, 0, - -mat->tm02, mat->tm12, mat->tm22, 0, - -mat->mat03, mat->mat13, -mat->mat23, 1 + -mat->tm00, mat->tm01, -mat->tm02, 0.f, + -mat->tm10, mat->tm11, -mat->tm12, 0.f, + -mat->tm20, mat->tm21, -mat->tm22, 0.f, + -mat->tm30, mat->tm31, -mat->tm32, 1.f }; - lightMatrix = glm::mat4x4{ - -mat->lm00, mat->lm10, mat->tm20, 0, - -mat->lm01, mat->lm11, mat->tm21, 0, - -mat->lm02, mat->lm12, mat->tm22, 0, - -mat->mat03, mat->mat13, -mat->mat23, 1 + glm::mat4x4 normalMatrix = glm::mat4x4{ + mat->lm00, mat->lm01, mat->lm02, 0.f, + mat->lm10, mat->lm11, mat->lm12, 0.f, + mat->lm20, 
mat->lm21, mat->lm22, 0.f, + -mat->tm30, mat->tm31, -mat->tm32, 1.f }; nearPlane = mat->_near; farPlane = mat->_far; envMapUOffset = mat->envMapU; envMapVOffset = mat->envMapV; taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix)); + if (normalMatrix != curMatrix) + taNormalMatrix = ta_add_matrix(glm::value_ptr(normalMatrix)); + else + taNormalMatrix = taMVMatrix; } void setProjectionMatrix(void *p) @@ -339,9 +344,12 @@ struct State taProjMatrix = nullptr; return; } - ProjMatrix *pm = (ProjMatrix *)&elanRAM[projMatrix]; + ProjMatrix *pm = (ProjMatrix *)&RAM[projMatrix]; DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane); - + // fx = -m00 * w/2 + // tx = -m20 * w/2 + left + w/2 + // fy = -m11 * h/2 + // ty = -m21 * h/2 + top + h/2 projectionMatrix = glm::mat4( -pm->fx, 0, 0, 0, 0, pm->fy, 0, 0, @@ -362,7 +370,7 @@ struct State curGmp = nullptr; else { - curGmp = (GMP *)&elanRAM[gmp]; + curGmp = (GMP *)&RAM[gmp]; DEBUG_LOG(PVR, "GMP paramSelect %x clip %d", curGmp->paramSelect.full, curGmp->pcw.userClip); } } @@ -380,7 +388,7 @@ struct State curLightModel = nullptr; else { - curLightModel = (LightModel *)&elanRAM[lightModel]; + curLightModel = (LightModel *)&RAM[lightModel]; DEBUG_LOG(PVR, "Light model mask: diffuse %04x specular %04x, ambient base %08x offset %08x", curLightModel->diffuseMask0, curLightModel->specularMask0, curLightModel->ambientBase0, curLightModel->ambientOffset0); } @@ -400,7 +408,7 @@ struct State elan::curLights[lightId] = nullptr; return; } - PointLight *plight = (PointLight *)&elanRAM[lights[lightId]]; + PointLight *plight = (PointLight *)&RAM[lights[lightId]]; if (plight->pcw.parallelLight) { ParallelLight *light = (ParallelLight *)plight; @@ -447,10 +455,10 @@ struct State static u32 elanRamAddress(void *p) { - if ((u8 *)p < elanRAM || (u8 *)p >= elanRAM + ELAN_RAM_SIZE) + if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE) return Null; else - return (u32)((u8 *)p - elanRAM); 
+ return (u32)((u8 *)p - RAM); } void serialize(Serializer& ser) @@ -537,21 +545,21 @@ static u32 packColor(const glm::vec4& color) } template -glm::vec4 getNormal(const T& vtx) +glm::vec3 getNormal(const T& vtx) { - return glm::vec4((int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f, 0); + return { (int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f }; } template<> -glm::vec4 getNormal(const N2_VERTEX_VNU& vtx) +glm::vec3 getNormal(const N2_VERTEX_VNU& vtx) { - return glm::vec4(vtx.normal.nx, vtx.normal.ny, vtx.normal.nz, 0); + return { vtx.normal.nx, vtx.normal.ny, vtx.normal.nz }; } template void setNormal(Vertex& vd, const T& vs) { - glm::vec4 normal = getNormal(vs); + glm::vec3 normal = getNormal(vs); vd.nx = normal.x; vd.ny = normal.y; vd.nz = normal.z; @@ -734,20 +742,10 @@ static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0); glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0); // new AA extents - const float newX = std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentX)) + - std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentY)) + - std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentZ)); + glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ); - const float newY = std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentX)) + - std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentY)) + - std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentZ)); - - const float newZ = std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentX)) + - std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentY)) + - std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentZ)); - - min = glm::vec3(center) - glm::vec3(newX, newY, newZ); - max = glm::vec3(center) + glm::vec3(newX, newY, newZ); + min = glm::vec3(center) - newExtent; + max = glm::vec3(center) + newExtent; } template @@ -763,7 +761,14 
@@ static bool isInFrustum(const T* vertices, u32 count) glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1); if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y)) return false; - // TODO ... +// // Check the farthest side +// float w = std::max(pmin.w, pmax.w); +// glm::vec2 smin = glm::min(glm::vec2(pmin) / w, glm::vec2(pmax) / w); +// glm::vec2 smax = glm::max(glm::vec2(pmin) / w, glm::vec2(pmax) / w); +// +// if (smax.x <= -214 || smin.x >= 854 // FIXME viewport dimensions +// || smax.y < 0 || smin.y >= 480) +// return false; return true; } @@ -926,7 +931,7 @@ static void sendLights() light.routing = plight->routing; light.dmode = plight->dmode; light.smode = N2_LMETHOD_SINGLE_SIDED; - memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, (int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), sizeof(light.direction)); } else @@ -956,6 +961,7 @@ static void setStateParams(PolyParam& pp) sendLights(); pp.tileclip = state.tileclip; pp.mvMatrix = taMVMatrix; + pp.normalMatrix = taNormalMatrix; pp.projMatrix = taProjMatrix; pp.lightModel = taLightModel; pp.envMapping = false; @@ -1183,9 +1189,9 @@ static void executeCommand(u8 *data, int size) { verify(size >= 0); verify(size < (int)ELAN_RAM_SIZE); -// if (0x2b00 == (u32)(data - elanRAM)) +// if (0x2b00 == (u32)(data - RAM)) // for (int i = 0; i < size; i += 4) -// DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - elanRAM), *(u32 *)&data[i]); +// DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); while (size >= 32) { @@ -1235,7 +1241,7 @@ static void executeCommand(u8 *data, int size) //{ // WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2); // for (int i = 0; i < 32; i += 4) - // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - elanRAM), *(u32 
*)&data[i]); + // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); //} size -= sizeof(LightModel); } @@ -1247,7 +1253,7 @@ static void executeCommand(u8 *data, int size) cullingReversed = (model->id1 & 0x08000000) == 0; state.setClipMode(model->pcw); DEBUG_LOG(PVR, "Model offset %x size %x clip %d", model->offset, model->size, model->pcw.userClip); - executeCommand(&elanRAM[model->offset & 0x1ffffff8], model->size); + executeCommand(&RAM[model->offset & 0x1ffffff8], model->size); cullingReversed = false; size -= sizeof(Model); } @@ -1296,7 +1302,7 @@ static void executeCommand(u8 *data, int size) { Link *link = (Link *)data; DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size); - executeCommand(&elanRAM[link->offset & 0x1ffffff8], link->size); + executeCommand(&RAM[link->offset & 0x1ffffff8], link->size); size -= sizeof(Link); } break; @@ -1363,7 +1369,7 @@ static void executeCommand(u8 *data, int size) if (pcw != 0) INFO_LOG(PVR, "Unhandled command %x", pcw); for (int i = 0; i < 32; i += 4) - DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - elanRAM), *(u32 *)&data[i]); + DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); size -= 32; } } @@ -1391,33 +1397,30 @@ void DYNACALL write_elancmd(u32 addr, T data) template T DYNACALL read_elanram(u32 addr) { - return *(T *)&elanRAM[addr & (ELAN_RAM_SIZE - 1)]; + return *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)]; } template void DYNACALL write_elanram(u32 addr, T data) { - *(T *)&elanRAM[addr & (ELAN_RAM_SIZE - 1)] = data; + *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)] = data; } void init() { - elanRAM = (u8 *)allocAligned(PAGE_SIZE, ELAN_RAM_SIZE); } void reset(bool hard) { if (hard) { - memset(elanRAM, 0, ELAN_RAM_SIZE); + memset(RAM, 0, ELAN_RAM_SIZE); state.reset(); } } void term() { - freeAligned(elanRAM); - elanRAM = nullptr; } void vmem_init() @@ -1432,7 +1435,7 @@ void vmem_map(u32 base) _vmem_map_handler(elanRegHandler, base | 8, base | 8); 
_vmem_map_handler(elanCmdHandler, base | 9, base | 9); _vmem_map_handler(elanRamHandler, base | 0xA, base | 0xB); - _vmem_map_block(elanRAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1); + _vmem_map_block(RAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1); } void serialize(Serializer& ser) @@ -1443,7 +1446,7 @@ void serialize(Serializer& ser) ser << reg74; ser << elanCmd; if (!ser.rollback()) - ser.serialize(elanRAM, ELAN_RAM_SIZE); + ser.serialize(RAM, ELAN_RAM_SIZE); state.serialize(ser); } @@ -1455,7 +1458,7 @@ void deserialize(Deserializer& deser) deser >> reg74; deser >> elanCmd; if (!deser.rollback()) - deser.deserialize(elanRAM, ELAN_RAM_SIZE); + deser.deserialize(RAM, ELAN_RAM_SIZE); state.deserialize(deser); } diff --git a/core/hw/pvr/elan.h b/core/hw/pvr/elan.h index 961fd9584..ba6042f55 100644 --- a/core/hw/pvr/elan.h +++ b/core/hw/pvr/elan.h @@ -31,4 +31,6 @@ void vmem_map(u32 base); void serialize(Serializer& ser); void deserialize(Deserializer& deser); +extern u8 *RAM; +constexpr u32 ELAN_RAM_SIZE = 32 * 1024 * 1024; } diff --git a/core/hw/pvr/elan_struct.h b/core/hw/pvr/elan_struct.h index 20972dfba..98fe558f6 100644 --- a/core/hw/pvr/elan_struct.h +++ b/core/hw/pvr/elan_struct.h @@ -96,33 +96,33 @@ struct InstanceMatrix : public ElanBase u32 _res1; // 08000200 float envMapU; // env map U offset float lm00; - float lm01; - float lm02; float lm10; + float lm20; + float lm01; float lm11; + float lm21; + float lm02; float lm12; - float tm20; - float tm21; - float tm22; + float lm22; float envMapV; // env map V offset float _res2[4]; u32 _res3; // 08000100 float _near; float tm00; - float tm10; - float mfr2; float tm01; - float tm11; - float mfr6; float tm02; + float tm10; + float tm11; float tm12; - float mfr10; - float mat03; - float mat13; - float mat23; + float tm20; + float tm21; + float tm22; + float tm30; + float tm31; + float tm32; float _far; - float mproj6; + float mproj6; // 1 / near bool isInstanceMatrix() const { return id1 == 0xf && id2 == 0x7f; 
diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index e0101a377..c98d91c4b 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -50,6 +50,7 @@ struct PolyParam BaseTextureCacheData *texture1; float *mvMatrix; + float *normalMatrix; float *projMatrix; float glossCoef0; float glossCoef1; @@ -187,7 +188,6 @@ struct rend_context List matrices; List lightModels; - bool init = false; void Clear() { @@ -270,9 +270,8 @@ struct TA_context rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes - rend.matrices.Init(1000, &rend.Overrun, "matrices"); + rend.matrices.Init(2000, &rend.Overrun, "matrices"); rend.lightModels.Init(100, &rend.Overrun, "lightModels"); - rend.init = true; Reset(); } diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 8e686c6b1..5fbebc971 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -2077,7 +2077,6 @@ static bool ClearZBeforePass(int pass_number) void rend_context::newRenderPass() { - verify(init); if (global_param_op.used() > 0 || global_param_tr.used() > 0 || global_param_pt.used() > 0) diff --git a/core/linux/libnx_vmem.cpp b/core/linux/libnx_vmem.cpp index 681b3a785..1545f2265 100644 --- a/core/linux/libnx_vmem.cpp +++ b/core/linux/libnx_vmem.cpp @@ -123,7 +123,7 @@ static mem_handle_t allocate_shared_filemem(unsigned size) // In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table // can be found. If the platform init returns error, the user is responsible for initializing the // memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). 
-VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { return MemTypeError; #if 0 diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp index 719a75267..a2ab3c718 100644 --- a/core/linux/posix_vmem.cpp +++ b/core/linux/posix_vmem.cpp @@ -159,9 +159,9 @@ static size_t reserved_size; // In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table // can be found. If the platform init returns error, the user is responsible for initializing the // memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) { +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { // Firt let's try to allocate the shm-backed memory - vmem_fd = allocate_shared_filemem(RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX); + vmem_fd = allocate_shared_filemem(ramSize); if (vmem_fd < 0) return MemTypeError; diff --git a/core/network/ggpo.cpp b/core/network/ggpo.cpp index d24b081fd..abac8d440 100644 --- a/core/network/ggpo.cpp +++ b/core/network/ggpo.cpp @@ -125,10 +125,12 @@ struct MemPages ram = memwatch::ramWatcher.getPages(); vram = memwatch::vramWatcher.getPages(); aram = memwatch::aramWatcher.getPages(); + elanram = memwatch::elanWatcher.getPages(); } memwatch::PageMap ram; memwatch::PageMap vram; memwatch::PageMap aram; + memwatch::PageMap elanram; }; static std::unordered_map deltaStates; static int lastSavedFrame = -1; @@ -276,12 +278,6 @@ static bool load_game_state(unsigned char *buffer, int len) Deserializer deser(buffer, len, true); int frame; deser >> frame; - dc_deserialize(deser); - if (deser.size() != (u32)len) - { - ERROR_LOG(NETWORK, "load_game_state len %d used %d", len, (int)deser.size()); - die("fatal"); - } for (int f = lastSavedFrame - 1; f >= frame; f--) { const MemPages& pages = 
deltaStates[f]; @@ -291,8 +287,16 @@ static bool load_game_state(unsigned char *buffer, int len) memcpy(memwatch::vramWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); for (const auto& pair : pages.aram) memcpy(memwatch::aramWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); - DEBUG_LOG(NETWORK, "Restored frame %d pages: %d ram, %d vram, %d aica ram", f, (u32)pages.ram.size(), - (u32)pages.vram.size(), (u32)pages.aram.size()); + for (const auto& pair : pages.elanram) + memcpy(memwatch::elanWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); + DEBUG_LOG(NETWORK, "Restored frame %d pages: %d ram, %d vram, %d eram, %d aica ram", f, (u32)pages.ram.size(), + (u32)pages.vram.size(), (u32)pages.elanram.size(), (u32)pages.aram.size()); + } + dc_deserialize(deser); + if (deser.size() != (u32)len) + { + ERROR_LOG(NETWORK, "load_game_state len %d used %d", len, (int)deser.size()); + die("fatal"); } rend_allow_rollback(); // ggpo might load another state right after this one memwatch::reset(); @@ -374,8 +378,8 @@ static bool save_game_state(unsigned char **buffer, int *len, int *checksum, int #endif // Save the delta to frame-1 deltaStates[frame - 1].load(); - DEBUG_LOG(NETWORK, "Saved frame %d pages: %d ram, %d vram, %d aica ram", frame - 1, (u32)deltaStates[frame - 1].ram.size(), - (u32)deltaStates[frame - 1].vram.size(), (u32)deltaStates[frame - 1].aram.size()); + DEBUG_LOG(NETWORK, "Saved frame %d pages: %d ram, %d vram, %d eram, %d aica ram", frame - 1, (u32)deltaStates[frame - 1].ram.size(), + (u32)deltaStates[frame - 1].vram.size(), (u32)deltaStates[frame - 1].elanram.size(), (u32)deltaStates[frame - 1].aram.size()); } memwatch::protect(); diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h index 5c2af12a3..bf8402d1d 100755 --- a/core/rend/gl4/gl4.h +++ b/core/rend/gl4/gl4.h @@ -43,10 +43,11 @@ struct gl4PipelineShader GLint fog_control; GLint trilinear_alpha; GLint fog_clamp_min, fog_clamp_max; - GLint normal_matrix; + GLint ndcMat; 
GLint palette_index; // Naomi2 GLint mvMat; + GLint normalMat; GLint projMat; GLint glossCoef0; GLint lightCount; @@ -73,6 +74,7 @@ struct gl4PipelineShader GLint attnAngleB; } lights[elan::MAX_LIGHTS]; float *lastMvMat; + float *lastNormalMat; float *lastProjMat; N2LightModel *lastLightModel; @@ -100,14 +102,14 @@ struct gl4_ctx { GLuint program; - GLuint normal_matrix; + GLuint ndcMat; } modvol_shader; struct { GLuint program; - GLuint normal_matrix; + GLuint ndcMat; GLint mvMat; GLint projMat; } n2ModVolShader; @@ -173,7 +175,7 @@ extern struct gl4ShaderUniforms_t TCW tcw1; float fog_clamp_min[4]; float fog_clamp_max[4]; - glm::mat4 normal_mat; + glm::mat4 ndcMat; struct { bool enabled; int x; @@ -234,8 +236,8 @@ extern struct gl4ShaderUniforms_t if (s->fog_clamp_max != -1) glUniform4fv(s->fog_clamp_max, 1, fog_clamp_max); - if (s->normal_matrix != -1) - glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); + if (s->ndcMat != -1) + glUniformMatrix4fv(s->ndcMat, 1, GL_FALSE, &ndcMat[0][0]); if (s->palette_index != -1) glUniform1i(s->palette_index, palette_index); diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index dad907fc3..02d5cd5fe 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -59,7 +59,7 @@ static const char* VertexShaderSource = R"( #endif // Uniforms -uniform mat4 normal_matrix; +uniform mat4 ndcMat; // Input in vec4 in_pos; @@ -80,7 +80,7 @@ noperspective out vec2 vtx_uv1; void main() { - vec4 vpos = normal_matrix * in_pos; + vec4 vpos = ndcMat * in_pos; vtx_base = in_base; vtx_offs = in_offs; vtx_uv = vec3(in_uv * vpos.z, vpos.z); @@ -528,7 +528,7 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source s->fog_clamp_min = -1; s->fog_clamp_max = -1; } - s->normal_matrix = glGetUniformLocation(s->program, "normal_matrix"); + s->ndcMat = glGetUniformLocation(s->program, "ndcMat"); // Shadow stencil for OP/PT rendering pass gu = glGetUniformLocation(s->program, "shadow_stencil"); 
@@ -591,13 +591,13 @@ static void create_modvol_shader() .addSource(ModifierVolumeShader); gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); - gl4.modvol_shader.normal_matrix = glGetUniformLocation(gl4.modvol_shader.program, "normal_matrix"); + gl4.modvol_shader.ndcMat = glGetUniformLocation(gl4.modvol_shader.program, "ndcMat"); N2Vertex4Source n2VertexShader(false, true); N2Geometry4Shader geometryShader(false, true); gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str()); - gl4.n2ModVolShader.normal_matrix = glGetUniformLocation(gl4.n2ModVolShader.program, "normal_matrix"); + gl4.n2ModVolShader.ndcMat = glGetUniformLocation(gl4.n2ModVolShader.program, "ndcMat"); gl4.n2ModVolShader.mvMat = glGetUniformLocation(gl4.n2ModVolShader.program, "mvMat"); gl4.n2ModVolShader.projMat = glGetUniformLocation(gl4.n2ModVolShader.program, "projMat"); } @@ -715,7 +715,7 @@ static bool RenderFrame(int width, int height) const bool is_rtt = pvrrc.isRTT; TransformMatrix matrices(pvrrc, width, height); - gl4ShaderUniforms.normal_mat = matrices.GetNormalMatrix(); + gl4ShaderUniforms.ndcMat = matrices.GetNormalMatrix(); const glm::mat4& scissor_mat = matrices.GetScissorMatrix(); ViewportMatrix = matrices.GetViewportMatrix(); @@ -761,10 +761,10 @@ static bool RenderFrame(int width, int height) if (config::Fog) { glcache.UseProgram(gl4.modvol_shader.program); - glUniformMatrix4fv(gl4.modvol_shader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]); + glUniformMatrix4fv(gl4.modvol_shader.ndcMat, 1, GL_FALSE, &gl4ShaderUniforms.ndcMat[0][0]); glcache.UseProgram(gl4.n2ModVolShader.program); - glUniformMatrix4fv(gl4.n2ModVolShader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]); + glUniformMatrix4fv(gl4.n2ModVolShader.ndcMat, 1, GL_FALSE, &gl4ShaderUniforms.ndcMat[0][0]); } for (auto& it : gl4.shaders) 
resetN2UniformCache(&it.second); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 25965436a..19cfc0a7e 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -93,7 +93,7 @@ const char* GouraudSource = R"( static const char* VertexShaderSource = R"( /* Vertex constants*/ uniform highp vec4 depth_scale; -uniform highp mat4 normal_matrix; +uniform highp mat4 ndcMat; uniform highp float sp_FOG_DENSITY; /* Vertex input */ @@ -108,7 +108,7 @@ NOPERSPECTIVE out highp vec3 vtx_uv; void main() { - highp vec4 vpos = normal_matrix * in_pos; + highp vec4 vpos = ndcMat * in_pos; vtx_base = in_base; vtx_offs = in_offs; #if TARGET_GL == GLES2 @@ -787,7 +787,7 @@ bool CompilePipelineShader(PipelineShader* s) s->fog_clamp_min = -1; s->fog_clamp_max = -1; } - s->normal_matrix = glGetUniformLocation(s->program, "normal_matrix"); + s->ndcMat = glGetUniformLocation(s->program, "ndcMat"); if (s->naomi2) initN2Uniforms(s); @@ -892,14 +892,14 @@ static void create_modvol_shader() .addSource(ModifierVolumeShader); gl.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); - gl.modvol_shader.normal_matrix = glGetUniformLocation(gl.modvol_shader.program, "normal_matrix"); + gl.modvol_shader.ndcMat = glGetUniformLocation(gl.modvol_shader.program, "ndcMat"); gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); N2VertexSource n2vertexShader(false, true); N2GeometryShader geometryShader(false, true); gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str()); - gl.n2ModVolShader.normal_matrix = glGetUniformLocation(gl.n2ModVolShader.program, "normal_matrix"); + gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat"); gl.n2ModVolShader.sp_ShaderColor 
= glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor"); gl.n2ModVolShader.depth_scale = glGetUniformLocation(gl.n2ModVolShader.program, "depth_scale"); gl.n2ModVolShader.mvMat = glGetUniformLocation(gl.n2ModVolShader.program, "mvMat"); @@ -1208,7 +1208,7 @@ bool RenderFrame(int width, int height) vtx_max_fZ *= 1.001f; TransformMatrix matrices(pvrrc, width, height); - ShaderUniforms.normal_mat = matrices.GetNormalMatrix(); + ShaderUniforms.ndcMat = matrices.GetNormalMatrix(); const glm::mat4& scissor_mat = matrices.GetScissorMatrix(); ViewportMatrix = matrices.GetViewportMatrix(); @@ -1235,13 +1235,13 @@ bool RenderFrame(int width, int height) glcache.UseProgram(gl.modvol_shader.program); if (gl.modvol_shader.depth_scale != -1) glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); - glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); + glUniformMatrix4fv(gl.modvol_shader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); glcache.UseProgram(gl.n2ModVolShader.program); if (gl.n2ModVolShader.depth_scale != -1) glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs); - glUniformMatrix4fv(gl.n2ModVolShader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); + glUniformMatrix4fv(gl.n2ModVolShader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 9e20210a7..901e3bc1b 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -50,10 +50,11 @@ struct PipelineShader GLint sp_FOG_DENSITY; GLint trilinear_alpha; GLint fog_clamp_min, fog_clamp_max; - GLint normal_matrix; + GLint ndcMat; GLint palette_index; // Naomi2 GLint mvMat; + GLint normalMat; GLint projMat; GLint 
glossCoef0; GLint lightCount; @@ -80,6 +81,7 @@ struct PipelineShader GLint attnAngleB; } lights[elan::MAX_LIGHTS]; float *lastMvMat; + float *lastNormalMat; float *lastProjMat; N2LightModel *lastLightModel; @@ -109,7 +111,7 @@ struct gl_ctx GLint depth_scale; GLint sp_ShaderColor; - GLint normal_matrix; + GLint ndcMat; } modvol_shader; struct @@ -118,7 +120,7 @@ struct gl_ctx GLint depth_scale; GLint sp_ShaderColor; - GLint normal_matrix; + GLint ndcMat; GLint mvMat; GLint projMat; @@ -202,7 +204,7 @@ void GetFramebufferScaling(float& scale_x, float& scale_y, float& scissoring_sca void GetFramebufferSize(float& dc_width, float& dc_height); void SetupMatrices(float dc_width, float dc_height, float scale_x, float scale_y, float scissoring_scale_x, float scissoring_scale_y, - float &ds2s_offs_x, glm::mat4& normal_mat, glm::mat4& scissor_mat); + float &ds2s_offs_x, glm::mat4& ndcMat, glm::mat4& scissor_mat); void SetCull(u32 CullMode); s32 SetTileClip(u32 val, GLint uniform); @@ -237,7 +239,7 @@ extern struct ShaderUniforms_t float trilinear_alpha; float fog_clamp_min[4]; float fog_clamp_max[4]; - glm::mat4 normal_mat; + glm::mat4 ndcMat; struct { bool enabled; int x; @@ -269,8 +271,8 @@ extern struct ShaderUniforms_t if (s->fog_clamp_max != -1) glUniform4fv(s->fog_clamp_max, 1, fog_clamp_max); - if (s->normal_matrix != -1) - glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); + if (s->ndcMat != -1) + glUniformMatrix4fv(s->ndcMat, 1, GL_FALSE, &ndcMat[0][0]); if (s->palette_index != -1) glUniform1i(s->palette_index, palette_index); diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp index 833525764..7c2461836 100644 --- a/core/rend/gles/naomi2.cpp +++ b/core/rend/gles/naomi2.cpp @@ -19,11 +19,9 @@ #include "naomi2.h" const char* N2VertexShader = R"( -uniform vec4 depth_scale; -uniform mat4 normal_matrix; -uniform float sp_FOG_DENSITY; uniform mat4 mvMat; +uniform mat4 normalMat; uniform mat4 projMat; uniform int envMapping; uniform int 
bumpMapping; @@ -50,7 +48,7 @@ INTERPOLATION out vec4 vs_offs1; noperspective out vec2 vs_uv1; #endif #endif -out float gl_ClipDistance[6]; +out float gl_ClipDistance[2]; void main() { @@ -66,7 +64,7 @@ void main() if (bumpMapping == 1) computeBumpMap(vs_offs, vs_offs1, normalize(in_normal)); #endif - vec4 vnorm = normalize(mvMat * vec4(in_normal, 0.0)); + vec4 vnorm = normalize(normalMat * vec4(in_normal, 0.0)); if (bumpMapping == 0) computeColors(vs_base, vs_offs, vpos.xyz, vnorm.xyz); vs_uv.xy = in_uv; @@ -298,7 +296,7 @@ const char *GeometryClippingShader = R"( layout (triangles) in; layout (triangle_strip, max_vertices = 12) out; -uniform mat4 normal_matrix; +uniform mat4 ndcMat; #if GEOM_ONLY == 0 INTERPOLATION in highp vec4 vs_base[3]; @@ -421,7 +419,7 @@ int clip3(in vec3 dist, inout Vertex v0, inout Vertex v1, inout Vertex v2, out V void wDivide(inout Vertex v) { v.pos = vec4(v.pos.xy / v.pos.w, 1.0 / v.pos.w, 1.0); - v.pos = normal_matrix * v.pos; + v.pos = ndcMat * v.pos; #if GEOM_ONLY == 1 v.uv = vec3(0.0, 0.0, v.pos.z); #else diff --git a/core/rend/gles/naomi2.h b/core/rend/gles/naomi2.h index 376b82ae3..9ae0eb97d 100644 --- a/core/rend/gles/naomi2.h +++ b/core/rend/gles/naomi2.h @@ -48,6 +48,7 @@ template void initN2Uniforms(ShaderType *shader) { shader->mvMat = glGetUniformLocation(shader->program, "mvMat"); + shader->normalMat = glGetUniformLocation(shader->program, "normalMat"); shader->projMat = glGetUniformLocation(shader->program, "projMat"); shader->glossCoef0 = glGetUniformLocation(shader->program, "glossCoef0"); shader->envMapping = glGetUniformLocation(shader->program, "envMapping"); @@ -99,12 +100,17 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader) if (pp->mvMatrix != shader->lastMvMat) { shader->lastMvMat = pp->mvMatrix; - glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, &pp->mvMatrix[0]); + glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, pp->mvMatrix); + } + if (pp->normalMatrix != shader->lastNormalMat) + { + 
shader->lastNormalMat = pp->normalMatrix; + glUniformMatrix4fv(shader->normalMat, 1, GL_FALSE, pp->normalMatrix); } if (pp->projMatrix != shader->lastProjMat) { shader->lastProjMat = pp->projMatrix; - glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, &pp->projMatrix[0]); + glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, pp->projMatrix); } glUniform1f(shader->glossCoef0, pp->glossCoef0); diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index 33dd88d79..5fe995a25 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -49,7 +49,7 @@ static std::vector mapped_regions; // Please read the POSIX implementation for more information. On Windows this is // rather straightforward. -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { #ifdef TARGET_UWP return MemTypeError; @@ -58,7 +58,7 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) mapped_regions.reserve(32); // First let's try to allocate the in-memory file - mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX, 0); + mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, ramSize, 0); // Now allocate the actual address space (it will be 64KB aligned on windows). unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX;