diff --git a/.github/workflows/uwp.yml b/.github/workflows/uwp.yml index 85ec27914..2ca4d1dc0 100644 --- a/.github/workflows/uwp.yml +++ b/.github/workflows/uwp.yml @@ -39,9 +39,10 @@ jobs: - name: Package app run: | mkdir build\artifact - cd build\AppPackages\flycast\flycast_1.0.0.0_x64_Test + cd build\AppPackages\flycast\flycast_*_x64_Test mkdir tmp - makeappx.exe unpack /p .\flycast_1.0.0.0_x64.msix /d tmp + ren *.msix flycast.msix + makeappx.exe unpack /p .\flycast.msix /d tmp copy ..\..\..\Release\*.dll tmp makeappx pack /d tmp /p ..\..\..\artifact\flycast.appx signtool sign /f ..\..\..\..\shell\uwp\sign_cert.pfx /p '${{ secrets.SIGN_CERT_PWD }}' /v /fd SHA256 ..\..\..\artifact\flycast.appx diff --git a/CMakeLists.txt b/CMakeLists.txt index f7f0dfa40..b9ef5e33a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,17 @@ if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") OUTPUT_VARIABLE GIT_HASH OUTPUT_STRIP_TRAILING_WHITESPACE ) + if(WINDOWS_STORE) + string(REPLACE "v" "" MS_VERSION ${GIT_VERSION}) + string(REPLACE "-" "." 
MS_VERSION ${MS_VERSION}) + string(REGEX REPLACE "\.g[0-9a-f]+" "" MS_VERSION ${MS_VERSION}) + string(REGEX MATCH "[0-9]+\.[0-9]+\.[0-9]+" VERSION_3PARTS ${MS_VERSION}) + if (VERSION_3PARTS STREQUAL "") + string(APPEND MS_VERSION ".0.0") + else() + string(APPEND MS_VERSION ".0") + endif() + endif() endif() string(TIMESTAMP BUILD_TIMESTAMP UTC) @@ -1026,30 +1037,25 @@ if(USE_VULKAN) endif() if(WIN32 AND NOT LIBRETRO AND NOT WINDOWS_STORE) + add_subdirectory(core/rend/dx9) + target_link_libraries(${PROJECT_NAME} PRIVATE dx9renderer) +endif() + +if(WIN32 AND NOT LIBRETRO) target_sources(${PROJECT_NAME} PRIVATE - core/rend/dx9/comptr.h - core/rend/dx9/d3d_overlay.h - core/rend/dx9/d3d_overlay.cpp - core/rend/dx9/d3d_renderer.h - core/rend/dx9/d3d_renderer.cpp - core/rend/dx9/d3d_shaders.h - core/rend/dx9/d3d_shaders.cpp - core/rend/dx9/d3d_texture.h - core/rend/dx9/d3d_texture.cpp - core/rend/dx9/dx9_driver.h - core/rend/dx9/dxcontext.h - core/rend/dx9/dxcontext.cpp - core/rend/dx9/imgui_impl_dx9.h - core/rend/dx9/imgui_impl_dx9.cpp) - if(NOT MINGW) - target_include_directories(${PROJECT_NAME} PRIVATE "$ENV{DXSDK_DIR}/Include") - if (CMAKE_SIZEOF_VOID_P EQUAL 8) - target_link_directories(${PROJECT_NAME} PRIVATE "$ENV{DXSDK_DIR}/Lib/x64") - else() - target_link_directories(${PROJECT_NAME} PRIVATE "$ENV{DXSDK_DIR}/Lib/x86") - endif() - endif() - target_link_libraries(${PROJECT_NAME} PRIVATE d3d9 d3dx9) + core/rend/dx11/dx11_driver.h + core/rend/dx11/dx11_renderer.cpp + core/rend/dx11/dx11_renderer.h + core/rend/dx11/dx11_shaders.cpp + core/rend/dx11/dx11_shaders.h + core/rend/dx11/dx11_texture.cpp + core/rend/dx11/dx11_texture.h + core/rend/dx11/dx11context.cpp + core/rend/dx11/dx11context.h + core/rend/dx11/imgui_impl_dx11.cpp + core/rend/dx11/imgui_impl_dx11.h + core/rend/dx11/dx11_driver.h) + target_link_libraries(${PROJECT_NAME} PRIVATE d3d11 d3dcompiler) endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)" AND NOT APPLE) @@ -1353,7 +1359,10 @@ if(NOT LIBRETRO) 
core/windows/xinput_gamepad.h) endif() if(WINDOWS_STORE) - set(ResourceFiles shell/uwp/Package.appxmanifest + file(READ shell/uwp/Package.appxmanifest MANIFEST) + string(REPLACE "9.9.9.9" ${MS_VERSION} MANIFEST ${MANIFEST}) + file(WRITE ${CMAKE_BINARY_DIR}/Package.appxmanifest ${MANIFEST}) + set(ResourceFiles ${CMAKE_BINARY_DIR}/Package.appxmanifest shell/uwp/flycast150.png shell/uwp/flycast50.png shell/uwp/flycast44.png diff --git a/core/cfg/option.h b/core/cfg/option.h index ef88e8130..620c788ec 100644 --- a/core/cfg/option.h +++ b/core/cfg/option.h @@ -396,14 +396,12 @@ public: RendererOption() #ifdef USE_DX9 : Option("pvr.rend", RenderType::DirectX9) {} +#elif defined(TARGET_UWP) + : Option("pvr.rend", RenderType::DirectX11) {} #else : Option("pvr.rend", RenderType::OpenGL) {} #endif - bool isDirectX() const { - return value == RenderType::DirectX9; - } - RenderType& operator=(const RenderType& v) { set(v); return value; } }; extern RendererOption RendererType; diff --git a/core/deps/picotcp/include/arch/pico_posix.h b/core/deps/picotcp/include/arch/pico_posix.h index 1a3b8c726..918de83a5 100644 --- a/core/deps/picotcp/include/arch/pico_posix.h +++ b/core/deps/picotcp/include/arch/pico_posix.h @@ -11,7 +11,11 @@ #include #include // Note: alloca is not part of POSIX +#if defined(_MSC_VER) || defined(__MINGW32__) +#include +#else #include +#endif /* #define MEMORY_MEASURE diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index fdbacf6ee..4e9e1fe63 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -268,6 +268,7 @@ Renderer* rend_norend(); Renderer* rend_Vulkan(); Renderer* rend_OITVulkan(); Renderer* rend_DirectX9(); +Renderer* rend_DirectX11(); static void rend_create_renderer() { @@ -297,6 +298,11 @@ static void rend_create_renderer() case RenderType::DirectX9: renderer = rend_DirectX9(); break; +#endif +#if defined(_WIN32) && !defined(LIBRETRO) + case RenderType::DirectX11: + renderer = rend_DirectX11(); + 
break; #endif } #endif diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index d4088565a..243f7fa55 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -1,5 +1,6 @@ #pragma once #include "types.h" +#include #define pvr_RegSize (0x8000) #define pvr_RegMask (pvr_RegSize-1) @@ -371,21 +372,35 @@ union TA_GLOB_TILE_CLIP_type u32 full; }; -union TA_YUV_TEX_CTRL_type -{ - struct - { - u32 yuv_u_size : 6; - u32 reserved1 : 2; - u32 yuv_v_size : 6; - u32 reserved2 : 2; - u32 yuv_tex : 1; - u32 reserved3 : 7; - u32 yuv_form : 1; - u32 reserved4 : 7; - }; - u32 full; -}; +union TA_YUV_TEX_CTRL_type +{ + struct + { + u32 yuv_u_size : 6; + u32 reserved1 : 2; + u32 yuv_v_size : 6; + u32 reserved2 : 2; + u32 yuv_tex : 1; + u32 reserved3 : 7; + u32 yuv_form : 1; + u32 reserved4 : 7; + }; + u32 full; +}; + +union FOG_DENSITY_type +{ + struct + { + s8 exponent; + u8 mantissa; + }; + u32 full; + + float get() { + return mantissa / 128.f * std::pow(2.0f, (float)exponent); + } +}; // TA REGS #define TA_OL_BASE_addr 0x00000124 // RW Object list write start address @@ -461,7 +476,7 @@ union TA_YUV_TEX_CTRL_type #define FOG_COL_RAM PvrReg(FOG_COL_RAM_addr,u32) // RW Color for Look Up table Fog #define FOG_COL_VERT PvrReg(FOG_COL_VERT_addr,u32) // RW Color for vertex Fog -#define FOG_DENSITY PvrReg(FOG_DENSITY_addr,u32) // RW Fog scale value +#define FOG_DENSITY PvrReg(FOG_DENSITY_addr, FOG_DENSITY_type) // RW Fog scale value #define FOG_CLAMP_MAX PvrReg(FOG_CLAMP_MAX_addr,u32) // RW Color clamping maximum value #define FOG_CLAMP_MIN PvrReg(FOG_CLAMP_MIN_addr,u32) // RW Color clamping minimum value #define SPG_TRIGGER_POS PvrReg(SPG_TRIGGER_POS_addr,u32) // RW External trigger signal HV counter value diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 640955b46..ed8b81be1 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -1776,7 +1776,7 @@ void FillBGP(TA_context* ctx) float scale_x= (SCALER_CTL.hscale) ? 
2.f:1.f; //if AA hack the hacked pos value hacks for (int i=0;i<3;i++) { - if (config::RendererType.isDirectX()) + if (isDirectX(config::RendererType)) decode_pvr_vertex<2, 1, 0, 3>(strip_base,vertex_ptr,&cv[i]); else decode_pvr_vertex<0, 1, 2, 3>(strip_base,vertex_ptr,&cv[i]); diff --git a/core/rend/CustomTexture.cpp b/core/rend/CustomTexture.cpp index 6b2378b9f..875b1b2e1 100644 --- a/core/rend/CustomTexture.cpp +++ b/core/rend/CustomTexture.cpp @@ -186,7 +186,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, TextureType textype, voi for (int y = 0; y < h; y++) { - if (!config::RendererType.isDirectX()) + if (!isDirectX(config::RendererType)) { switch (textype) { diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index d9429343e..558b576d2 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -91,7 +91,7 @@ void palette_update() pal_needs_update = false; palette_updated = true; - if (!config::RendererType.isDirectX()) + if (!isDirectX(config::RendererType)) { switch(PAL_RAM_CTRL&3) { diff --git a/core/rend/dx11/dx11_driver.h b/core/rend/dx11/dx11_driver.h new file mode 100644 index 000000000..b7dd7497e --- /dev/null +++ b/core/rend/dx11/dx11_driver.h @@ -0,0 +1,38 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#pragma once +#include "rend/imgui_driver.h" +#include "imgui_impl_dx11.h" +#include "dx11context.h" + +class DX11Driver final : public ImGuiDriver +{ +public: + void newFrame() override { + ImGui_ImplDX11_NewFrame(); + } + + void renderDrawData(ImDrawData *drawData) override { + theDX11Context.EndImGuiFrame(); + } + + void present() override { + theDX11Context.Present(); + } +}; diff --git a/core/rend/dx11/dx11_quad.h b/core/rend/dx11/dx11_quad.h new file mode 100644 index 000000000..d8052eebc --- /dev/null +++ b/core/rend/dx11/dx11_quad.h @@ -0,0 +1,138 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#pragma once +#include "dx11context.h" +#include "dx11_shaders.h" + +class Quad +{ +public: + void init(const ComPtr& device, ComPtr deviceContext, DX11Shaders *shaders) + { + this->device = device; + this->deviceContext = deviceContext; + this->shaders = shaders; + + vertexShader = shaders->getQuadVertexShader(false); + rotateVertexShader = shaders->getQuadVertexShader(true); + pixelShader = shaders->getQuadPixelShader(); + + // Input layout + D3D11_INPUT_ELEMENT_DESC layout[] + { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 2 * sizeof(float), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + ComPtr blob = shaders->getQuadVertexShaderBlob(); + if (FAILED(device->CreateInputLayout(layout, ARRAY_SIZE(layout), blob->GetBufferPointer(), blob->GetBufferSize(), &inputLayout.get()))) + WARN_LOG(RENDERER, "Input layout creation failed"); + + // Rasterizer state + { + D3D11_RASTERIZER_DESC desc{}; + desc.FillMode = D3D11_FILL_SOLID; + desc.CullMode = D3D11_CULL_NONE; + desc.ScissorEnable = true; + desc.DepthClipEnable = true; + device->CreateRasterizerState(&desc, &rasterizerState.get()); + } + // Blend state + { + D3D11_BLEND_DESC desc{}; + desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + device->CreateBlendState(&desc, &blendState.get()); + } + // Depth-stencil state + { + D3D11_DEPTH_STENCIL_DESC desc{}; + desc.DepthEnable = false; + desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; + desc.DepthFunc = D3D11_COMPARISON_ALWAYS; + desc.StencilEnable = false; + desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.BackFace = desc.FrontFace; + device->CreateDepthStencilState(&desc, &depthStencilState.get()); + } + // Vertex buffer + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(float) * 4 * 4; + desc.Usage = 
D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + device->CreateBuffer(&desc, nullptr, &vertexBuffer.get()); + } + } + + void draw(ComPtr& texView, ComPtr sampler, float x = -1.f, float y = -1.f, float w = 2.f, float h = 2.f, bool rotate = false) + { + // Vertex buffer + Vertex vertices[4] { + { x, y, 0.f, 1.f }, + { x, y + h, 0.f, 0.f }, + { x + w, y, 1.f, 1.f }, + { x + w, y + h, 1.f, 0.f }, + }; + D3D11_MAPPED_SUBRESOURCE mappedSubRes{}; + deviceContext->Map(vertexBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubRes); + memcpy(mappedSubRes.pData, vertices, sizeof(vertices)); + deviceContext->Unmap(vertexBuffer, 0); + unsigned int stride = sizeof(Vertex); + unsigned int offset = 0; + deviceContext->IASetInputLayout(inputLayout); + deviceContext->IASetVertexBuffers(0, 1, &vertexBuffer.get(), &stride, &offset); + + // Render states + const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; + deviceContext->OMSetBlendState(blendState, blend_factor, 0xffffffff); + deviceContext->OMSetDepthStencilState(depthStencilState, 0); + deviceContext->RSSetState(rasterizerState); + deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + deviceContext->VSSetShader(rotate ? rotateVertexShader : vertexShader, nullptr, 0); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // TODO Scissor? 
+ //const D3D11_RECT r = { (LONG)(pcmd->ClipRect.x - clip_off.x), (LONG)(pcmd->ClipRect.y - clip_off.y), (LONG)(pcmd->ClipRect.z - clip_off.x), (LONG)(pcmd->ClipRect.w - clip_off.y) }; + //deviceContext->RSSetScissorRects(1, &r); + + // Bind texture and draw + deviceContext->PSSetShaderResources(0, 1, &texView.get()); + deviceContext->PSSetSamplers(0, 1, &sampler.get()); + deviceContext->Draw(4, 0); + } + +private: + struct Vertex { + float x, y, u, v; + }; + + DX11Shaders *shaders = nullptr; + ComPtr device; + ComPtr deviceContext; + ComPtr inputLayout; + ComPtr vertexBuffer; + ComPtr rasterizerState; + ComPtr blendState; + ComPtr depthStencilState; + ComPtr vertexShader; + ComPtr rotateVertexShader; + ComPtr pixelShader; +}; diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp new file mode 100644 index 000000000..236617352 --- /dev/null +++ b/core/rend/dx11/dx11_renderer.cpp @@ -0,0 +1,1151 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#include "dx11_renderer.h" +#include "hw/pvr/ta.h" +#include "hw/pvr/pvr_mem.h" +#include "rend/gui.h" +#include "rend/tileclip.h" + +const D3D11_INPUT_ELEMENT_DESC MainLayout[] +{ + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 1, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; +const D3D11_INPUT_ELEMENT_DESC ModVolLayout[] +{ + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(ModTriangle, x0), D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +struct VertexConstants +{ + float transMatrix[4][4]; +}; + +struct PixelConstants +{ + float colorClampMin[4]; + float colorClampMax[4]; + float fog_col_vert[4]; + float fog_col_ram[4]; + float fogDensity; + float fogScale; + float alphaTestValue; +}; + +struct PixelPolyConstants +{ + float paletteIndex; + float trilinearAlpha; +}; + +bool DX11Renderer::Init() +{ + NOTICE_LOG(RENDERER, "DX11 renderer initializing"); + device = theDX11Context.getDevice(); + deviceContext = theDX11Context.getDeviceContext(); + + shaders.init(device); + bool success = (bool)shaders.getVertexShader(true); + ComPtr blob = shaders.getVertexShaderBlob(); + success = success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())); + blob = shaders.getMVVertexShaderBlob(); + success = success && SUCCEEDED(device->CreateInputLayout(ModVolLayout, ARRAY_SIZE(ModVolLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &modVolInputLayout.get())); + + // Constants buffers + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(VertexConstants); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + 
desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + success = success && SUCCEEDED(device->CreateBuffer(&desc, nullptr, &vtxConstants.get())); + + desc.ByteWidth = sizeof(PixelConstants); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + success = success && SUCCEEDED(device->CreateBuffer(&desc, nullptr, &pxlConstants.get())); + + desc.ByteWidth = sizeof(PixelPolyConstants); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + success = success && SUCCEEDED(device->CreateBuffer(&desc, nullptr, &pxlPolyConstants.get())); + } + + // Rasterizer state + { + D3D11_RASTERIZER_DESC desc{}; + desc.FillMode = D3D11_FILL_SOLID; + desc.CullMode = D3D11_CULL_NONE; + desc.FrontCounterClockwise = true; + desc.ScissorEnable = true; + desc.DepthClipEnable = true; + device->CreateRasterizerState(&desc, &rasterCullNone.get()); + desc.CullMode = D3D11_CULL_FRONT; + device->CreateRasterizerState(&desc, &rasterCullFront.get()); + desc.CullMode = D3D11_CULL_BACK; + device->CreateRasterizerState(&desc, &rasterCullBack.get()); + } + // Palette texture + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = 32; + desc.Height = 32; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.MipLevels = 1; + device->CreateTexture2D(&desc, nullptr, &paletteTexture.get()); + + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = desc.Format; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(paletteTexture, &viewDesc, &paletteTextureView.get()); + } + // Fog texture + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = 128; + desc.Height = 2; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.Format = 
DXGI_FORMAT_A8_UNORM; + desc.MipLevels = 1; + device->CreateTexture2D(&desc, nullptr, &fogTexture.get()); + + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = desc.Format; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(fogTexture, &viewDesc, &fogTextureView.get()); + } + + quad = std::unique_ptr(new Quad()); + quad->init(device, deviceContext, &shaders); + + fog_needs_update = true; + palette_updated = true; + + if (!success) + { + WARN_LOG(RENDERER, "DirectX 11 renderer initialization failed"); + Term(); + } + frameRendered = false; + + return success; +} + +void DX11Renderer::Term() +{ + NOTICE_LOG(RENDERER, "DX11 renderer terminating"); + vtxConstants.reset(); + pxlConstants.reset(); + fbTex.reset(); + fbTextureView.reset(); + fbRenderTarget.reset(); + quad.reset(); + samplers.term(); + shaders.term(); + deviceContext.reset(); + device.reset(); +} + +void DX11Renderer::createDepthTexAndView(ComPtr& texture, ComPtr& view, int width, int height) +{ + view.reset(); + texture.reset(); + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = width; + desc.Height = height; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; + HRESULT hr = device->CreateTexture2D(&desc, nullptr, &texture.get()); + if (FAILED(hr)) + WARN_LOG(RENDERER, "Depth/stencil creation failed"); + + // Create the depth stencil view + D3D11_DEPTH_STENCIL_VIEW_DESC viewDesc{}; + viewDesc.Format = desc.Format; + viewDesc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; + hr = device->CreateDepthStencilView(texture, &viewDesc, &view.get()); + if (FAILED(hr)) + WARN_LOG(RENDERER, "Depth/stencil view creation failed"); +} + +void DX11Renderer::createTexAndRenderTarget(ComPtr& texture, ComPtr& renderTarget, int width, int height) +{ + texture.reset(); + renderTarget.reset(); 
+ D3D11_TEXTURE2D_DESC desc{}; + desc.Width = width; + desc.Height = height; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.MipLevels = 1; + + HRESULT hr = device->CreateTexture2D(&desc, nullptr, &texture.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "Framebuffer texture creation failed"); + return; + } + + hr = device->CreateRenderTargetView(texture, nullptr, &renderTarget.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "Framebuffer render target creation failed"); + return; + } + FLOAT black[4] = { 0.f, 0.f, 0.f, 0.f }; + deviceContext->ClearRenderTargetView(renderTarget, black); +} + +void DX11Renderer::Resize(int w, int h) +{ + if (width == (u32)w && height == (u32)h) + return; + width = w; + height = h; + + // Create framebuffer texture + { + fbTextureView.reset(); + createTexAndRenderTarget(fbTex, fbRenderTarget, width, height); + + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(fbTex, &viewDesc, &fbTextureView.get()); + } + + // Create depth stencil texture + createDepthTexAndView(depthTex, depthTexView, width, height); + + frameRendered = false; + frameRenderedOnce = false; +} + +bool DX11Renderer::ensureBufferSize(ComPtr& buffer, D3D11_BIND_FLAG bind, u32& currentSize, u32 minSize) +{ + if (minSize <= currentSize && buffer) + return true; + if (currentSize == 0) + currentSize = minSize; + else + while (currentSize < minSize) + currentSize *= 2; + buffer.reset(); + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = currentSize; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = bind; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + return SUCCEEDED(device->CreateBuffer(&desc, nullptr, &buffer.get())); +} + 
+BaseTextureCacheData *DX11Renderer::GetTexture(TSP tsp, TCW tcw) +{ + //lookup texture + DX11Texture* tf = texCache.getTextureCacheData(tsp, tcw); + + if (tf->texture == nullptr) + tf->Create(); + + //update if needed + if (tf->NeedsUpdate()) + tf->Update(); + else + { + if (tf->IsCustomTextureAvailable()) + { + texCache.DeleteLater(tf->texture); + tf->texture.reset(); + // FIXME textureView + tf->loadCustomTexture(); + } + } + return tf; +} + +bool DX11Renderer::Process(TA_context* ctx) +{ + if (KillTex) + texCache.Clear(); + texCache.Cleanup(); + + if (ctx->rend.isRenderFramebuffer) + { + readDCFramebuffer(); + } + else + { + if (!ta_parse_vdrc(ctx, true)) + return false; + } + + return true; +} + +bool DX11Renderer::Render() +{ + bool is_rtt = pvrrc.isRTT; + + u32 texAddress = FB_W_SOF1 & VRAM_MASK; + if (is_rtt) + { + prepareRttRenderTarget(texAddress); + } + else + { + ID3D11ShaderResourceView *p = nullptr; + deviceContext->PSSetShaderResources(0, 1, &p); + deviceContext->OMSetRenderTargets(1, &fbRenderTarget.get(), depthTexView); + D3D11_VIEWPORT vp{}; + vp.Width = (FLOAT)width; + vp.Height = (FLOAT)height; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); + } + matrices.CalcMatrices(&pvrrc, width, height); + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(vtxConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &matrices.GetNormalMatrix(), sizeof(float) * 4 * 4); + deviceContext->Unmap(vtxConstants, 0); + deviceContext->VSSetConstantBuffers(0, 1, &vtxConstants.get()); + + deviceContext->ClearDepthStencilView(depthTexView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 0.f, 0); + + deviceContext->IASetInputLayout(mainInputLayout); + + if (!pvrrc.isRenderFramebuffer) + { + setProvokingVertices(); + + verify(ensureBufferSize(vertexBuffer, D3D11_BIND_VERTEX_BUFFER, vertexBufferSize, pvrrc.verts.bytes())); + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(vertexBuffer, 0, 
D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, pvrrc.verts.head(), pvrrc.verts.bytes()); + deviceContext->Unmap(vertexBuffer, 0); + + verify(ensureBufferSize(indexBuffer, D3D11_BIND_INDEX_BUFFER, indexBufferSize, pvrrc.idx.bytes())); + deviceContext->Map(indexBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, pvrrc.idx.head(), pvrrc.idx.bytes()); + deviceContext->Unmap(indexBuffer, 0); + + if (config::ModifierVolumes && pvrrc.modtrig.used()) + { + verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, pvrrc.modtrig.bytes())); + deviceContext->Map(modvolBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); + deviceContext->Unmap(modvolBuffer, 0); + } + unsigned int stride = sizeof(Vertex); + unsigned int offset = 0; + deviceContext->IASetVertexBuffers(0, 1, &vertexBuffer.get(), &stride, &offset); + deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0); + + updateFogTexture(); + updatePaletteTexture(); + + PixelConstants pixelConstants; + + // VERT and RAM fog color constants + u8* fog_colvert_bgra = (u8*)&FOG_COL_VERT; + u8* fog_colram_bgra = (u8*)&FOG_COL_RAM; + pixelConstants.fog_col_vert[0] = fog_colvert_bgra[2] / 255.0f; + pixelConstants.fog_col_vert[1] = fog_colvert_bgra[1] / 255.0f; + pixelConstants.fog_col_vert[2] = fog_colvert_bgra[0] / 255.0f; + pixelConstants.fog_col_ram[0] = fog_colram_bgra[2] / 255.0f; + pixelConstants.fog_col_ram[1] = fog_colram_bgra[1] / 255.0f; + pixelConstants.fog_col_ram[2] = fog_colram_bgra[0] / 255.0f; + + // Fog density and scale constants + pixelConstants.fogDensity = FOG_DENSITY.get() * config::ExtraDepthScale; + pixelConstants.fogScale = 1.f - FPU_SHAD_SCALE.scale_factor / 256.f; + + // Color clamping + pixelConstants.colorClampMin[0] = ((pvrrc.fog_clamp_min >> 16) & 0xFF) / 255.0f; + pixelConstants.colorClampMin[1] = ((pvrrc.fog_clamp_min >> 8) & 
0xFF) / 255.0f; + pixelConstants.colorClampMin[2] = ((pvrrc.fog_clamp_min >> 0) & 0xFF) / 255.0f; + pixelConstants.colorClampMin[3] = ((pvrrc.fog_clamp_min >> 24) & 0xFF) / 255.0f; + + pixelConstants.colorClampMax[0] = ((pvrrc.fog_clamp_max >> 16) & 0xFF) / 255.0f; + pixelConstants.colorClampMax[1] = ((pvrrc.fog_clamp_max >> 8) & 0xFF) / 255.0f; + pixelConstants.colorClampMax[2] = ((pvrrc.fog_clamp_max >> 0) & 0xFF) / 255.0f; + pixelConstants.colorClampMax[3] = ((pvrrc.fog_clamp_max >> 24) & 0xFF) / 255.0f; + + // Punch-through alpha ref + pixelConstants.alphaTestValue = (PT_ALPHA_REF & 0xFF) / 255.0f; + + deviceContext->Map(pxlConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &pixelConstants, sizeof(pixelConstants)); + deviceContext->Unmap(pxlConstants, 0); + + ID3D11Buffer *buffers[] { pxlConstants, pxlPolyConstants }; + deviceContext->PSSetConstantBuffers(0, ARRAY_SIZE(buffers), buffers); + + setBaseScissor(); + + drawStrips(); + } + else + { + renderDCFramebuffer(); + } + + if (is_rtt) + { + readRttRenderTarget(texAddress); + } + else + { + deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); + renderFramebuffer(); + DrawOSD(false); + frameRendered = true; + frameRenderedOnce = true; + theDX11Context.setFrameRendered(); + } + + return !is_rtt; +} + +void DX11Renderer::renderDCFramebuffer() +{ + FLOAT colors[4] = { VO_BORDER_COL.Red / 255.f, VO_BORDER_COL.Green / 255.f, VO_BORDER_COL.Blue / 255.f, 1.f }; + deviceContext->ClearRenderTargetView(fbRenderTarget, colors); + D3D11_VIEWPORT vp{}; + vp.Width = (FLOAT)width; + vp.Height = (FLOAT)height; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); + + float bar = (width - height * 640.f / 480.f) / 2.f; + quad->draw(dcfbTextureView, samplers.getSampler(true), bar / width * 2.f - 1.f, -1.f, (width - bar * 2.f) / width * 2.f, 2.f); +} + +void DX11Renderer::renderFramebuffer() +{ + D3D11_VIEWPORT vp{}; + vp.Width = 
(FLOAT)settings.display.width; + vp.Height = (FLOAT)settings.display.height; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); + + const D3D11_RECT r = { 0, 0, settings.display.width, settings.display.height }; + deviceContext->RSSetScissorRects(1, &r); + FLOAT colors[4] = { VO_BORDER_COL.Red / 255.f, VO_BORDER_COL.Green / 255.f, VO_BORDER_COL.Blue / 255.f, 1.f }; + deviceContext->ClearRenderTargetView(theDX11Context.getRenderTarget(), colors); + int outwidth = settings.display.width; + int outheight = settings.display.height; + if (config::Rotate90) + std::swap(outwidth, outheight); + float renderAR = (float)width / height; + float screenAR = (float)outwidth / outheight; + int dy = 0; + int dx = 0; + if (renderAR > screenAR) + dy = (int)roundf((outheight - outwidth / renderAR) / 2.f); + else + dx = (int)roundf((outwidth - outheight * renderAR) / 2.f); + + float x = 0, y = 0, w = outwidth, h = outheight; + if (dx != 0) + { + x = dx; + w = outwidth - 2 * dx; + } + else + { + y = dy; + h = outheight - 2 * dy; + } + // Normalize + x = x * 2.f / outwidth - 1.f; + w *= 2.f / outwidth; + y = y * 2.f / outheight - 1.f; + h *= 2.f / outheight; + quad->draw(fbTextureView, samplers.getSampler(true), x, y, w, h, config::Rotate90); +} + +void DX11Renderer::setCullMode(int mode) +{ + ComPtr rasterizer; + switch (mode) + { + case 0: + case 1: + default: + rasterizer = rasterCullNone; + break; + case 2: + rasterizer = rasterCullFront; + break; + case 3: + rasterizer = rasterCullBack; + break; + } + deviceContext->RSSetState(rasterizer); +} + +template +void DX11Renderer::setRenderState(const PolyParam *gp) +{ + PixelPolyConstants constants; + if (gp->pcw.Texture && gp->tsp.FilterMode > 1 && Type != ListType_Punch_Through && gp->tcw.MipMapped == 1) + { + constants.trilinearAlpha = 0.25f * (gp->tsp.MipMapD & 0x3); + if (gp->tsp.FilterMode == 2) + // Trilinear pass A + constants.trilinearAlpha = 1.f - constants.trilinearAlpha; + } + else + 
constants.trilinearAlpha = 1.f; + + bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff); + int fog_ctrl = config::Fog ? gp->tsp.FogCtrl : 2; + + int clip_rect[4] = {}; + TileClipping clipmode = GetTileClip(gp->tileclip, matrices.GetViewportMatrix(), clip_rect); + DX11Texture *texture = (DX11Texture *)gp->texture; + bool gpuPalette = texture != nullptr ? texture->gpuPalette : false; + + ComPtr vertexShader = shaders.getVertexShader(gp->pcw.Gouraud); + deviceContext->VSSetShader(vertexShader, nullptr, 0); + ComPtr pixelShader = shaders.getShader( + gp->pcw.Texture, + gp->tsp.UseAlpha, + gp->tsp.IgnoreTexA, + gp->tsp.ShadInstr, + gp->pcw.Offset, + fog_ctrl, + gp->tcw.PixelFmt == PixelBumpMap, + color_clamp, + constants.trilinearAlpha != 1.f, + gpuPalette, + gp->pcw.Gouraud, + Type == ListType_Punch_Through); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + if (gpuPalette) + { + if (gp->tcw.PixelFmt == PixelPal4) + constants.paletteIndex = (float)(gp->tcw.PalSelect << 4); + else + constants.paletteIndex = (float)((gp->tcw.PalSelect >> 4) << 8); + } + if (constants.trilinearAlpha != 1.f || gpuPalette) + { + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(pxlPolyConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &constants, sizeof(constants)); + deviceContext->Unmap(pxlPolyConstants, 0); + } + + /* TODO + if (clipmode == TileClipping::Inside) + { + float f[] = { clip_rect[0], clip_rect[1], clip_rect[0] + clip_rect[2], clip_rect[1] + clip_rect[3] }; + device->SetPixelShaderConstantF(n, f, 1); + } + else */ + if (clipmode == TileClipping::Outside) + { + RECT rect { clip_rect[0], clip_rect[1], clip_rect[0] + clip_rect[2], clip_rect[1] + clip_rect[3] }; + // TODO cache? 
+ deviceContext->RSSetScissorRects(1, &rect); + } + else + { + deviceContext->RSSetScissorRects(1, &scissorRect); + } + + if (texture != nullptr) + { + deviceContext->PSSetShaderResources(0, 1, &texture->textureView.get()); + auto sampler = samplers.getSampler(gp->tsp.FilterMode != 0 && !gpuPalette, gp->tsp.ClampU, gp->tsp.ClampV, gp->tsp.FlipU, gp->tsp.FlipV); + deviceContext->PSSetSamplers(0, 1, &sampler.get()); + } + + const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; + // Apparently punch-through polys support blending, or at least some combinations + if (Type == ListType_Translucent || Type == ListType_Punch_Through) + deviceContext->OMSetBlendState(blendStates.getState(true, gp->tsp.SrcInstr, gp->tsp.DstInstr), blend_factor, 0xffffffff); + else + deviceContext->OMSetBlendState(blendStates.getState(false, gp->tsp.SrcInstr, gp->tsp.DstInstr), blend_factor, 0xffffffff); + + setCullMode(gp->isp.CullMode); + + //set Z mode, only if required + int zfunc; + if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled)) + zfunc = 6; // GEQ + else + zfunc = gp->isp.DepthMode; + + bool zwriteEnable; + if (SortingEnabled && !config::PerStripSorting) + zwriteEnable = false; + else + { + // Z Write Disable seems to be ignored for punch-through. + // Fixes Worms World Party, Bust-a-Move 4 and Re-Volt + if (Type == ListType_Punch_Through) + zwriteEnable = true; + else + zwriteEnable = !gp->isp.ZWriteDis; + } + const u32 stencil = (gp->pcw.Shadow != 0) ? 
0x80 : 0; + deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, config::ModifierVolumes), stencil); +} + +template +void DX11Renderer::drawList(const List& gply, int first, int count) +{ + deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + PolyParam* params = &gply.head()[first]; + + while (count-- > 0) + { + if (params->count > 2) + { + if ((Type == ListType_Opaque || (Type == ListType_Translucent && !SortingEnabled)) && params->isp.DepthMode == 0) + { + // depthFunc = never + params++; + continue; + } + setRenderState(params); + deviceContext->DrawIndexed(params->count, params->first, 0); + } + + params++; + } +} + +void DX11Renderer::sortTriangles(int first, int count) +{ + std::vector vidx_sort; + GenSorted(first, count, pidx_sort, vidx_sort); + + //Upload to GPU if needed + if (pidx_sort.empty()) + return; + + const u32 bufSize = vidx_sort.size() * sizeof(u32); + // Upload sorted index buffer + ensureBufferSize(sortedTriIndexBuffer, D3D11_BIND_INDEX_BUFFER, sortedTriIndexBufferSize, bufSize); + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(sortedTriIndexBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &vidx_sort[0], bufSize); + deviceContext->Unmap(sortedTriIndexBuffer, 0); + deviceContext->IASetIndexBuffer(sortedTriIndexBuffer, DXGI_FORMAT_R32_UINT, 0); +} + +void DX11Renderer::drawSorted(bool multipass) +{ + if (pidx_sort.empty()) + return; + + deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + u32 count = pidx_sort.size(); + + for (u32 p = 0; p < count; p++) + { + const PolyParam* params = pidx_sort[p].ppid; + if (pidx_sort[p].count > 2) + { + setRenderState(params); + deviceContext->DrawIndexed(pidx_sort[p].count, pidx_sort[p].first, 0); + } + } + if (multipass && config::TranslucentPolygonDepthMask) + { + // Write to the depth buffer now. The next render pass might need it. 
(Cosmic Smash) + const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; + deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), blend_factor, 0xffffffff); + + ComPtr vertexShader = shaders.getVertexShader(true); + deviceContext->VSSetShader(vertexShader, nullptr, 0); + ComPtr pixelShader = shaders.getShader( + false, + false, + false, + 0, + false, + 2, + false, + false, + false, + false, + true, + false); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // Enable depth test, enable depth write, >=, disable stencil + deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, true, 6, false), 0); + deviceContext->RSSetScissorRects(1, &scissorRect); + + for (u32 p = 0; p < count; p++) + { + const PolyParam* params = pidx_sort[p].ppid; + if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) + { + setCullMode(params->isp.CullMode); + deviceContext->DrawIndexed(pidx_sort[p].count, pidx_sort[p].first, 0); + } + } + } + deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0); +} + +void DX11Renderer::drawModVols(int first, int count) +{ + if (count == 0 || pvrrc.modtrig.used() == 0 || !config::ModifierVolumes) + return; + + deviceContext->IASetInputLayout(modVolInputLayout); + unsigned int stride = 3 * sizeof(float); + unsigned int offset = 0; + deviceContext->IASetVertexBuffers(0, 1, &modvolBuffer.get(), &stride, &offset); + deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; + deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), blend_factor, 0xffffffff); + + deviceContext->VSSetShader(shaders.getMVVertexShader(), nullptr, 0); + deviceContext->PSSetShader(shaders.getModVolShader(), nullptr, 0); + + deviceContext->RSSetScissorRects(1, &scissorRect); + setCullMode(0); + + ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first]; + + int mod_base = -1; + + for (int cmv = 0; cmv < count; cmv++) + { + 
ModifierVolumeParam& param = params[cmv]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + if (mod_base == -1) + mod_base = param.first; + + if (!param.isp.VolumeLast && mv_mode > 0) + // OR'ing (open volume or quad) + deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Or), 2); + else + // XOR'ing (closed volume) + deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Xor), 0); + + deviceContext->Draw(param.count * 3, param.first * 3); + + if (mv_mode == 1 || mv_mode == 2) + { + // Sum the area + deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(mv_mode == 1 ? DepthStencilStates::Inclusion : DepthStencilStates::Exclusion), 1); + deviceContext->Draw((param.first + param.count - mod_base) * 3, mod_base * 3); + mod_base = -1; + } + } + //disable culling + setCullMode(0); + //enable color writes + deviceContext->OMSetBlendState(blendStates.getState(true, 4, 5), blend_factor, 0xffffffff); + + //black out any stencil with '1' + //only pixels that are Modvol enabled, and in area 1 + deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Final), 0x81); + + deviceContext->IASetInputLayout(mainInputLayout); + stride = sizeof(Vertex); + offset = 0; + deviceContext->IASetVertexBuffers(0, 1, &vertexBuffer.get(), &stride, &offset); + deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0); + deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + deviceContext->DrawIndexed(4, 0, 0); +} + +// Direct3D uses the color values of the first vertex for flat shaded triangle strips. +// On Dreamcast the last vertex is the provoking one so we must copy it onto the first. 
+// TODO refactor with Vk +// FIXME share with DX9 +void DX11Renderer::setProvokingVertices() +{ + auto setProvokingVertex = [](const List& list) { + u32 *idx_base = pvrrc.idx.head(); + Vertex *vtx_base = pvrrc.verts.head(); + const PolyParam *pp_end = list.LastPtr(0); + for (const PolyParam *pp = list.head(); pp != pp_end; pp++) + { + if (!pp->pcw.Gouraud && pp->count > 2) + { + for (u32 i = 0; i < pp->count - 2; i++) + { + Vertex *vertex = &vtx_base[idx_base[pp->first + i]]; + Vertex *lastVertex = &vtx_base[idx_base[pp->first + i + 2]]; + memcpy(vertex->col, lastVertex->col, 4); + memcpy(vertex->spc, lastVertex->spc, 4); + //memcpy(vertex->col1, lastVertex->col1, 4); + //memcpy(vertex->spc1, lastVertex->spc1, 4); + } + } + } + }; + setProvokingVertex(pvrrc.global_param_op); + setProvokingVertex(pvrrc.global_param_pt); + setProvokingVertex(pvrrc.global_param_tr); +} + +void DX11Renderer::drawStrips() +{ + RenderPass previous_pass {}; + for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) + { + const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; + u32 op_count = current_pass.op_count - previous_pass.op_count; + u32 pt_count = current_pass.pt_count - previous_pass.pt_count; + u32 tr_count = current_pass.tr_count - previous_pass.tr_count; + u32 mvo_count = current_pass.mvo_count - previous_pass.mvo_count; + DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d", render_pass + 1, + op_count, pt_count, tr_count, mvo_count); + + drawList(pvrrc.global_param_op, previous_pass.op_count, op_count); + + drawList(pvrrc.global_param_pt, previous_pass.pt_count, pt_count); + + drawModVols(previous_pass.mvo_count, mvo_count); + + if (current_pass.autosort) + { + if (!config::PerStripSorting) + { + sortTriangles(previous_pass.tr_count, tr_count); + drawSorted(render_pass < pvrrc.render_passes.used() - 1); + } + else + { + SortPParams(previous_pass.tr_count, tr_count); + drawList(pvrrc.global_param_tr, 
previous_pass.tr_count, tr_count); + } + } + else + { + drawList(pvrrc.global_param_tr, previous_pass.tr_count, tr_count); + } + previous_pass = current_pass; + } +} + +bool DX11Renderer::RenderLastFrame() +{ + if (!frameRenderedOnce) + return false; + renderFramebuffer(); + return false; +} + +void DX11Renderer::readDCFramebuffer() +{ + if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0) + return; + + PixelBuffer pb; + int width; + int height; + ReadFramebuffer(pb, width, height); + + //if (!dcfbTexture) + { + // FIXME dimension can change + dcfbTexture.reset(); + dcfbTextureView.reset(); + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = width; + desc.Height = height; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; // TODO correct? + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.MipLevels = 1; + + HRESULT hr = device->CreateTexture2D(&desc, nullptr, &dcfbTexture.get()); + if (FAILED(hr)) + WARN_LOG(RENDERER, "DC Framebuffer texture creation failed"); + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = desc.Format; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + hr = device->CreateShaderResourceView(dcfbTexture, &viewDesc, &dcfbTextureView.get()); + if (FAILED(hr)) + WARN_LOG(RENDERER, "DC Framebuffer texture view creation failed"); + } + + deviceContext->UpdateSubresource(dcfbTexture, 0, nullptr, pb.data(), width * sizeof(u32), width * sizeof(u32) * height); +} + +void DX11Renderer::setBaseScissor() +{ + bool wide_screen_on = !pvrrc.isRTT && config::Widescreen && !matrices.IsClipped() && !config::Rotate90; + if (!wide_screen_on) + { + float fWidth; + float fHeight; + float min_x; + float min_y; + if (!pvrrc.isRTT) + { + glm::vec4 clip_min(pvrrc.fb_X_CLIP.min, pvrrc.fb_Y_CLIP.min, 0, 1); + glm::vec4 clip_dim(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1, + pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1, 0, 0); + 
clip_min = matrices.GetScissorMatrix() * clip_min; + clip_dim = matrices.GetScissorMatrix() * clip_dim; + + min_x = clip_min[0]; + min_y = clip_min[1]; + fWidth = clip_dim[0]; + fHeight = clip_dim[1]; + if (fWidth < 0) + { + min_x += fWidth; + fWidth = -fWidth; + } + if (fHeight < 0) + { + min_y += fHeight; + fHeight = -fHeight; + } + if (matrices.GetSidebarWidth() > 0) + { + float scaled_offs_x = matrices.GetSidebarWidth(); + + float borderColor[] { 1.f, VO_BORDER_COL.Red / 255.f, VO_BORDER_COL.Green / 255.f, VO_BORDER_COL.Blue / 255.f }; +// TODO devCache.SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); +// D3DRECT rects[] { +// { 0, 0, lroundf(scaled_offs_x), (long)height }, +// { (long)(width - scaled_offs_x), 0, (long)(width + 1), (long)height }, +// }; +// device->Clear(2, rects, D3DCLEAR_TARGET, borderColor, 0.f, 0); + } + } + else + { + fWidth = (float)(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1); + fHeight = (float)(pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1); + min_x = (float)pvrrc.fb_X_CLIP.min; + min_y = (float)pvrrc.fb_Y_CLIP.min; + if (config::RenderResolution > 480 && !config::RenderToTextureBuffer) + { + min_x *= config::RenderResolution / 480.f; + min_y *= config::RenderResolution / 480.f; + fWidth *= config::RenderResolution / 480.f; + fHeight *= config::RenderResolution / 480.f; + } + } + scissorEnable = true; + scissorRect.left = lroundf(min_x); + scissorRect.top = lroundf(min_y); + scissorRect.right = scissorRect.left + lroundf(fWidth); + scissorRect.bottom = scissorRect.top + lroundf(fHeight); + } + else + { + scissorEnable = false; + scissorRect.left = 0; + scissorRect.top = 0; + scissorRect.right = width; + scissorRect.bottom = height; + } + deviceContext->RSSetScissorRects(1, &scissorRect); +} + +void DX11Renderer::prepareRttRenderTarget(u32 texAddress) +{ + u32 fbw = pvrrc.fb_X_CLIP.max + 1; + u32 fbh = pvrrc.fb_Y_CLIP.max + 1; + DEBUG_LOG(RENDERER, "RTT packmode=%d stride=%d - %d x %d @ %06x", + FB_W_CTRL.fb_packmode, 
FB_W_LINESTRIDE.stride * 8, fbw, fbh, texAddress); + // Find the smallest power of two texture that fits the viewport + u32 fbh2 = 2; + while (fbh2 < fbh) + fbh2 *= 2; + u32 fbw2 = 2; + while (fbw2 < fbw) + fbw2 *= 2; + if (!config::RenderToTextureBuffer) + { + fbw *= config::RenderResolution / 480.f; + fbh *= config::RenderResolution / 480.f; + fbw2 *= config::RenderResolution / 480.f; + fbh2 *= config::RenderResolution / 480.f; + } + createTexAndRenderTarget(rttTexture, rttRenderTarget, fbw2, fbh2); + createDepthTexAndView(rttDepthTex, rttDepthTexView, fbw2, fbh2); + deviceContext->OMSetRenderTargets(1, &rttRenderTarget.get(), rttDepthTexView); + + D3D11_VIEWPORT vp{}; + vp.Width = (FLOAT)fbw; + vp.Height = (FLOAT)fbh; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); +} + +void DX11Renderer::readRttRenderTarget(u32 texAddress) +{ + u32 w = pvrrc.fb_X_CLIP.max + 1; + u32 h = pvrrc.fb_Y_CLIP.max + 1; + const u8 fb_packmode = FB_W_CTRL.fb_packmode; + if (config::RenderToTextureBuffer) + { + D3D11_TEXTURE2D_DESC desc; + rttTexture->GetDesc(&desc); + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + + ComPtr stagingTex; + HRESULT hr = device->CreateTexture2D(&desc, nullptr, &stagingTex.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "Staging RTT texture creation failed"); + return; + } + deviceContext->CopyResource(stagingTex, rttTexture); + + PixelBuffer tmp_buf; + tmp_buf.init(w, h); + u8 *p = (u8 *)tmp_buf.data(); + + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(stagingTex, 0, D3D11_MAP_READ, 0, &mappedSubres); + memcpy(p, mappedSubres.pData, w * h * sizeof(u32)); + deviceContext->Unmap(stagingTex, 0); + + u16 *dst = (u16 *)&vram[texAddress]; + WriteTextureToVRam<2, 1, 0, 3>(w, h, p, dst); + } + else + { + //memset(&vram[gl.rtt.texAddress], 0, size); + if (w <= 1024 && h <= 1024) + { + // TexAddr : (address), 
Reserved : 0, StrideSel : 0, ScanOrder : 1 + TCW tcw = { { texAddress >> 3, 0, 0, 1 } }; + switch (fb_packmode) { + case 0: + case 3: + tcw.PixelFmt = Pixel1555; + break; + case 1: + tcw.PixelFmt = Pixel565; + break; + case 2: + tcw.PixelFmt = Pixel4444; + break; + } + TSP tsp = { 0 }; + for (tsp.TexU = 0; tsp.TexU <= 7 && (8u << tsp.TexU) < w; tsp.TexU++) + ; + + for (tsp.TexV = 0; tsp.TexV <= 7 && (8u << tsp.TexV) < h; tsp.TexV++) + ; + + DX11Texture* texture = texCache.getTextureCacheData(tsp, tcw); + if (!texture->texture) + texture->Create(); + + texture->texture = rttTexture; + rttTexture.reset(); + rttRenderTarget.reset(); + texture->textureView.reset(); + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(texture->texture, &viewDesc, &texture->textureView.get()); + + texture->dirty = 0; + libCore_vramlock_Lock(texture->sa_tex, texture->sa + texture->size - 1, texture); + } + } +} + +void DX11Renderer::updatePaletteTexture() +{ + if (!palette_updated) + return; + palette_updated = false; + + deviceContext->UpdateSubresource(paletteTexture, 0, nullptr, palette32_ram, 32 * sizeof(u32), 32 * sizeof(u32) * 32); + + deviceContext->PSSetShaderResources(1, 1, &paletteTextureView.get()); + deviceContext->PSSetSamplers(1, 1, &samplers.getSampler(false).get()); +} + +void DX11Renderer::updateFogTexture() +{ + if (!fog_needs_update || !config::Fog) + return; + fog_needs_update = false; + u8 temp_tex_buffer[256]; + MakeFogTexture(temp_tex_buffer); + + deviceContext->UpdateSubresource(fogTexture, 0, nullptr, temp_tex_buffer, 128, 128 * 2); + + deviceContext->PSSetShaderResources(2, 1, &fogTextureView.get()); + deviceContext->PSSetSamplers(2, 1, &samplers.getSampler(true).get()); +} + +void DX11Renderer::DrawOSD(bool clear_screen) +{ + theDX11Context.setOverlay(!clear_screen); + gui_display_osd(); + 
theDX11Context.setOverlay(false); +} + +Renderer *rend_DirectX11() +{ + return new DX11Renderer(); +} diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h new file mode 100644 index 000000000..f5dcc9915 --- /dev/null +++ b/core/rend/dx11/dx11_renderer.h @@ -0,0 +1,122 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include "types.h" +#include "hw/pvr/Renderer_if.h" +#include +#include "dx11context.h" +#include "rend/transform_matrix.h" +#include "dx11_quad.h" +#include "dx11_texture.h" +#include "dx11_shaders.h" +#include "rend/sorter.h" +#include "dx11_renderstate.h" + +struct DX11Renderer : public Renderer +{ + bool Init() override; + void Resize(int w, int h) override; + void Term() override; + bool Process(TA_context* ctx) override; + bool Render() override; + + bool Present() override + { + if (!frameRendered) + return false; + frameRendered = false; + return true; + } + + bool RenderLastFrame() override; + void DrawOSD(bool clear_screen) override; + BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override; + +private: + void readDCFramebuffer(); + void renderDCFramebuffer(); + bool ensureBufferSize(ComPtr& buffer, D3D11_BIND_FLAG bind, u32& currentSize, u32 minSize); + void setProvokingVertices(); + void prepareRttRenderTarget(u32 texAddress); + void readRttRenderTarget(u32 texAddress); + void renderFramebuffer(); 
+ void updateFogTexture(); + void updatePaletteTexture(); + void setBaseScissor(); + void drawStrips(); + template + void drawList(const List& gply, int first, int count); + template + void setRenderState(const PolyParam *gp); + void sortTriangles(int first, int count); + void drawSorted(bool multipass); + void drawModVols(int first, int count); + void setCullMode(int mode); + void createDepthTexAndView(ComPtr& depthTex, ComPtr& depthTexView, int width, int height); + void createTexAndRenderTarget(ComPtr& texture, ComPtr& renderTarget, int width, int height); + + ComPtr device; + ComPtr deviceContext; + ComPtr vertexBuffer; + u32 vertexBufferSize = 0; + ComPtr modvolBuffer; + u32 modvolBufferSize = 0; + ComPtr indexBuffer; + u32 indexBufferSize = 0; + ComPtr sortedTriIndexBuffer; + u32 sortedTriIndexBufferSize = 0; + + ComPtr fbTex; + ComPtr fbRenderTarget; + ComPtr fbTextureView; + ComPtr depthTex; + ComPtr depthTexView; + ComPtr dcfbTexture; + ComPtr dcfbTextureView; + ComPtr paletteTexture; + ComPtr paletteTextureView; + ComPtr fogTexture; + ComPtr fogTextureView; + ComPtr rttTexture; + ComPtr rttRenderTarget; + ComPtr rttDepthTex; + ComPtr rttDepthTexView; + + ComPtr rasterCullNone, rasterCullFront, rasterCullBack; + + u32 width = 0; + u32 height = 0; + TransformMatrix matrices; + DX11TextureCache texCache; + DX11Shaders shaders; + Samplers samplers; + DepthStencilStates depthStencilStates; + BlendStates blendStates; + std::vector pidx_sort; + std::unique_ptr quad; + ComPtr mainInputLayout; + ComPtr modVolInputLayout; + ComPtr vtxConstants; + ComPtr pxlConstants; + ComPtr pxlPolyConstants; + D3D11_RECT scissorRect{}; + bool scissorEnable = false; + bool frameRendered = false; + bool frameRenderedOnce = false; +}; diff --git a/core/rend/dx11/dx11_renderstate.h b/core/rend/dx11/dx11_renderstate.h new file mode 100644 index 000000000..f77f5b8a2 --- /dev/null +++ b/core/rend/dx11/dx11_renderstate.h @@ -0,0 +1,180 @@ +/* + Copyright 2021 flyinghead + + This file is 
part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include "dx11context.h" +#include + +const D3D11_COMPARISON_FUNC Zfunction[] +{ + D3D11_COMPARISON_NEVER, //0 Never + D3D11_COMPARISON_LESS, //1 Less + D3D11_COMPARISON_EQUAL, //2 Equal + D3D11_COMPARISON_LESS_EQUAL, //3 Less Or Equal + D3D11_COMPARISON_GREATER, //4 Greater + D3D11_COMPARISON_NOT_EQUAL, //5 Not Equal + D3D11_COMPARISON_GREATER_EQUAL, //6 Greater Or Equal + D3D11_COMPARISON_ALWAYS, //7 Always +}; + +const D3D11_BLEND DestBlend[] +{ + D3D11_BLEND_ZERO, + D3D11_BLEND_ONE, + D3D11_BLEND_SRC_COLOR, + D3D11_BLEND_INV_SRC_COLOR, + D3D11_BLEND_SRC_ALPHA, + D3D11_BLEND_INV_SRC_ALPHA, + D3D11_BLEND_DEST_ALPHA, + D3D11_BLEND_INV_DEST_ALPHA +}; + +const D3D11_BLEND SrcBlend[] +{ + D3D11_BLEND_ZERO, + D3D11_BLEND_ONE, + D3D11_BLEND_DEST_COLOR, + D3D11_BLEND_INV_DEST_COLOR, + D3D11_BLEND_SRC_ALPHA, + D3D11_BLEND_INV_SRC_ALPHA, + D3D11_BLEND_DEST_ALPHA, + D3D11_BLEND_INV_DEST_ALPHA +}; + +class DepthStencilStates +{ +public: + enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, Final, Count }; + + ComPtr getState(bool depth, bool depthWrite, int depthFunc, bool stencil) + { + int hash = (depthFunc << 3) | depth | (depthWrite << 1) | (stencil << 2); + auto& state = states[hash]; + if (!state) + { + D3D11_DEPTH_STENCIL_DESC desc{}; + desc.DepthEnable = depth; + desc.DepthWriteMask = depthWrite ? 
D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; + desc.DepthFunc = Zfunction[depthFunc]; + desc.StencilEnable = stencil; + desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.BackFace = desc.FrontFace; + desc.StencilWriteMask = 0xFF; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + theDX11Context.getDevice()->CreateDepthStencilState(&desc, &state.get()); + } + return state; + } + + ComPtr getMVState(ModifierVolumeMode mode) + { + auto& state = mvStates[mode]; + if (!state) + { + D3D11_DEPTH_STENCIL_DESC desc{}; + desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + desc.DepthFunc = D3D11_COMPARISON_GREATER; + desc.StencilEnable = true; + desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + switch (mode) + { + case Xor: + desc.DepthEnable = true; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_INVERT; + desc.StencilWriteMask = 2; + break; + case Or: + desc.DepthEnable = true; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + desc.StencilWriteMask = 2; + break; + case Inclusion: + desc.FrontFace.StencilFunc = D3D11_COMPARISON_LESS_EQUAL; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_ZERO; + desc.StencilReadMask = 3; + desc.StencilWriteMask = 3; + break; + case Exclusion: + desc.FrontFace.StencilFunc = D3D11_COMPARISON_LESS_EQUAL; + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_ZERO; + desc.StencilReadMask = 3; + desc.StencilWriteMask = 3; + break; + case Final: + desc.FrontFace.StencilFunc = D3D11_COMPARISON_EQUAL; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_ZERO; + desc.FrontFace.StencilFailOp = 
D3D11_STENCIL_OP_ZERO; + desc.StencilReadMask = 0x81; + desc.StencilWriteMask = 3; + break; + default: + break; + } + desc.BackFace = desc.FrontFace; + theDX11Context.getDevice()->CreateDepthStencilState(&desc, &state.get()); + } + return state; + } + + void term() + { + states.clear(); + for (auto& state : mvStates) + state.reset(); + } + +private: + std::unordered_map> states; + std::array, ModifierVolumeMode::Count> mvStates; +}; + +class BlendStates +{ +public: + ComPtr getState(bool enable, int srcBlend = 0, int destBlend = 0, bool disableWrite = false) + { + int hash = enable | (srcBlend << 1) | (destBlend << 4) | (disableWrite << 7); + auto& state = states[hash]; + if (!state) + { + D3D11_BLEND_DESC desc{}; + desc.RenderTarget[0].RenderTargetWriteMask = disableWrite ? 0 : D3D11_COLOR_WRITE_ENABLE_ALL; + desc.RenderTarget[0].BlendEnable = enable; + desc.RenderTarget[0].SrcBlend = SrcBlend[srcBlend]; + desc.RenderTarget[0].DestBlend = DestBlend[destBlend]; + desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_SRC_ALPHA; + desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + theDX11Context.getDevice()->CreateBlendState(&desc, &state.get()); + } + return state; + } + + void term() { + states.clear(); + } + +private: + std::unordered_map> states; +}; diff --git a/core/rend/dx11/dx11_shaders.cpp b/core/rend/dx11/dx11_shaders.cpp new file mode 100644 index 000000000..30e0a7a8c --- /dev/null +++ b/core/rend/dx11/dx11_shaders.cpp @@ -0,0 +1,497 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#include "dx11_shaders.h" + +const char * const VertexShader = R"( +struct VertexIn +{ + float4 pos : POSITION; + float4 col : COLOR0; + float4 spec : COLOR1; + float2 uv : TEXCOORD0; +}; + +struct VertexOut +{ + float4 pos : SV_POSITION; + float4 uv : TEXCOORD0; + float4 col : COLOR0; + float4 spec : COLOR1; +}; + +cbuffer constantBuffer : register(b0) +{ + float4x4 transMatrix; +}; + +VertexOut main(in VertexIn vin) +{ + VertexOut vo; + vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); +#if pp_Gouraud == 1 + vo.col = vin.col * vo.pos.z; + vo.spec = vin.spec * vo.pos.z; +#else + // flat shading: no interpolation + vo.col = vin.col; + vo.spec = vin.spec; +#endif + vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z); + + vo.pos.w = 1.f; + vo.pos.z = 0.f; + + return vo; +} + +)"; + +const char * const ModVolVertexShader = R"( +struct VertexIn +{ + float4 pos : POSITION; +}; + +struct VertexOut +{ + float4 pos : SV_POSITION; + float4 uv : TEXCOORD0; +}; + +cbuffer constantBuffer : register(b0) +{ + float4x4 transMatrix; +}; + +VertexOut main(in VertexIn vin) +{ + VertexOut vo; + vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); + vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z); + + vo.pos.w = 1.f; + vo.pos.z = 0.f; + + return vo; +} + +)"; + +const char * const PixelShader = R"( + +#define PI 3.1415926f + +struct Pixel +{ + float4 pos : SV_POSITION; + float4 uv : TEXCOORD0; + float4 col : COLOR0; + float4 spec : COLOR1; +}; + +Texture2D texture0 : register(t0); +sampler sampler0 : register(s0); + +Texture2D paletteTexture : register(t1); +sampler paletteSampler : register(s1); + +Texture2D fogTexture : register(t2); +sampler 
fogSampler : register(s2); + +cbuffer constantBuffer : register(b0) +{ + float4 colorClampMin; + float4 colorClampMax; + float4 FOG_COL_VERT; + float4 FOG_COL_RAM; + float fogDensity; + float fogScale; + float alphaTestValue; +}; + +cbuffer polyConstantBuffer : register(b1) +{ + float paletteIndex; + float trilinearAlpha; +}; + +float fog_mode2(float w) +{ + float z = clamp(w * fogDensity, 1.0f, 255.9999f); + float exp = floor(log2(z)); + float m = z * 16.0f / pow(2.0, exp) - 16.0f; + float idx = floor(m) + exp * 16.0f + 0.5f; + float4 fogCoef = fogTexture.Sample(fogSampler, float2(idx / 128.0f, 0.75f - (m - floor(m)) / 2.0f)); + return fogCoef.a; +} + +float4 clampColor(float4 color) +{ +#if FogClamping == 1 + return clamp(color, colorClampMin, colorClampMax); +#else + return color; +#endif +} + +#if pp_Palette == 1 + +float4 palettePixel(float2 coords) +{ + int colorIdx = int(floor(texture0.Sample(sampler0, coords).a * 255.0f + 0.5f) + paletteIndex.x); + float2 c = float2((fmod(float(colorIdx), 32.0f) * 2.0f + 1.0f) / 64.0f, (float(colorIdx / 32) * 2.0f + 1.0f) / 64.0f); + return paletteTexture.Sample(paletteSampler, c); +} + +#endif + +struct PSO +{ + float4 col : SV_TARGET; + float z : SV_DEPTH; +}; + +PSO main(in Pixel inpix) +{ +#if pp_Gouraud == 1 + float4 color = inpix.col / inpix.uv.w; + #if pp_BumpMap == 1 || pp_Offset == 1 + float4 specular = inpix.spec / inpix.uv.w; + #endif +#else + float4 color = inpix.col; + #if pp_BumpMap == 1 || pp_Offset == 1 + float4 specular = inpix.spec; + #endif +#endif + #if pp_UseAlpha == 0 + color.a = 1.0f; + #endif + #if pp_FogCtrl == 3 + color = float4(FOG_COL_RAM.rgb, fog_mode2(inpix.uv.w)); + #endif + #if pp_Texture == 1 + { + float2 uv = inpix.uv.xy / inpix.uv.w; + #if pp_Palette == 0 + float4 texcol = texture0.Sample(sampler0, uv); + #else + float4 texcol = palettePixel(uv); + #endif + + #if pp_BumpMap == 1 + float s = PI / 2.0f * (texcol.a * 15.0f * 16.0f + texcol.r * 15.0f) / 255.0f; + float r = 2.0f * PI * 
(texcol.g * 15.0f * 16.0f + texcol.b * 15.0f) / 255.0f; + texcol[3] = clamp(specular.a + specular.r * sin(s) + specular.g * cos(s) * cos(r - 2.0f * PI * specular.b), 0.0f, 1.0f); + texcol.rgb = float3(1.0f, 1.0f, 1.0f); + #else + #if pp_IgnoreTexA == 1 + texcol.a = 1.0f; + #endif + #if cp_AlphaTest == 1 + if (alphaTestValue > texcol.a) + discard; + texcol.a = 1.0f; + #endif + #endif + #if pp_ShadInstr == 0 + color = texcol; + #endif + #if pp_ShadInstr == 1 + color.rgb *= texcol.rgb; + color.a = texcol.a; + #endif + #if pp_ShadInstr == 2 + color.rgb = lerp(color.rgb, texcol.rgb, texcol.a); + #endif + #if pp_ShadInstr == 3 + color *= texcol; + #endif + + #if pp_Offset == 1 && pp_BumpMap == 0 + color.rgb += specular.rgb; + #endif + } + #endif + + color = clampColor(color); + + #if pp_FogCtrl == 0 + color.rgb = lerp(color.rgb, FOG_COL_RAM.rgb, fog_mode2(inpix.uv.w)); + #endif + #if pp_FogCtrl == 1 && pp_Offset == 1 && pp_BumpMap == 0 + color.rgb = lerp(color.rgb, FOG_COL_VERT.rgb, specular.a); + #endif + + #if pp_TriLinear == 1 + color *= trilinearAlpha; + #endif + + PSO pso; + float w = inpix.uv.w * 100000.0f; + pso.z = log2(1.0f + w) / 34.0f; + pso.col = color; + + return pso; +} + +struct MVPixel +{ + float4 pos : SV_POSITION; + float4 uv : TEXCOORD0; +}; + +PSO modifierVolume(in MVPixel inpix) +{ + PSO pso; + float w = inpix.uv.w * 100000.0f; + pso.z = log2(1.0f + w) / 34.0f; + pso.col = float4(0, 0, 0, fogScale); + + return pso; +} +)"; + +const char * const QuadVertexShader = R"( +struct VertexIn +{ + float2 pos : POSITION; + float2 uv : TEXCOORD0; +}; + +struct VertexOut +{ + float4 pos : SV_POSITION; + float2 uv : TEXCOORD0; +}; + +VertexOut main(in VertexIn vin) +{ + VertexOut vo; +#if ROTATE == 0 + vo.pos = float4(vin.pos, 0.f, 1.f); +#else + vo.pos = float4(vin.pos.y, -vin.pos.x, 0.f, 1.f); +#endif + vo.uv = vin.uv; + + return vo; +} +)"; + +const char * const QuadPixelShader = R"( +struct VertexIn +{ + float4 pos : SV_POSITION; + float2 uv : TEXCOORD0; +}; 
+ +sampler sampler0; +Texture2D texture0; + +float4 main(in VertexIn vin) : SV_Target +{ + return texture0.Sample(sampler0, vin.uv); +} + +)"; + +const char * const MacroValues[] { "0", "1", "2", "3" }; + +static D3D_SHADER_MACRO VertexMacros[] +{ + { "pp_Gouraud", "1" }, + { nullptr, nullptr } +}; + +enum PixelMacroEnum { + MacroGouraud, + MacroTexture, + MacroUseAlpha, + MacroIgnoreTexA, + MacroShadInstr, + MacroOffset, + MacroFogCtrl, + MacroBumpMap, + MacroFogClamping, + MacroTriLinear, + MacroPalette, + MacroAlphaTest +}; + +static D3D_SHADER_MACRO PixelMacros[] +{ + { "pp_Gouraud", "1" }, + { "pp_Texture", "0" }, + { "pp_UseAlpha", "0" }, + { "pp_IgnoreTexA", "0" }, + { "pp_ShadInstr", "0" }, + { "pp_Offset", "0" }, + { "pp_FogCtrl", "0" }, + { "pp_BumpMap", "0" }, + { "FogClamping", "0" }, + { "pp_TriLinear", "0" }, + { "pp_Palette", "0" }, + { "cp_AlphaTest", "0" }, + { nullptr, nullptr } +}; + +const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, + bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, + bool trilinear, bool palette, bool gouraud, bool alphaTest) +{ + const u32 hash = (int)pp_Texture + | (pp_UseAlpha << 1) + | (pp_IgnoreTexA << 2) + | (pp_ShadInstr << 3) + | (pp_Offset << 5) + | (pp_FogCtrl << 6) + | (pp_BumpMap << 8) + | (fog_clamping << 9) + | (trilinear << 10) + | (palette << 11) + | (gouraud << 12) + | (alphaTest << 13); + auto& shader = shaders[hash]; + if (shader == nullptr) + { + verify(pp_ShadInstr < ARRAY_SIZE(MacroValues)); + verify(pp_FogCtrl < ARRAY_SIZE(MacroValues)); + PixelMacros[MacroGouraud].Definition = MacroValues[gouraud]; + PixelMacros[MacroTexture].Definition = MacroValues[pp_Texture]; + PixelMacros[MacroUseAlpha].Definition = MacroValues[pp_UseAlpha]; + PixelMacros[MacroIgnoreTexA].Definition = MacroValues[pp_IgnoreTexA]; + PixelMacros[MacroShadInstr].Definition = MacroValues[pp_ShadInstr]; + PixelMacros[MacroOffset].Definition = 
MacroValues[pp_Offset]; + PixelMacros[MacroFogCtrl].Definition = MacroValues[pp_FogCtrl]; + PixelMacros[MacroBumpMap].Definition = MacroValues[pp_BumpMap]; + PixelMacros[MacroFogClamping].Definition = MacroValues[fog_clamping]; + PixelMacros[MacroTriLinear].Definition = MacroValues[trilinear]; + PixelMacros[MacroPalette].Definition = MacroValues[palette]; + PixelMacros[MacroAlphaTest].Definition = MacroValues[alphaTest]; + + shader = compilePS(PixelShader, "main", PixelMacros); + verify(shader != nullptr); + } + return shader; +} + +const ComPtr& DX11Shaders::getVertexShader(bool gouraud) +{ + ComPtr& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader; + if (!vertexShader) + { + VertexMacros[0].Definition = MacroValues[gouraud]; + vertexShader = compileVS(VertexShader, "main", VertexMacros); + } + + return vertexShader; +} + +const ComPtr& DX11Shaders::getMVVertexShader() +{ + if (!modVolVertexShader) + modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr); + + return modVolVertexShader; +} + +const ComPtr& DX11Shaders::getModVolShader() +{ + if (!modVolShader) + modVolShader = compilePS(PixelShader, "modifierVolume", PixelMacros); + + return modVolShader; +} + +const ComPtr& DX11Shaders::getQuadVertexShader(bool rotate) +{ + ComPtr& shader = rotate ? quadRotateVertexShader : quadVertexShader; + if (!shader) + { + D3D_SHADER_MACRO macros[] + { + { "ROTATE", rotate ? 
"1" : "0" }, + { nullptr, nullptr } + }; + shader = compileVS(QuadVertexShader, "main", macros); + } + + return shader; +} + +const ComPtr& DX11Shaders::getQuadPixelShader() +{ + if (!quadPixelShader) + quadPixelShader = compilePS(QuadPixelShader, "main", nullptr); + + return quadPixelShader; +} + +ComPtr DX11Shaders::compileShader(const char* source, const char* function, const char* profile, const D3D_SHADER_MACRO *pDefines) +{ + ComPtr shaderBlob; + ComPtr errorBlob; + if (FAILED(D3DCompile(source, strlen(source), nullptr, pDefines, nullptr, function, profile, 0, 0, &shaderBlob.get(), &errorBlob.get()))) + ERROR_LOG(RENDERER, "Shader compilation failed: %s", errorBlob->GetBufferPointer()); + + return shaderBlob; +} + +ComPtr DX11Shaders::compileVS(const char* source, const char* function, const D3D_SHADER_MACRO *pDefines) +{ + ComPtr blob = compileShader(source, function, "vs_4_0", pDefines); + ComPtr shader; + if (blob) + { + if (FAILED(device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &shader.get()))) + ERROR_LOG(RENDERER, "Vertex shader creation failed"); + } + + return shader; +} + +ComPtr DX11Shaders::compilePS(const char* source, const char* function, const D3D_SHADER_MACRO *pDefines) +{ + ComPtr blob = compileShader(source, function, "ps_4_0", pDefines); + ComPtr shader; + if (blob) + { + if (device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &shader.get()) != S_OK) + ERROR_LOG(RENDERER, "Pixel shader creation failed"); + } + + return shader; +} + +ComPtr DX11Shaders::getVertexShaderBlob() +{ + VertexMacros[0].Definition = MacroValues[0]; + return compileShader(VertexShader, "main", "vs_4_0", VertexMacros); +} + +ComPtr DX11Shaders::getMVVertexShaderBlob() +{ + return compileShader(ModVolVertexShader, "main", "vs_4_0", nullptr); +} + +ComPtr DX11Shaders::getQuadVertexShaderBlob() +{ + return compileShader(QuadVertexShader, "main", "vs_4_0", nullptr); +} diff --git 
a/core/rend/dx11/dx11_shaders.h b/core/rend/dx11/dx11_shaders.h new file mode 100644 index 000000000..2e03e4ee8 --- /dev/null +++ b/core/rend/dx11/dx11_shaders.h @@ -0,0 +1,71 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include +#include "dx11context.h" +#include + +class DX11Shaders +{ +public: + void init(const ComPtr& device) + { + this->device = device; + } + + const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, + bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool trilinear, bool palette, bool gouraud, + bool alphaTest); + const ComPtr& getVertexShader(bool gouraud); + const ComPtr& getModVolShader(); + const ComPtr& getMVVertexShader(); + const ComPtr& getQuadPixelShader(); + const ComPtr& getQuadVertexShader(bool rotate); + + void term() + { + shaders.clear(); + gouraudVertexShader.reset(); + flatVertexShader.reset(); + modVolShader.reset(); + modVolVertexShader.reset(); + quadVertexShader.reset(); + quadRotateVertexShader.reset(); + quadPixelShader.reset(); + device.reset(); + } + ComPtr getVertexShaderBlob(); + ComPtr getMVVertexShaderBlob(); + ComPtr getQuadVertexShaderBlob(); + +private: + ComPtr compileShader(const char *source, const char* function, const char* profile, const D3D_SHADER_MACRO *pDefines); + ComPtr compileVS(const char *source, const char* function, 
const D3D_SHADER_MACRO *pDefines); + ComPtr compilePS(const char *source, const char* function, const D3D_SHADER_MACRO *pDefines); + + ComPtr device; + std::unordered_map> shaders; + ComPtr gouraudVertexShader; + ComPtr flatVertexShader; + ComPtr modVolShader; + ComPtr modVolVertexShader; + ComPtr quadPixelShader; + ComPtr quadVertexShader; + ComPtr quadRotateVertexShader; +}; diff --git a/core/rend/dx11/dx11_texture.cpp b/core/rend/dx11/dx11_texture.cpp new file mode 100644 index 000000000..4b1e66a20 --- /dev/null +++ b/core/rend/dx11/dx11_texture.cpp @@ -0,0 +1,120 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#include "dx11_texture.h" + +void DX11Texture::UploadToGPU(int width, int height, u8* temp_tex_buffer, bool mipmapped, bool mipmapsIncluded) +{ + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = width; + desc.Height = height; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + u32 bpp = 2; + switch (tex_type) + { + case TextureType::_5551: + desc.Format = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + case TextureType::_4444: + desc.Format = DXGI_FORMAT_B4G4R4A4_UNORM; + break; + case TextureType::_565: + desc.Format = DXGI_FORMAT_B5G6R5_UNORM; + break; + case TextureType::_8888: + bpp = 4; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + break; + case TextureType::_8: + bpp = 1; + desc.Format = DXGI_FORMAT_A8_UNORM; + break; + default: + return; + } + int mipmapLevels = 1; + if (mipmapsIncluded) + { + mipmapLevels = 0; + int dim = width; + while (dim != 0) + { + mipmapLevels++; + dim >>= 1; + } + } + desc.MipLevels = mipmapLevels; + + if (texture == nullptr) + { + if (mipmapped && !mipmapsIncluded) + { + desc.MipLevels = 0; + desc.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS; + desc.BindFlags |= D3D11_BIND_RENDER_TARGET; + mipmapLevels = 1; + } + if (SUCCEEDED(theDX11Context.getDevice()->CreateTexture2D(&desc, nullptr, &texture.get()))) + { + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = desc.Format; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = desc.MipLevels == 0 ? -1 : desc.MipLevels; + theDX11Context.getDevice()->CreateShaderResourceView(texture, &viewDesc, &textureView.get()); + } + verify(texture != nullptr); + verify(textureView != nullptr); + } + + for (int i = 0; i < mipmapLevels; i++) + { + u32 w = mipmapLevels == 1 ? width : 1 << i; + u32 h = mipmapLevels == 1 ? 
height : 1 << i; + theDX11Context.getDeviceContext()->UpdateSubresource(texture, mipmapLevels - i - 1, nullptr, temp_tex_buffer, w * bpp, w * bpp * h); + temp_tex_buffer += (1 << (2 * i)) * bpp; + } + if (mipmapped && !mipmapsIncluded) + theDX11Context.getDeviceContext()->GenerateMips(textureView); +} + +bool DX11Texture::Delete() +{ + if (!BaseTextureCacheData::Delete()) + return false; + + textureView.reset(); + texture.reset(); + return true; +} + +void DX11Texture::loadCustomTexture() +{ + u32 size = custom_width * custom_height; + u8 *p = custom_image_data; + while (size--) + { + // RGBA -> BGRA + std::swap(p[0], p[2]); + p += 4; + } + CheckCustomTexture(); +} diff --git a/core/rend/dx11/dx11_texture.h b/core/rend/dx11/dx11_texture.h new file mode 100644 index 000000000..5c0c94e7c --- /dev/null +++ b/core/rend/dx11/dx11_texture.h @@ -0,0 +1,87 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#pragma once +#include "rend/TexCache.h" +#include +#include "dx11context.h" +#include + +class DX11Texture final : public BaseTextureCacheData +{ +public: + ComPtr texture; + ComPtr textureView; + + std::string GetId() override { return std::to_string((uintptr_t)texture.get()); } + void UploadToGPU(int width, int height, u8* temp_tex_buffer, bool mipmapped, + bool mipmapsIncluded = false) override; + bool Delete() override; + void loadCustomTexture(); +}; + +class DX11TextureCache final : public BaseTextureCache +{ +public: + DX11TextureCache() { + DX11Texture::SetDirectXColorOrder(true); + } + ~DX11TextureCache() { + Clear(); + } + void Cleanup() + { + texturesToDelete.clear(); + CollectCleanup(); + } + void DeleteLater(ComPtr tex) { texturesToDelete.push_back(tex); } + +private: + std::vector> texturesToDelete; +}; + +class Samplers +{ +public: + ComPtr getSampler(bool linear, bool clampU = true, bool clampV = true, bool flipU = false, bool flipV = false) + { + int hash = clampU | (clampV << 1) | (flipU << 2) | (flipV << 3) | (linear << 4); + auto& sampler = samplers[hash]; + if (!sampler) + { + // Create texture sampler + D3D11_SAMPLER_DESC desc{}; + desc.Filter = linear ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_POINT; + desc.AddressU = flipU ? D3D11_TEXTURE_ADDRESS_MIRROR : clampU ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP; + desc.AddressV = flipV ? D3D11_TEXTURE_ADDRESS_MIRROR : clampV ? 
D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP; + desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + desc.ComparisonFunc = D3D11_COMPARISON_NEVER; + desc.MaxAnisotropy = 1; + desc.MaxLOD = D3D11_FLOAT32_MAX; + theDX11Context.getDevice()->CreateSamplerState(&desc, &sampler.get()); + } + return sampler; + } + + void term() { + samplers.clear(); + } + +private: + std::unordered_map> samplers; +}; diff --git a/core/rend/dx11/dx11context.cpp b/core/rend/dx11/dx11context.cpp new file mode 100644 index 000000000..acd9e4d54 --- /dev/null +++ b/core/rend/dx11/dx11context.cpp @@ -0,0 +1,294 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#include "dx11context.h" +#include "rend/gui.h" +#include "rend/osd.h" +#ifdef USE_SDL +#include "sdl/sdl.h" +#endif +#include "hw/pvr/Renderer_if.h" +#include "emulator.h" +#include "dx11_driver.h" +#ifdef TARGET_UWP +#include +#include +#endif + +DX11Context theDX11Context; + +bool DX11Context::init(bool keepCurrentWindow) +{ + NOTICE_LOG(RENDERER, "DX11 Context initializing"); + GraphicsContext::instance = this; +#ifdef USE_SDL + if (!keepCurrentWindow && !sdl_recreate_window(0)) + return false; +#endif +#ifdef TARGET_UWP + GAMING_DEVICE_MODEL_INFORMATION info {}; + GetGamingDeviceModelInformation(&info); + if (info.vendorId == GAMING_DEVICE_VENDOR_ID_MICROSOFT) + { + switch (info.deviceId) + { + case GAMING_DEVICE_DEVICE_ID_XBOX_ONE: + case GAMING_DEVICE_DEVICE_ID_XBOX_ONE_S: + NOTICE_LOG(RENDERER, "XBox One [S] detected. Setting resolution to 1920x1080."); + settings.display.width = 1920; + settings.display.height = 1080; + break; + + case GAMING_DEVICE_DEVICE_ID_XBOX_ONE_X: + case GAMING_DEVICE_DEVICE_ID_XBOX_ONE_X_DEVKIT: + default: + NOTICE_LOG(RENDERER, "XBox One X detected. Setting resolution to 3840x2160."); + settings.display.width = 3840; + settings.display.height = 2160; + break; + } + } +#endif + + D3D_FEATURE_LEVEL featureLevels[] = + { + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + }; + D3D11CreateDevice( + nullptr, // Specify nullptr to use the default adapter. + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + D3D11_CREATE_DEVICE_BGRA_SUPPORT, // | D3D11_CREATE_DEVICE_DEBUG, // FIXME + featureLevels, + ARRAYSIZE(featureLevels), + D3D11_SDK_VERSION, // UWP apps must set this to D3D11_SDK_VERSION. 
+ &pDevice.get(), + nullptr, + &pDeviceContext.get()); + + ComPtr dxgiDevice; + pDevice.as(dxgiDevice); + + ComPtr dxgiAdapter; + dxgiDevice->GetAdapter(&dxgiAdapter.get()); + DXGI_ADAPTER_DESC desc; + dxgiAdapter->GetDesc(&desc); + nowide::stackstring wdesc; + wdesc.convert(desc.Description); + adapterDesc = wdesc.c_str(); + adapterVersion = std::to_string(desc.Revision); + + ComPtr dxgiFactory; + dxgiAdapter->GetParent(__uuidof(IDXGIFactory1), (void **)&dxgiFactory.get()); + + ComPtr dxgiFactory2; + dxgiFactory.as(dxgiFactory2); + HRESULT hr; + + if (dxgiFactory2) + { + // DX 11.1 + DXGI_SWAP_CHAIN_DESC1 desc{}; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + desc.BufferCount = 2; + desc.SampleDesc.Count = 1; + desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + +#ifdef TARGET_UWP + desc.Width = settings.display.width; + desc.Height = settings.display.height; + hr = dxgiFactory2->CreateSwapChainForCoreWindow(pDevice, (IUnknown *)window, &desc, nullptr, &swapchain1.get()); +#else + hr = dxgiFactory2->CreateSwapChainForHwnd(pDevice, (HWND)window, &desc, nullptr, nullptr, &swapchain1.get()); +#endif + if (SUCCEEDED(hr)) + swapchain1.as(swapchain); + } + else + { + // DX 11.0 + swapchain1.reset(); +#ifdef TARGET_UWP + return false; +#endif + DXGI_SWAP_CHAIN_DESC desc{}; + desc.BufferCount = 2; + desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.BufferDesc.RefreshRate.Numerator = 60; + desc.BufferDesc.RefreshRate.Denominator = 1; + desc.OutputWindow = (HWND)window; + desc.Windowed = TRUE; + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + desc.BufferCount = 2; + desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + + hr = dxgiFactory->CreateSwapChain(pDevice, &desc, &swapchain.get()); + } + if (FAILED(hr)) + return false; + + imguiDriver = std::unique_ptr(new DX11Driver()); + resize(); + gui_init(); 
+// TODO overlay.init(pDevice); + return ImGui_ImplDX11_Init(pDevice, pDeviceContext); +} + +void DX11Context::term() +{ + NOTICE_LOG(RENDERER, "DX11 Context terminating"); + GraphicsContext::instance = nullptr; + ID3D11RenderTargetView* views[1] {}; + pDeviceContext->OMSetRenderTargets(ARRAY_SIZE(views), views, nullptr); +//TODO overlay.term(); + imguiDriver.reset(); + ImGui_ImplDX11_Shutdown(); + gui_term(); + renderTargetView.reset(); + swapchain1.reset(); + swapchain.reset(); + pDeviceContext.reset(); + pDevice.reset(); +} + +void DX11Context::Present() +{ + if (!frameRendered) + return; + frameRendered = false; + bool swapOnVSync = !settings.input.fastForwardMode && config::VSync; + HRESULT hr = swapchain->Present(swapOnVSync ? 1 : 0, 0); + if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) + { + WARN_LOG(RENDERER, "Present failed: device removed/reset"); + handleDeviceLost(); + } + else if (FAILED(hr)) + WARN_LOG(RENDERER, "Present failed %x", hr); +} + +void DX11Context::EndImGuiFrame() +{ + verify((bool)pDevice); + if (!overlayOnly) + { + pDeviceContext->OMSetRenderTargets(1, &renderTargetView.get(), nullptr); + const FLOAT black[4] { 0.f, 0.f, 0.f, 1.f }; + pDeviceContext->ClearRenderTargetView(renderTargetView, black); + if (renderer != nullptr) + renderer->RenderLastFrame(); + } +// if (overlayOnly) +// { +// if (crosshairsNeeded() || config::FloatVMUs) +// overlay.draw(settings.display.width, settings.display.height, config::FloatVMUs, true); +// } +// else +// { +// overlay.draw(settings.display.width, settings.display.height, true, false); +// } + ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData()); + frameRendered = true; +} + +void DX11Context::resize() +{ + if (!pDevice) + return; + if (swapchain) + { + BOOL fullscreen; + swapchain->GetFullscreenState(&fullscreen, nullptr); + NOTICE_LOG(RENDERER, "DX11Context::resize: current display is %d x %d fullscreen %d", settings.display.width, settings.display.height, fullscreen); + 
ID3D11RenderTargetView* views[1] {}; + pDeviceContext->OMSetRenderTargets(ARRAY_SIZE(views), views, nullptr); + renderTargetView.reset(); +#ifdef TARGET_UWP + // FIXME how to get correct width/height? + HRESULT hr = swapchain->ResizeBuffers(2, settings.display.width, settings.display.height, DXGI_FORMAT_R8G8B8A8_UNORM, 0); +#else + DXGI_SWAP_CHAIN_DESC swapchainDesc; + swapchain->GetDesc(&swapchainDesc); + NOTICE_LOG(RENDERER, "current swapchain desc: %d x %d windowed %d", swapchainDesc.BufferDesc.Width, swapchainDesc.BufferDesc.Height, swapchainDesc.Windowed); + + HRESULT hr = swapchain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH); + if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) + { + handleDeviceLost(); + return; + } +#endif + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "ResizeBuffers failed"); + return; + } + + // Create a render target view + ComPtr backBuffer; + hr = swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void **)&backBuffer.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "swapChain->GetBuffer() failed"); + return; + } + + hr = pDevice->CreateRenderTargetView(backBuffer, nullptr, &renderTargetView.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "CreateRenderTargetView failed"); + return; + } + pDeviceContext->OMSetRenderTargets(1, &renderTargetView.get(), nullptr); + + if (swapchain1) + { + DXGI_SWAP_CHAIN_DESC1 desc; + swapchain1->GetDesc1(&desc); +#ifndef TARGET_UWP + settings.display.width = desc.Width; + settings.display.height = desc.Height; +#endif + NOTICE_LOG(RENDERER, "swapchain desc: %d x %d", desc.Width, desc.Height); + } + else + { + DXGI_SWAP_CHAIN_DESC desc; + swapchain->GetDesc(&desc); + settings.display.width = desc.BufferDesc.Width; + settings.display.height = desc.BufferDesc.Height; + } + } + // TODO minimized window +} + +void DX11Context::handleDeviceLost() +{ + rend_term_renderer(); + term(); + init(true); + rend_init_renderer(); + rend_resize_renderer(); +} 
diff --git a/core/rend/dx11/dx11context.h b/core/rend/dx11/dx11context.h new file mode 100644 index 000000000..2de92bcd5 --- /dev/null +++ b/core/rend/dx11/dx11context.h @@ -0,0 +1,70 @@ +/* + Copyright 2021 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include "build.h" +#if defined(_WIN32) +#include "types.h" +#include +#include +#include +#include +#include "imgui_impl_dx11.h" +#include "../dx9/comptr.h" +// TODO #include "d3d_overlay.h" +#include "wsi/context.h" + +class DX11Context : public GraphicsContext +{ +public: + bool init(bool keepCurrentWindow = false); + void term() override; + void EndImGuiFrame(); + void Present(); + const ComPtr& getDevice() const { return pDevice; } + const ComPtr& getDeviceContext() const { return pDeviceContext; } + ComPtr& getRenderTarget() { return renderTargetView; } + void resize() override; + void setOverlay(bool overlayOnly) { this->overlayOnly = overlayOnly; } + std::string getDriverName() override { + return adapterDesc; + } + std::string getDriverVersion() override { + return adapterVersion; + } + void setFrameRendered() { + frameRendered = true; + } + +private: + void handleDeviceLost(); + + ComPtr pDevice; + ComPtr pDeviceContext; + ComPtr swapchain; + ComPtr swapchain1; + ComPtr renderTargetView; + bool overlayOnly = false; + // TODO D3DOverlay overlay; + bool swapOnVSync = false; + bool frameRendered = false; + 
std::string adapterDesc; + std::string adapterVersion; +}; +extern DX11Context theDX11Context; +#endif diff --git a/core/rend/dx11/imgui_impl_dx11.cpp b/core/rend/dx11/imgui_impl_dx11.cpp new file mode 100644 index 000000000..ff6418c35 --- /dev/null +++ b/core/rend/dx11/imgui_impl_dx11.cpp @@ -0,0 +1,552 @@ +// dear imgui: Renderer Backend for DirectX11 +// This needs to be used along with a Platform Backend (e.g. Win32) + +// Implemented features: +// [X] Renderer: User texture binding. Use 'ID3D11ShaderResourceView*' as ImTextureID. Read the FAQ about ImTextureID! +// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. + +// You can copy and use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. +// Read online: https://github.com/ocornut/imgui/tree/master/docs + +// CHANGELOG +// (minor and older changes stripped away, please see git history for details) +// 2021-02-18: DirectX11: Change blending equation to preserve alpha in output buffer. +// 2019-08-01: DirectX11: Fixed code querying the Geometry Shader state (would generally error with Debug layer enabled). +// 2019-07-21: DirectX11: Backup, clear and restore Geometry Shader is any is bound when calling ImGui_ImplDX10_RenderDrawData. Clearing Hull/Domain/Compute shaders without backup/restore. +// 2019-05-29: DirectX11: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. +// 2019-04-30: DirectX11: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. +// 2018-12-03: Misc: Added #pragma comment statement to automatically link with d3dcompiler.lib when using D3DCompile(). +// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. 
+// 2018-08-01: DirectX11: Querying for IDXGIFactory instead of IDXGIFactory1 to increase compatibility. +// 2018-07-13: DirectX11: Fixed unreleased resources in Init and Shutdown functions. +// 2018-06-08: Misc: Extracted imgui_impl_dx11.cpp/.h away from the old combined DX11+Win32 example. +// 2018-06-08: DirectX11: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle. +// 2018-02-16: Misc: Obsoleted the io.RenderDrawListsFn callback and exposed ImGui_ImplDX11_RenderDrawData() in the .h file so you can call it yourself. +// 2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves. +// 2016-05-07: DirectX11: Disabling depth-write. + +#include "imgui/imgui.h" +#include "imgui_impl_dx11.h" + +// DirectX +#include +#include +#include +#ifdef _MSC_VER +#pragma comment(lib, "d3dcompiler") // Automatically link with d3dcompiler.lib as we are using D3DCompile() below. 
+#endif + +// DirectX data +static ID3D11Device* g_pd3dDevice = NULL; +static ID3D11DeviceContext* g_pd3dDeviceContext = NULL; +static IDXGIFactory* g_pFactory = NULL; +static ID3D11Buffer* g_pVB = NULL; +static ID3D11Buffer* g_pIB = NULL; +static ID3D11VertexShader* g_pVertexShader = NULL; +static ID3D11InputLayout* g_pInputLayout = NULL; +static ID3D11Buffer* g_pVertexConstantBuffer = NULL; +static ID3D11PixelShader* g_pPixelShader = NULL; +ID3D11SamplerState* g_pFontSampler = NULL; +ID3D11ShaderResourceView*g_pFontTextureView = NULL; +static ID3D11RasterizerState* g_pRasterizerState = NULL; +static ID3D11BlendState* g_pBlendState = NULL; +static ID3D11DepthStencilState* g_pDepthStencilState = NULL; +static int g_VertexBufferSize = 5000, g_IndexBufferSize = 10000; + +struct VERTEX_CONSTANT_BUFFER +{ + float mvp[4][4]; +}; + +static void ImGui_ImplDX11_SetupRenderState(ImDrawData* draw_data, ID3D11DeviceContext* ctx) +{ + // Setup viewport + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(D3D11_VIEWPORT)); + vp.Width = draw_data->DisplaySize.x; + vp.Height = draw_data->DisplaySize.y; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + vp.TopLeftX = vp.TopLeftY = 0; + ctx->RSSetViewports(1, &vp); + + // Setup shader and vertex buffers + unsigned int stride = sizeof(ImDrawVert); + unsigned int offset = 0; + ctx->IASetInputLayout(g_pInputLayout); + ctx->IASetVertexBuffers(0, 1, &g_pVB, &stride, &offset); + ctx->IASetIndexBuffer(g_pIB, sizeof(ImDrawIdx) == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT, 0); + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->VSSetShader(g_pVertexShader, NULL, 0); + ctx->VSSetConstantBuffers(0, 1, &g_pVertexConstantBuffer); + ctx->PSSetShader(g_pPixelShader, NULL, 0); + ctx->PSSetSamplers(0, 1, &g_pFontSampler); + ctx->GSSetShader(NULL, NULL, 0); + ctx->HSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used.. 
+    ctx->DSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used..
+    ctx->CSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used..
+
+    // Setup blend state
+    const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f };
+    ctx->OMSetBlendState(g_pBlendState, blend_factor, 0xffffffff);
+    ctx->OMSetDepthStencilState(g_pDepthStencilState, 0);
+    ctx->RSSetState(g_pRasterizerState);
+}
+
+// Render function
+// Uploads all ImGui vertex/index data into the shared dynamic buffers, backs up the
+// pipeline state this backend modifies, issues one DrawIndexed per draw command and
+// finally restores the backed-up state. Returns silently (drawing nothing) when the
+// display is minimized or when a buffer cannot be created or mapped.
+void ImGui_ImplDX11_RenderDrawData(ImDrawData* draw_data)
+{
+    // Avoid rendering when minimized
+    if (draw_data->DisplaySize.x <= 0.0f || draw_data->DisplaySize.y <= 0.0f)
+        return;
+
+    ID3D11DeviceContext* ctx = g_pd3dDeviceContext;
+
+    // Create and grow vertex/index buffers if needed
+    if (!g_pVB || g_VertexBufferSize < draw_data->TotalVtxCount)
+    {
+        if (g_pVB) { g_pVB->Release(); g_pVB = NULL; }
+        g_VertexBufferSize = draw_data->TotalVtxCount + 5000;
+        D3D11_BUFFER_DESC desc;
+        memset(&desc, 0, sizeof(D3D11_BUFFER_DESC));
+        desc.Usage = D3D11_USAGE_DYNAMIC;
+        desc.ByteWidth = g_VertexBufferSize * sizeof(ImDrawVert);
+        desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
+        desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        desc.MiscFlags = 0;
+        if (g_pd3dDevice->CreateBuffer(&desc, NULL, &g_pVB) < 0)
+            return;
+    }
+    if (!g_pIB || g_IndexBufferSize < draw_data->TotalIdxCount)
+    {
+        if (g_pIB) { g_pIB->Release(); g_pIB = NULL; }
+        g_IndexBufferSize = draw_data->TotalIdxCount + 10000;
+        D3D11_BUFFER_DESC desc;
+        memset(&desc, 0, sizeof(D3D11_BUFFER_DESC));
+        desc.Usage = D3D11_USAGE_DYNAMIC;
+        desc.ByteWidth = g_IndexBufferSize * sizeof(ImDrawIdx);
+        desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
+        desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        if (g_pd3dDevice->CreateBuffer(&desc, NULL, &g_pIB) < 0)
+            return;
+    }
+
+    // Upload vertex/index data into a single contiguous GPU buffer
+    D3D11_MAPPED_SUBRESOURCE vtx_resource, idx_resource;
+    if (ctx->Map(g_pVB, 0, D3D11_MAP_WRITE_DISCARD, 0, &vtx_resource) != S_OK)
+        return;
+    if (ctx->Map(g_pIB, 0, D3D11_MAP_WRITE_DISCARD, 0, &idx_resource) != S_OK)
+    {
+        // The vertex buffer was mapped successfully just above: do not leave it mapped when bailing out.
+        ctx->Unmap(g_pVB, 0);
+        return;
+    }
+    ImDrawVert* vtx_dst = (ImDrawVert*)vtx_resource.pData;
+    ImDrawIdx* idx_dst = (ImDrawIdx*)idx_resource.pData;
+    for (int n = 0; n < draw_data->CmdListsCount; n++)
+    {
+        const ImDrawList* cmd_list = draw_data->CmdLists[n];
+        memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert));
+        memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));
+        vtx_dst += cmd_list->VtxBuffer.Size;
+        idx_dst += cmd_list->IdxBuffer.Size;
+    }
+    ctx->Unmap(g_pVB, 0);
+    ctx->Unmap(g_pIB, 0);
+
+    // Setup orthographic projection matrix into our constant buffer
+    // Our visible imgui space lies from draw_data->DisplayPos (top left) to draw_data->DisplayPos+draw_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps.
+    {
+        D3D11_MAPPED_SUBRESOURCE mapped_resource;
+        if (ctx->Map(g_pVertexConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_resource) != S_OK)
+            return;
+        VERTEX_CONSTANT_BUFFER* constant_buffer = (VERTEX_CONSTANT_BUFFER*)mapped_resource.pData;
+        float L = draw_data->DisplayPos.x;
+        float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x;
+        float T = draw_data->DisplayPos.y;
+        float B = draw_data->DisplayPos.y + draw_data->DisplaySize.y;
+        float mvp[4][4] =
+        {
+            { 2.0f/(R-L),   0.0f,           0.0f,       0.0f },
+            { 0.0f,         2.0f/(T-B),     0.0f,       0.0f },
+            { 0.0f,         0.0f,           0.5f,       0.0f },
+            { (R+L)/(L-R),  (T+B)/(B-T),    0.5f,       1.0f },
+        };
+        memcpy(&constant_buffer->mvp, mvp, sizeof(mvp));
+        ctx->Unmap(g_pVertexConstantBuffer, 0);
+    }
+
+    // Backup DX state that will be modified to restore it afterwards (unfortunately this is very ugly looking and verbose. Close your eyes!)
+    struct BACKUP_DX11_STATE
+    {
+        UINT                        ScissorRectsCount, ViewportsCount;
+        D3D11_RECT                  ScissorRects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
+        D3D11_VIEWPORT              Viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
+        ID3D11RasterizerState*      RS;
+        ID3D11BlendState*           BlendState;
+        FLOAT                       BlendFactor[4];
+        UINT                        SampleMask;
+        UINT                        StencilRef;
+        ID3D11DepthStencilState*    DepthStencilState;
+        ID3D11ShaderResourceView*   PSShaderResource;
+        ID3D11SamplerState*         PSSampler;
+        ID3D11PixelShader*          PS;
+        ID3D11VertexShader*         VS;
+        ID3D11GeometryShader*       GS;
+        UINT                        PSInstancesCount, VSInstancesCount, GSInstancesCount;
+        ID3D11ClassInstance         *PSInstances[256], *VSInstances[256], *GSInstances[256];   // 256 is max according to PSSetShader documentation
+        D3D11_PRIMITIVE_TOPOLOGY    PrimitiveTopology;
+        ID3D11Buffer*               IndexBuffer, *VertexBuffer, *VSConstantBuffer;
+        UINT                        IndexBufferOffset, VertexBufferStride, VertexBufferOffset;
+        DXGI_FORMAT                 IndexBufferFormat;
+        ID3D11InputLayout*          InputLayout;
+    };
+    BACKUP_DX11_STATE old = {};
+    old.ScissorRectsCount = old.ViewportsCount = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE;
+    ctx->RSGetScissorRects(&old.ScissorRectsCount, old.ScissorRects);
+    ctx->RSGetViewports(&old.ViewportsCount, old.Viewports);
+    ctx->RSGetState(&old.RS);
+    ctx->OMGetBlendState(&old.BlendState, old.BlendFactor, &old.SampleMask);
+    ctx->OMGetDepthStencilState(&old.DepthStencilState, &old.StencilRef);
+    ctx->PSGetShaderResources(0, 1, &old.PSShaderResource);
+    ctx->PSGetSamplers(0, 1, &old.PSSampler);
+    old.PSInstancesCount = old.VSInstancesCount = old.GSInstancesCount = 256;
+    ctx->PSGetShader(&old.PS, old.PSInstances, &old.PSInstancesCount);
+    ctx->VSGetShader(&old.VS, old.VSInstances, &old.VSInstancesCount);
+    ctx->VSGetConstantBuffers(0, 1, &old.VSConstantBuffer);
+    ctx->GSGetShader(&old.GS, old.GSInstances, &old.GSInstancesCount);
+
+    ctx->IAGetPrimitiveTopology(&old.PrimitiveTopology);
+    ctx->IAGetIndexBuffer(&old.IndexBuffer, &old.IndexBufferFormat, &old.IndexBufferOffset);
+    ctx->IAGetVertexBuffers(0, 1, &old.VertexBuffer, &old.VertexBufferStride, &old.VertexBufferOffset);
+    ctx->IAGetInputLayout(&old.InputLayout);
+
+    // Setup desired DX state
+    ImGui_ImplDX11_SetupRenderState(draw_data, ctx);
+
+    // Render command lists
+    // (Because we merged all buffers into a single one, we maintain our own offset into them)
+    int global_idx_offset = 0;
+    int global_vtx_offset = 0;
+    ImVec2 clip_off = draw_data->DisplayPos;
+    for (int n = 0; n < draw_data->CmdListsCount; n++)
+    {
+        const ImDrawList* cmd_list = draw_data->CmdLists[n];
+        for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
+        {
+            const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
+            if (pcmd->UserCallback != NULL)
+            {
+                // User callback, registered via ImDrawList::AddCallback()
+                // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.)
+                if (pcmd->UserCallback == ImDrawCallback_ResetRenderState)
+                    ImGui_ImplDX11_SetupRenderState(draw_data, ctx);
+                else
+                    pcmd->UserCallback(cmd_list, pcmd);
+            }
+            else
+            {
+                // Apply scissor/clipping rectangle
+                const D3D11_RECT r = { (LONG)(pcmd->ClipRect.x - clip_off.x), (LONG)(pcmd->ClipRect.y - clip_off.y), (LONG)(pcmd->ClipRect.z - clip_off.x), (LONG)(pcmd->ClipRect.w - clip_off.y) };
+                ctx->RSSetScissorRects(1, &r);
+
+                // Bind texture, Draw
+                ID3D11ShaderResourceView* texture_srv = (ID3D11ShaderResourceView*)pcmd->TextureId;
+                ctx->PSSetShaderResources(0, 1, &texture_srv);
+                ctx->DrawIndexed(pcmd->ElemCount, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset);
+            }
+        }
+        global_idx_offset += cmd_list->IdxBuffer.Size;
+        global_vtx_offset += cmd_list->VtxBuffer.Size;
+    }
+
+    // Restore modified DX state
+    // Every interface obtained from the *Get* calls above is released once it has been re-bound.
+    ctx->RSSetScissorRects(old.ScissorRectsCount, old.ScissorRects);
+    ctx->RSSetViewports(old.ViewportsCount, old.Viewports);
+    ctx->RSSetState(old.RS); if (old.RS) old.RS->Release();
+    ctx->OMSetBlendState(old.BlendState, old.BlendFactor, old.SampleMask); if (old.BlendState) old.BlendState->Release();
+    ctx->OMSetDepthStencilState(old.DepthStencilState, old.StencilRef); if (old.DepthStencilState) old.DepthStencilState->Release();
+    ctx->PSSetShaderResources(0, 1, &old.PSShaderResource); if (old.PSShaderResource) old.PSShaderResource->Release();
+    ctx->PSSetSamplers(0, 1, &old.PSSampler); if (old.PSSampler) old.PSSampler->Release();
+    ctx->PSSetShader(old.PS, old.PSInstances, old.PSInstancesCount); if (old.PS) old.PS->Release();
+    for (UINT i = 0; i < old.PSInstancesCount; i++) if (old.PSInstances[i]) old.PSInstances[i]->Release();
+    ctx->VSSetShader(old.VS, old.VSInstances, old.VSInstancesCount); if (old.VS) old.VS->Release();
+    for (UINT i = 0; i < old.VSInstancesCount; i++) if (old.VSInstances[i]) old.VSInstances[i]->Release();
+    ctx->VSSetConstantBuffers(0, 1, &old.VSConstantBuffer); if (old.VSConstantBuffer) old.VSConstantBuffer->Release();
+    ctx->GSSetShader(old.GS, old.GSInstances, old.GSInstancesCount); if (old.GS) old.GS->Release();
+    // The geometry shader class instances were previously never released.
+    for (UINT i = 0; i < old.GSInstancesCount; i++) if (old.GSInstances[i]) old.GSInstances[i]->Release();
+    ctx->IASetPrimitiveTopology(old.PrimitiveTopology);
+    ctx->IASetIndexBuffer(old.IndexBuffer, old.IndexBufferFormat, old.IndexBufferOffset); if (old.IndexBuffer) old.IndexBuffer->Release();
+    ctx->IASetVertexBuffers(0, 1, &old.VertexBuffer, &old.VertexBufferStride, &old.VertexBufferOffset); if (old.VertexBuffer) old.VertexBuffer->Release();
+    ctx->IASetInputLayout(old.InputLayout); if (old.InputLayout) old.InputLayout->Release();
+}
+
+// Builds the RGBA32 font atlas, uploads it into a shader-resource texture, stores the
+// resulting view as the atlas texture id and creates the sampler used to draw it.
+static void ImGui_ImplDX11_CreateFontsTexture()
+{
+    // Build texture atlas
+    ImGuiIO& io = ImGui::GetIO();
+    unsigned char* pixels;
+    int width, height;
+    io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);
+
+    // Upload texture to graphics system
+    {
+        D3D11_TEXTURE2D_DESC desc;
+        ZeroMemory(&desc, sizeof(desc));
+        desc.Width = width;
+        desc.Height = height;
+        desc.MipLevels = 1;
+        desc.ArraySize = 1;
+        desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+        desc.SampleDesc.Count = 1;
+        desc.Usage = D3D11_USAGE_DEFAULT;
+        desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+        desc.CPUAccessFlags = 0;
+
+        ID3D11Texture2D* pTexture = NULL;
+        D3D11_SUBRESOURCE_DATA subResource;
+        subResource.pSysMem = pixels;
+        subResource.SysMemPitch = desc.Width * 4;
+        subResource.SysMemSlicePitch = 0;
+        g_pd3dDevice->CreateTexture2D(&desc, &subResource, &pTexture);
+        IM_ASSERT(pTexture != NULL);
+        // IM_ASSERT may be compiled out (e.g. if it maps to assert() under NDEBUG), so guard the dereferences below explicitly.
+        if (pTexture == NULL)
+            return;
+
+        // Create texture view
+        D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
+        ZeroMemory(&srvDesc, sizeof(srvDesc));
+        srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+        srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+        srvDesc.Texture2D.MipLevels = desc.MipLevels;
+        srvDesc.Texture2D.MostDetailedMip = 0;
+        g_pd3dDevice->CreateShaderResourceView(pTexture, &srvDesc, &g_pFontTextureView);
+        pTexture->Release();
+    }
+
+    // Store our identifier
+    io.Fonts->SetTexID((ImTextureID)g_pFontTextureView);
+
+    // Create texture sampler
+    {
+        D3D11_SAMPLER_DESC desc;
+        ZeroMemory(&desc, sizeof(desc));
+        desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+        desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP;
+        desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP;
+        desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
+        desc.MipLODBias = 0.f;
+        desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
+        desc.MinLOD = 0.f;
+        desc.MaxLOD = 0.f;
+        g_pd3dDevice->CreateSamplerState(&desc, &g_pFontSampler);
+    }
+}
+
+// Creates every device object used by this backend: vertex/pixel shaders, input layout,
+// constant buffer, blend/rasterizer/depth-stencil states and the font texture + sampler.
+// Returns false when no device is set or when a shader fails to compile or be created.
+bool ImGui_ImplDX11_CreateDeviceObjects()
+{
+    if (!g_pd3dDevice)
+        return false;
+    // Always start from a clean slate. ImGui_ImplDX11_InvalidateDeviceObjects() only releases
+    // non-NULL objects, and calling it unconditionally also frees whatever a previous,
+    // partially failed creation attempt left behind (NewFrame retries while g_pFontSampler is NULL).
+    ImGui_ImplDX11_InvalidateDeviceObjects();
+
+    // By using D3DCompile() from <d3dcompiler.h> / d3dcompiler.lib, we introduce a dependency to a given version of d3dcompiler_XX.dll (see D3DCOMPILER_DLL_A)
+    // If you would like to use this DX11 sample code but remove this dependency you can:
+    //  1) compile once, save the compiled shader blobs into a file or source code and pass them to CreateVertexShader()/CreatePixelShader() [preferred solution]
+    //  2) use code to detect any version of the DLL and grab a pointer to D3DCompile from the DLL.
+    // See https://github.com/ocornut/imgui/pull/638 for sources and details.
+
+    // Create the vertex shader
+    {
+        static const char* vertexShader =
+            "cbuffer vertexBuffer : register(b0) \
+            {\
+              float4x4 ProjectionMatrix; \
+            };\
+            struct VS_INPUT\
+            {\
+              float2 pos : POSITION;\
+              float4 col : COLOR0;\
+              float2 uv  : TEXCOORD0;\
+            };\
+            \
+            struct PS_INPUT\
+            {\
+              float4 pos : SV_POSITION;\
+              float4 col : COLOR0;\
+              float2 uv  : TEXCOORD0;\
+            };\
+            \
+            PS_INPUT main(VS_INPUT input)\
+            {\
+              PS_INPUT output;\
+              output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f));\
+              output.col = input.col;\
+              output.uv  = input.uv;\
+              return output;\
+            }";
+
+        ID3DBlob* vertexShaderBlob;
+        if (FAILED(D3DCompile(vertexShader, strlen(vertexShader), NULL, NULL, NULL, "main", "vs_4_0", 0, 0, &vertexShaderBlob, NULL)))
+            return false; // NB: Pass ID3DBlob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob!
+        if (g_pd3dDevice->CreateVertexShader(vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), NULL, &g_pVertexShader) != S_OK)
+        {
+            vertexShaderBlob->Release();
+            return false;
+        }
+
+        // Create the input layout
+        D3D11_INPUT_ELEMENT_DESC local_layout[] =
+        {
+            { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT,   0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 },
+            { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT,   0, (UINT)IM_OFFSETOF(ImDrawVert, uv),  D3D11_INPUT_PER_VERTEX_DATA, 0 },
+            { "COLOR",    0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 },
+        };
+        if (g_pd3dDevice->CreateInputLayout(local_layout, 3, vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), &g_pInputLayout) != S_OK)
+        {
+            vertexShaderBlob->Release();
+            return false;
+        }
+        vertexShaderBlob->Release();
+
+        // Create the constant buffer
+        {
+            D3D11_BUFFER_DESC desc;
+            // Zero the whole descriptor so that no field (e.g. StructureByteStride) is passed uninitialized.
+            ZeroMemory(&desc, sizeof(desc));
+            desc.ByteWidth = sizeof(VERTEX_CONSTANT_BUFFER);
+            desc.Usage = D3D11_USAGE_DYNAMIC;
+            desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+            desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+            desc.MiscFlags = 0;
+            g_pd3dDevice->CreateBuffer(&desc, NULL, &g_pVertexConstantBuffer);
+        }
+    }
+
+    // Create the pixel shader
+    {
+        static const char* pixelShader =
+            "struct PS_INPUT\
+            {\
+            float4 pos : SV_POSITION;\
+            float4 col : COLOR0;\
+            float2 uv  : TEXCOORD0;\
+            };\
+            sampler sampler0;\
+            Texture2D texture0;\
+            \
+            float4 main(PS_INPUT input) : SV_Target\
+            {\
+            float4 out_col = input.col * texture0.Sample(sampler0, input.uv); \
+            return out_col; \
+            }";
+
+        ID3DBlob* pixelShaderBlob;
+        if (FAILED(D3DCompile(pixelShader, strlen(pixelShader), NULL, NULL, NULL, "main", "ps_4_0", 0, 0, &pixelShaderBlob, NULL)))
+            return false; // NB: Pass ID3DBlob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob!
+        if (g_pd3dDevice->CreatePixelShader(pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize(), NULL, &g_pPixelShader) != S_OK)
+        {
+            pixelShaderBlob->Release();
+            return false;
+        }
+        pixelShaderBlob->Release();
+    }
+
+    // Create the blending setup
+    {
+        D3D11_BLEND_DESC desc;
+        ZeroMemory(&desc, sizeof(desc));
+        desc.AlphaToCoverageEnable = false;
+        desc.RenderTarget[0].BlendEnable = true;
+        desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
+        desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
+        desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
+        desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
+        desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA;
+        desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
+        desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
+        g_pd3dDevice->CreateBlendState(&desc, &g_pBlendState);
+    }
+
+    // Create the rasterizer state
+    {
+        D3D11_RASTERIZER_DESC desc;
+        ZeroMemory(&desc, sizeof(desc));
+        desc.FillMode = D3D11_FILL_SOLID;
+        desc.CullMode = D3D11_CULL_NONE;
+        desc.ScissorEnable = true;
+        desc.DepthClipEnable = true;
+        g_pd3dDevice->CreateRasterizerState(&desc, &g_pRasterizerState);
+    }
+
+    // Create depth-stencil State
+    {
+        D3D11_DEPTH_STENCIL_DESC desc;
+        ZeroMemory(&desc, sizeof(desc));
+        desc.DepthEnable = false;
+        desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
+        desc.DepthFunc = D3D11_COMPARISON_ALWAYS;
+        desc.StencilEnable = false;
+        desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
+        desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
+        desc.BackFace = desc.FrontFace;
+        g_pd3dDevice->CreateDepthStencilState(&desc, &g_pDepthStencilState);
+    }
+
+    ImGui_ImplDX11_CreateFontsTexture();
+
+    return true;
+}
+
+void ImGui_ImplDX11_InvalidateDeviceObjects()
+{
+    if (!g_pd3dDevice)
+        return;
+
+    if (g_pFontSampler) { g_pFontSampler->Release(); g_pFontSampler = NULL; }
+    if (g_pFontTextureView) { g_pFontTextureView->Release(); g_pFontTextureView = NULL; ImGui::GetIO().Fonts->SetTexID(NULL); } // We copied g_pFontTextureView to io.Fonts->TexID so let's clear that as well.
+    if (g_pIB) { g_pIB->Release(); g_pIB = NULL; }
+    if (g_pVB) { g_pVB->Release(); g_pVB = NULL; }
+
+    if (g_pBlendState) { g_pBlendState->Release(); g_pBlendState = NULL; }
+    if (g_pDepthStencilState) { g_pDepthStencilState->Release(); g_pDepthStencilState = NULL; }
+    if (g_pRasterizerState) { g_pRasterizerState->Release(); g_pRasterizerState = NULL; }
+    if (g_pPixelShader) { g_pPixelShader->Release(); g_pPixelShader = NULL; }
+    if (g_pVertexConstantBuffer) { g_pVertexConstantBuffer->Release(); g_pVertexConstantBuffer = NULL; }
+    if (g_pInputLayout) { g_pInputLayout->Release(); g_pInputLayout = NULL; }
+    if (g_pVertexShader) { g_pVertexShader->Release(); g_pVertexShader = NULL; }
+}
+
+// Registers the backend with ImGui and stores (and AddRef's) the device and its immediate
+// context, together with the DXGI factory the device was created from.
+// Returns true on success, false when the DXGI factory cannot be retrieved from the device;
+// in that case no global is modified and no reference is taken.
+bool ImGui_ImplDX11_Init(ID3D11Device* device, ID3D11DeviceContext* device_context)
+{
+    // Setup backend capabilities flags
+    ImGuiIO& io = ImGui::GetIO();
+    io.BackendRendererName = "imgui_impl_dx11";
+    io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset;  // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes.
+
+    // Get factory from device
+    IDXGIDevice* pDXGIDevice = NULL;
+    IDXGIAdapter* pDXGIAdapter = NULL;
+    IDXGIFactory* pFactory = NULL;
+    bool initialized = false;
+
+    if (device->QueryInterface(IID_PPV_ARGS(&pDXGIDevice)) == S_OK)
+        if (pDXGIDevice->GetParent(IID_PPV_ARGS(&pDXGIAdapter)) == S_OK)
+            if (pDXGIAdapter->GetParent(IID_PPV_ARGS(&pFactory)) == S_OK)
+            {
+                g_pd3dDevice = device;
+                g_pd3dDeviceContext = device_context;
+                g_pFactory = pFactory;
+                initialized = true;
+            }
+    if (pDXGIDevice) pDXGIDevice->Release();
+    if (pDXGIAdapter) pDXGIAdapter->Release();
+    // If the factory lookup failed, the globals were not assigned by this call:
+    // report the failure instead of calling AddRef() on them.
+    if (!initialized)
+        return false;
+    g_pd3dDevice->AddRef();
+    g_pd3dDeviceContext->AddRef();
+
+    return true;
+}
+
+void ImGui_ImplDX11_Shutdown()
+{
+    ImGui_ImplDX11_InvalidateDeviceObjects();
+    if (g_pFactory) { g_pFactory->Release(); g_pFactory = NULL; }
+    if (g_pd3dDevice) { g_pd3dDevice->Release(); g_pd3dDevice = NULL; }
+    if (g_pd3dDeviceContext) { g_pd3dDeviceContext->Release(); g_pd3dDeviceContext = NULL; }
+}
+
+void ImGui_ImplDX11_NewFrame()
+{
+    if (!g_pFontSampler)
+        ImGui_ImplDX11_CreateDeviceObjects();
+}
diff --git a/core/rend/dx11/imgui_impl_dx11.h b/core/rend/dx11/imgui_impl_dx11.h
new file mode 100644
index 000000000..51df4611f
--- /dev/null
+++ b/core/rend/dx11/imgui_impl_dx11.h
@@ -0,0 +1,26 @@
+// dear imgui: Renderer Backend for DirectX11
+// This needs to be used along with a Platform Backend (e.g. Win32)
+
+// Implemented features:
+//  [X] Renderer: User texture binding. Use 'ID3D11ShaderResourceView*' as ImTextureID. Read the FAQ about ImTextureID!
+//  [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices.
+
+// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
+// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
+// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
+// Read online: https://github.com/ocornut/imgui/tree/master/docs + +#pragma once +#include "imgui/imgui.h" // IMGUI_IMPL_API + +struct ID3D11Device; +struct ID3D11DeviceContext; + +IMGUI_IMPL_API bool ImGui_ImplDX11_Init(ID3D11Device* device, ID3D11DeviceContext* device_context); +IMGUI_IMPL_API void ImGui_ImplDX11_Shutdown(); +IMGUI_IMPL_API void ImGui_ImplDX11_NewFrame(); +IMGUI_IMPL_API void ImGui_ImplDX11_RenderDrawData(ImDrawData* draw_data); + +// Use if you want to reset your rendering device without losing Dear ImGui state. +IMGUI_IMPL_API void ImGui_ImplDX11_InvalidateDeviceObjects(); +IMGUI_IMPL_API bool ImGui_ImplDX11_CreateDeviceObjects(); diff --git a/core/rend/dx9/CMakeLists.txt b/core/rend/dx9/CMakeLists.txt new file mode 100644 index 000000000..a338b22d7 --- /dev/null +++ b/core/rend/dx9/CMakeLists.txt @@ -0,0 +1,44 @@ +project(dx9renderer) + +add_library(${PROJECT_NAME} STATIC dxcontext.cpp) + +target_compile_definitions(${PROJECT_NAME} PRIVATE + $<$:_CRT_NONSTDC_NO_WARNINGS> + $<$:_CRT_SECURE_NO_WARNINGS> + $<$:_USE_MATH_DEFINES> + $<$:NOMINMAX> + $<$:TEST_AUTOMATION> + $<$:DEBUGFAST> + USE_SDL) + +target_sources(${PROJECT_NAME} PRIVATE + comptr.h + d3d_overlay.h + d3d_overlay.cpp + d3d_renderer.h + d3d_renderer.cpp + d3d_shaders.h + d3d_shaders.cpp + d3d_texture.h + d3d_texture.cpp + dx9_driver.h + dxcontext.h + imgui_impl_dx9.h + imgui_impl_dx9.cpp) + +target_include_directories(${PROJECT_NAME} PRIVATE + ../.. 
+ ../../deps + ../../deps/nowide/include + ../../deps/glm + ../../deps/SDL/include) +if(NOT MINGW) + target_include_directories(${PROJECT_NAME} PRIVATE "$ENV{DXSDK_DIR}/Include") + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + target_link_directories(${PROJECT_NAME} PUBLIC "$ENV{DXSDK_DIR}/Lib/x64") + else() + target_link_directories(${PROJECT_NAME} PUBLIC "$ENV{DXSDK_DIR}/Lib/x86") + endif() +endif() + +target_link_libraries(${PROJECT_NAME} PUBLIC d3d9 d3dx9) diff --git a/core/rend/dx9/comptr.h b/core/rend/dx9/comptr.h index aa59262e6..b01787cf4 100644 --- a/core/rend/dx9/comptr.h +++ b/core/rend/dx9/comptr.h @@ -66,6 +66,11 @@ public: ptr->Release(); } + template + HRESULT as(ComPtr& p) const { + return ptr->QueryInterface(IID_PPV_ARGS(&p.get())); + } + private: T *ptr = nullptr; }; diff --git a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index 2b306fba2..76c1ec45e 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -642,6 +642,7 @@ void D3DRenderer::drawModVols(int first, int count) device->SetVertexDeclaration(modVolVtxDecl); device->SetStreamSource(0, modvolBuffer, 0, 3 * sizeof(float)); + devCache.SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATER); devCache.SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE); devCache.SetRenderState(D3DRS_STENCILENABLE, TRUE); devCache.SetRenderState(D3DRS_ZWRITEENABLE, D3DZB_FALSE); @@ -1066,10 +1067,7 @@ bool D3DRenderer::Render() device->SetPixelShaderConstantF(2, ps_FOG_COL_RAM, 1); // Fog density and scale constants - u8* fog_density = (u8*)&FOG_DENSITY; - float fog_den_mant = fog_density[1] / 128.0f; //bit 7 -> x. 
bit, so [6:0] -> fraction -> /128 - s32 fog_den_exp = (s8)fog_density[0]; - float fog_den_float = fog_den_mant * powf(2.0f, (float)fog_den_exp) * config::ExtraDepthScale; + float fog_den_float = FOG_DENSITY.get() * config::ExtraDepthScale; float fogDensityAndScale[4]= { fog_den_float, 1.f - FPU_SHAD_SCALE.scale_factor / 256.f, 0, 1 }; device->SetPixelShaderConstantF(3, fogDensityAndScale, 1); diff --git a/core/rend/dx9/d3d_shaders.cpp b/core/rend/dx9/d3d_shaders.cpp index e21bde07f..cfda172bc 100644 --- a/core/rend/dx9/d3d_shaders.cpp +++ b/core/rend/dx9/d3d_shaders.cpp @@ -20,7 +20,7 @@ #define SHADER_DEBUG 0 // D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION -const char *VertexShader = R"( +const char * const VertexShader = R"( struct vertex_in { float4 pos : POSITION; @@ -61,7 +61,7 @@ vertex_out main(in vertex_in vin) )"; -const char *PixelShader = R"( +const char * const PixelShader = R"( #define PI 3.1415926f @@ -69,7 +69,7 @@ struct pixel { float4 uv : TEXCOORD0; float4 col : COLOR0; -#if pp_Texture == 1 && (pp_BumpMap == 1 || pp_Offset == 1) +#if pp_BumpMap == 1 || pp_Offset == 1 float4 offs : COLOR1; #endif @@ -137,12 +137,12 @@ PSO main(in pixel inpix) #if pp_Gouraud == 1 float4 color = inpix.col / inpix.uv.w; - #if pp_Texture == 1 && (pp_BumpMap == 1 || pp_Offset == 1) + #if pp_BumpMap == 1 || pp_Offset == 1 float4 offset = inpix.offs / inpix.uv.w; #endif #else float4 color = inpix.col; - #if pp_Texture == 1 && (pp_BumpMap == 1 || pp_Offset == 1) + #if pp_BumpMap == 1 || pp_Offset == 1 float4 offset = inpix.offs; #endif #endif @@ -223,7 +223,7 @@ PSO modifierVolume(float4 uv : TEXCOORD0) } )"; -const char *MacroValues[] { "0", "1", "2", "3" }; +const char * const MacroValues[] { "0", "1", "2", "3" }; static D3DXMACRO VertexMacros[] { diff --git a/core/rend/dx9/d3d_texture.h b/core/rend/dx9/d3d_texture.h index 38d36bc8a..ca72d773b 100644 --- a/core/rend/dx9/d3d_texture.h +++ b/core/rend/dx9/d3d_texture.h @@ -16,6 +16,7 @@ You should have received a copy of 
the GNU General Public License along with Flycast. If not, see . */ +#pragma once #include "rend/TexCache.h" #include #include "dxcontext.h" diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 32ed9beff..9da3aeb78 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -736,10 +736,7 @@ static bool RenderFrame(int width, int height) gl4ShaderUniforms.ps_FOG_COL_RAM[2]=fog_colram_bgra [0]/255.0f; //Fog density constant - u8* fog_density=(u8*)&FOG_DENSITY; - float fog_den_mant=fog_density[1]/128.0f; //bit 7 -> x. bit, so [6:0] -> fraction -> /128 - s32 fog_den_exp=(s8)fog_density[0]; - gl4ShaderUniforms.fog_den_float = fog_den_mant * powf(2.0f,fog_den_exp) * config::ExtraDepthScale; + gl4ShaderUniforms.fog_den_float = FOG_DENSITY.get() * config::ExtraDepthScale; gl4ShaderUniforms.fog_clamp_min[0] = ((pvrrc.fog_clamp_min >> 16) & 0xFF) / 255.0f; gl4ShaderUniforms.fog_clamp_min[1] = ((pvrrc.fog_clamp_min >> 8) & 0xFF) / 255.0f; diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5cad3ccdf..5818b5a11 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -1191,10 +1191,7 @@ bool RenderFrame(int width, int height) ShaderUniforms.ps_FOG_COL_RAM[2] = fog_colram_bgra[0] / 255.0f; //Fog density constant - u8* fog_density = (u8*)&FOG_DENSITY; - float fog_den_mant = fog_density[1] / 128.0f; //bit 7 -> x. 
bit, so [6:0] -> fraction -> /128 - s32 fog_den_exp = (s8)fog_density[0]; - ShaderUniforms.fog_den_float = fog_den_mant * powf(2.0f, fog_den_exp) * config::ExtraDepthScale; + ShaderUniforms.fog_den_float = FOG_DENSITY.get() * config::ExtraDepthScale; ShaderUniforms.fog_clamp_min[0] = ((pvrrc.fog_clamp_min >> 16) & 0xFF) / 255.0f; ShaderUniforms.fog_clamp_min[1] = ((pvrrc.fog_clamp_min >> 8) & 0xFF) / 255.0f; diff --git a/core/rend/gui.cpp b/core/rend/gui.cpp index bc0160ef7..565929e6e 100644 --- a/core/rend/gui.cpp +++ b/core/rend/gui.cpp @@ -1551,6 +1551,10 @@ static void gui_display_settings() renderApi = 2; perPixel = false; break; + case RenderType::DirectX11: + renderApi = 3; + perPixel = false; + break; } ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, normal_padding); @@ -1643,12 +1647,17 @@ static void gui_display_settings() OptionCheckbox("Rotate Screen 90°", config::Rotate90, "Rotate the screen 90° counterclockwise"); OptionCheckbox("Delay Frame Swapping", config::DelayFrameSwapping, "Useful to avoid flashing screen or glitchy videos. 
Not recommended on slow platforms"); -#if defined(USE_VULKAN) || defined(USE_DX9) +#if defined(USE_VULKAN) || defined(USE_DX9) || defined(_WIN32) ImGui::Text("Graphics API:"); -#if defined(USE_VULKAN) && defined(USE_DX9) - constexpr u32 columns = 3; -#else - constexpr u32 columns = 2; + u32 columns = 1; +#ifdef USE_VULKAN + columns++; +#endif +#ifdef _WIN32 + columns++; +#ifdef USE_DX9 + columns++; +#endif #endif ImGui::Columns(columns, "renderApi", false); ImGui::RadioButton("Open GL", &renderApi, 0); @@ -1657,8 +1666,12 @@ static void gui_display_settings() ImGui::RadioButton("Vulkan", &renderApi, 1); ImGui::NextColumn(); #endif +#ifdef _WIN32 #ifdef USE_DX9 - ImGui::RadioButton("DirectX", &renderApi, 2); + ImGui::RadioButton("DirectX 9", &renderApi, 2); + ImGui::NextColumn(); +#endif + ImGui::RadioButton("DirectX 11", &renderApi, 3); ImGui::NextColumn(); #endif ImGui::Columns(1, NULL, false); @@ -1755,6 +1768,9 @@ static void gui_display_settings() case 2: config::RendererType = RenderType::DirectX9; break; + case 3: + config::RendererType = RenderType::DirectX11; + break; } } if (ImGui::BeginTabItem("Audio")) @@ -2020,6 +2036,8 @@ static void gui_display_settings() #else "macOS" #endif +#elif defined(TARGET_UWP) + "Windows Universal Platform" #elif defined(_WIN32) "Windows" #elif defined(__SWITCH__) @@ -2038,7 +2056,7 @@ static void gui_display_settings() header("Open GL"); else if (isVulkan(config::RendererType)) header("Vulkan"); - else if (config::RendererType == RenderType::DirectX9) + else if (isDirectX(config::RendererType)) header("DirectX"); ImGui::Text("Driver Name: %s", GraphicsContext::Instance()->getDriverName().c_str()); ImGui::Text("Version: %s", GraphicsContext::Instance()->getDriverVersion().c_str()); @@ -2098,12 +2116,17 @@ static void gui_display_content() ImGui::Unindent(10 * scaling); static ImGuiTextFilter filter; -#if !defined(__ANDROID__) && !defined(TARGET_IPHONE) +#if !defined(__ANDROID__) && !defined(TARGET_IPHONE) && 
!defined(TARGET_UWP) ImGui::SameLine(0, 32 * scaling); filter.Draw("Filter"); #endif if (gui_state != GuiState::SelectDisk) { +#if 0 //defined(TARGET_UWP) + void gui_load_game(); + if (ImGui::Button("Load...")) + gui_load_game(); +#endif ImGui::SameLine(ImGui::GetContentRegionMax().x - ImGui::CalcTextSize("Settings").x - ImGui::GetStyle().FramePadding.x * 2.0f); if (ImGui::Button("Settings")) gui_state = GuiState::Settings; diff --git a/core/rend/mainui.cpp b/core/rend/mainui.cpp index ce9d3a88f..a450b5087 100644 --- a/core/rend/mainui.cpp +++ b/core/rend/mainui.cpp @@ -89,8 +89,8 @@ void mainui_loop() if (config::RendererType != currentRenderer || forceReinit) { mainui_term(); - int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : 2; - int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : 2; + int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : currentRenderer == RenderType::DirectX9 ? 2 : 3; + int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : config::RendererType == RenderType::DirectX9 ? 2 : 3; if (newApi != prevApi || forceReinit) switchRenderApi(); mainui_init(); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index 99f8309c9..afb3639f4 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -74,10 +74,7 @@ protected: fragUniforms.sp_FOG_COL_RAM[2] = fog_colram_bgra[0] / 255.0f; //Fog density constant - u8* fog_density = (u8*)&FOG_DENSITY; - float fog_den_mant = fog_density[1] / 128.0f; //bit 7 -> x. 
bit, so [6:0] -> fraction -> /128 - s32 fog_den_exp = (s8)fog_density[0]; - fragUniforms.sp_FOG_DENSITY = fog_den_mant * powf(2.0f, fog_den_exp) * config::ExtraDepthScale; + fragUniforms.sp_FOG_DENSITY = FOG_DENSITY.get() * config::ExtraDepthScale; fragUniforms.colorClampMin[0] = ((pvrrc.fog_clamp_min >> 16) & 0xFF) / 255.0f; fragUniforms.colorClampMin[1] = ((pvrrc.fog_clamp_min >> 8) & 0xFF) / 255.0f; diff --git a/core/sdl/sdl.cpp b/core/sdl/sdl.cpp index efa41a717..edc61473c 100644 --- a/core/sdl/sdl.cpp +++ b/core/sdl/sdl.cpp @@ -524,12 +524,21 @@ bool sdl_recreate_window(u32 flags) } #endif -#ifdef USE_DX9 - if (config::RendererType == RenderType::DirectX9) - GraphicsContext::Instance()->setWindow(getNativeHwnd()); - else + void *windowCtx = window; +#ifdef _WIN32 + if (config::RendererType == RenderType::DirectX11 || config::RendererType == RenderType::DirectX9) +#ifdef TARGET_UWP + { + SDL_SysWMinfo wmInfo; + SDL_VERSION(&wmInfo.version); + SDL_GetWindowWMInfo(window, &wmInfo); + windowCtx = wmInfo.info.winrt.window; + } +#else + windowCtx = getNativeHwnd(); #endif - GraphicsContext::Instance()->setWindow(window); +#endif + GraphicsContext::Instance()->setWindow(windowCtx); int displayIndex = SDL_GetWindowDisplayIndex(window); if (displayIndex < 0) @@ -538,8 +547,13 @@ bool sdl_recreate_window(u32 flags) { SDL_DisplayMode mode{}; if (SDL_GetDesktopDisplayMode(displayIndex, &mode) == 0) { - INFO_LOG(RENDERER, "Monitor refresh rate: %d Hz", mode.refresh_rate); + NOTICE_LOG(RENDERER, "Monitor refresh rate: %d Hz (%d x %d)", mode.refresh_rate, mode.w, mode.h); settings.display.refreshRate = mode.refresh_rate; + if (flags & SDL_WINDOW_FULLSCREEN) + { + settings.display.width = mode.w; + settings.display.height = mode.h; + } } } diff --git a/core/types.h b/core/types.h index 07b44104c..a55da5478 100644 --- a/core/types.h +++ b/core/types.h @@ -283,6 +283,7 @@ enum class RenderType { Vulkan = 4, Vulkan_OIT = 5, DirectX9 = 1, + DirectX11 = 2, }; static inline bool 
isOpenGL(RenderType renderType) { @@ -291,6 +292,9 @@ static inline bool isOpenGL(RenderType renderType) { static inline bool isVulkan(RenderType renderType) { return renderType == RenderType::Vulkan || renderType == RenderType::Vulkan_OIT; } +static inline bool isDirectX(RenderType renderType) { + return renderType == RenderType::DirectX9 || renderType == RenderType::DirectX11; +} enum class KeyboardLayout { JP = 1, diff --git a/core/wsi/switcher.cpp b/core/wsi/switcher.cpp index 9d6299f76..b7ca1dae1 100644 --- a/core/wsi/switcher.cpp +++ b/core/wsi/switcher.cpp @@ -25,6 +25,7 @@ #include "gl_context.h" #include "rend/dx9/dxcontext.h" +#include "rend/dx11/dx11context.h" #ifdef USE_VULKAN #include "rend/vulkan/vulkan_context.h" @@ -46,6 +47,16 @@ void initRenderApi(void *window, void *display) config::RendererType = RenderType::OpenGL; } #endif +#ifdef _WIN32 + if (config::RendererType == RenderType::DirectX11) + { + theDX11Context.setWindow(window, display); + if (theDX11Context.init()) + return; + WARN_LOG(RENDERER, "DirectX 11 init failed. Falling back to DirectX 9."); + config::RendererType = RenderType::DirectX9; + } +#endif #ifdef USE_DX9 if (config::RendererType == RenderType::DirectX9) { @@ -53,7 +64,7 @@ void initRenderApi(void *window, void *display) if (theDXContext.init()) return; // Fall back to Open GL - WARN_LOG(RENDERER, "DirectX init failed. Falling back to Open GL."); + WARN_LOG(RENDERER, "DirectX 9 init failed. Falling back to Open GL."); config::RendererType = RenderType::OpenGL; } #endif diff --git a/shell/uwp/package.appxManifest b/shell/uwp/package.appxManifest index 12f185754..dd95cbc62 100644 --- a/shell/uwp/package.appxManifest +++ b/shell/uwp/package.appxManifest @@ -4,7 +4,7 @@ xmlns:uap="http://schemas.microsoft.com/appx/manifest/uap/windows10" IgnorableNamespaces="uap mp"> - + Flycast