// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <algorithm>

#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"

#include "Core/ConfigManager.h"

#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DUtil.h"

#include "VideoBackends/D3D12/NativeVertexFormat.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"

#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h"

namespace DX12
{
static bool s_cache_is_corrupted = false;
static LinearDiskCache<SmallPsoDiskDesc, u8> s_pso_disk_cache;

class PipelineStateCacheInserter : public LinearDiskCacheReader<SmallPsoDiskDesc, u8>
{
public:
  void Read(const SmallPsoDiskDesc& key, const u8* value, u32 value_size)
  {
    if (s_cache_is_corrupted)
      return;

    D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
    desc.pRootSignature = D3D::default_root_signature;
    desc.RTVFormats[0] =
        DXGI_FORMAT_R8G8B8A8_UNORM;          // This state changes in PSTextureEncoder::Encode.
    desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;  // This state changes in PSTextureEncoder::Encode.
    desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
    desc.NumRenderTargets = 1;
    desc.SampleMask = UINT_MAX;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;

    desc.GS = ShaderCache::GetGeometryShaderFromUid(&key.gs_uid);
    desc.PS = ShaderCache::GetPixelShaderFromUid(&key.ps_uid);
    desc.VS = ShaderCache::GetVertexShaderFromUid(&key.vs_uid);

    if (!desc.PS.pShaderBytecode || !desc.VS.pShaderBytecode)
    {
      s_cache_is_corrupted = true;
      return;
    }

    BlendState blend_state = {};
    blend_state.hex = key.blend_state_hex;
    desc.BlendState = StateCache::GetDesc12(blend_state);

    ZMode depth_stencil_state = {};
    depth_stencil_state.hex = key.depth_stencil_state_hex;
    desc.DepthStencilState = StateCache::GetDesc12(depth_stencil_state);

    RasterizerState rasterizer_state = {};
    rasterizer_state.hex = key.rasterizer_state_hex;
    desc.RasterizerState = StateCache::GetDesc12(rasterizer_state);

    desc.PrimitiveTopologyType = key.topology;

    // search for a cached native vertex format
    const PortableVertexDeclaration& native_vtx_decl = key.vertex_declaration;
    std::unique_ptr<NativeVertexFormat>& native =
        (*VertexLoaderManager::GetNativeVertexFormatMap())[native_vtx_decl];

    if (!native)
    {
      native.reset(g_vertex_manager->CreateNativeVertexFormat(native_vtx_decl));
    }

    desc.InputLayout = reinterpret_cast<D3DVertexFormat*>(native.get())->GetActiveInputLayout12();

    desc.CachedPSO.CachedBlobSizeInBytes = value_size;
    desc.CachedPSO.pCachedBlob = value;

    ID3D12PipelineState* pso = nullptr;
    HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));

    if (FAILED(hr))
    {
      // Failure can occur if disk cache is corrupted, or a driver upgrade invalidates the existing
      // blobs.
      // In this case, we need to clear the disk cache.

      s_cache_is_corrupted = true;
      return;
    }

    SmallPsoDesc small_desc = {};
    small_desc.blend_state.hex = key.blend_state_hex;
    small_desc.depth_stencil_state.hex = key.depth_stencil_state_hex;
    small_desc.rasterizer_state.hex = key.rasterizer_state_hex;
    small_desc.gs_bytecode = desc.GS;
    small_desc.ps_bytecode = desc.PS;
    small_desc.vs_bytecode = desc.VS;
    small_desc.input_layout = reinterpret_cast<D3DVertexFormat*>(native.get());

    gx_state_cache.m_small_pso_map[small_desc] = pso;
  }
};

StateCache::StateCache()
{
  m_current_pso_desc = {};

  m_current_pso_desc.RTVFormats[0] =
      DXGI_FORMAT_R8G8B8A8_UNORM;  // This state changes in PSTextureEncoder::Encode.
  m_current_pso_desc.DSVFormat =
      DXGI_FORMAT_D32_FLOAT;  // This state changes in PSTextureEncoder::Encode.
  m_current_pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
  m_current_pso_desc.NumRenderTargets = 1;
  m_current_pso_desc.SampleMask = UINT_MAX;
}

void StateCache::Init()
{
  // Root signature isn't available at time of StateCache construction, so fill it in now.
  gx_state_cache.m_current_pso_desc.pRootSignature = D3D::default_root_signature;

  // Multi-sample configuration isn't available at time of StateCache construction, so fill it in
  // now.
  gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
  gx_state_cache.m_current_pso_desc.SampleDesc.Quality = 0;

  if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  std::string cache_filename =
      StringFromFormat("%sdx12-%s-pso.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
                       SConfig::GetInstance().m_strGameID.c_str());

  PipelineStateCacheInserter inserter;
  s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

  if (s_cache_is_corrupted)
  {
    // If a PSO fails to create, that means either:
    // - The file itself is corrupt.
    // - A driver/HW change has occurred, causing the existing cache blobs to be invalid.
    //
    // In either case, we want to re-create the disk cache. This should not be a frequent
    // occurrence.

    s_pso_disk_cache.Close();

    for (auto it : gx_state_cache.m_small_pso_map)
    {
      SAFE_RELEASE(it.second);
    }
    gx_state_cache.m_small_pso_map.clear();

    File::Delete(cache_filename);

    s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

    s_cache_is_corrupted = false;
  }
}

D3D12_SAMPLER_DESC StateCache::GetDesc12(SamplerState state)
{
  const unsigned int d3d_mip_filters[4] = {
      TexMode0::TEXF_NONE, TexMode0::TEXF_POINT, TexMode0::TEXF_LINEAR,
      TexMode0::TEXF_NONE,  // reserved
  };
  const D3D12_TEXTURE_ADDRESS_MODE d3d_clamps[4] = {
      D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP,
      D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
      D3D12_TEXTURE_ADDRESS_MODE_WRAP  // reserved
  };

  D3D12_SAMPLER_DESC sampdc;

  unsigned int mip = d3d_mip_filters[state.min_filter & 3];

  sampdc.MaxAnisotropy = 1;
  if (g_ActiveConfig.iMaxAnisotropy > 0 && !SamplerCommon::IsBpTexMode0PointFiltering(state))
  {
    sampdc.Filter = D3D12_FILTER_ANISOTROPIC;
    sampdc.MaxAnisotropy = 1 << g_ActiveConfig.iMaxAnisotropy;
  }
  else if (state.min_filter & 4)  // linear min filter
  {
    if (state.mag_filter)  // linear mag filter
    {
      if (mip == TexMode0::TEXF_NONE)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
      else if (mip == TexMode0::TEXF_POINT)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
      else if (mip == TexMode0::TEXF_LINEAR)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
    }
    else  // point mag filter
    {
      if (mip == TexMode0::TEXF_NONE)
        sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
      else if (mip == TexMode0::TEXF_POINT)
        sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
      else if (mip == TexMode0::TEXF_LINEAR)
        sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
    }
  }
  else  // point min filter
  {
    if (state.mag_filter)  // linear mag filter
    {
      if (mip == TexMode0::TEXF_NONE)
        sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
      else if (mip == TexMode0::TEXF_POINT)
        sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
      else if (mip == TexMode0::TEXF_LINEAR)
        sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR;
    }
    else  // point mag filter
    {
      if (mip == TexMode0::TEXF_NONE)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
      else if (mip == TexMode0::TEXF_POINT)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
      else if (mip == TexMode0::TEXF_LINEAR)
        sampdc.Filter = D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
    }
  }

  sampdc.AddressU = d3d_clamps[state.wrap_s];
  sampdc.AddressV = d3d_clamps[state.wrap_t];
  sampdc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;

  sampdc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;

  sampdc.BorderColor[0] = sampdc.BorderColor[1] = sampdc.BorderColor[2] = sampdc.BorderColor[3] =
      1.0f;

  sampdc.MaxLOD = SamplerCommon::AreBpTexMode0MipmapsEnabled(state) ? state.max_lod / 16.f : 0.f;
  sampdc.MinLOD = std::min(state.min_lod / 16.f, sampdc.MaxLOD);
  sampdc.MipLODBias = static_cast<s32>(state.lod_bias) / 32.0f;

  return sampdc;
}

D3D12_BLEND GetBlendingAlpha(D3D12_BLEND blend)
{
  switch (blend)
  {
  case D3D12_BLEND_SRC_COLOR:
    return D3D12_BLEND_SRC_ALPHA;
  case D3D12_BLEND_INV_SRC_COLOR:
    return D3D12_BLEND_INV_SRC_ALPHA;
  case D3D12_BLEND_DEST_COLOR:
    return D3D12_BLEND_DEST_ALPHA;
  case D3D12_BLEND_INV_DEST_COLOR:
    return D3D12_BLEND_INV_DEST_ALPHA;

  default:
    return blend;
  }
}

D3D12_BLEND_DESC StateCache::GetDesc12(BlendState state)
{
  if (!state.blend_enable)
  {
    state.src_blend = D3D12_BLEND_ONE;
    state.dst_blend = D3D12_BLEND_ZERO;
    state.blend_op = D3D12_BLEND_OP_ADD;
    state.use_dst_alpha = false;
  }

  D3D12_BLEND_DESC blenddc = {FALSE,  // BOOL AlphaToCoverageEnable;
                              FALSE,  // BOOL IndependentBlendEnable;
                              {
                                  state.blend_enable,   // BOOL BlendEnable;
                                  FALSE,                // BOOL LogicOpEnable;
                                  state.src_blend,      // D3D12_BLEND SrcBlend;
                                  state.dst_blend,      // D3D12_BLEND DestBlend;
                                  state.blend_op,       // D3D12_BLEND_OP BlendOp;
                                  state.src_blend,      // D3D12_BLEND SrcBlendAlpha;
                                  state.dst_blend,      // D3D12_BLEND DestBlendAlpha;
                                  state.blend_op,       // D3D12_BLEND_OP BlendOpAlpha;
                                  D3D12_LOGIC_OP_NOOP,  // D3D12_LOGIC_OP LogicOp
                                  state.write_mask      // UINT8 RenderTargetWriteMask;
                              }};

  blenddc.RenderTarget[0].SrcBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].SrcBlend);
  blenddc.RenderTarget[0].DestBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].DestBlend);

  if (state.use_dst_alpha)
  {
    blenddc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
    blenddc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO;
    blenddc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
  }

  return blenddc;
}

D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state)
{
  return {D3D12_FILL_MODE_SOLID,
          state.cull_mode,
          false,
          0,
          0.f,
          0,
          false,
          true,
          false,
          0,
          D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF};
}

inline D3D12_DEPTH_STENCIL_DESC StateCache::GetDesc12(ZMode state)
{
  D3D12_DEPTH_STENCIL_DESC depthdc;

  depthdc.StencilEnable = FALSE;
  depthdc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK;
  depthdc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK;

  D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = {D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP,
                                                 D3D12_STENCIL_OP_KEEP,
                                                 D3D12_COMPARISON_FUNC_ALWAYS};
  depthdc.FrontFace = defaultStencilOp;
  depthdc.BackFace = defaultStencilOp;

  const D3D12_COMPARISON_FUNC d3dCmpFuncs[8] = {
      D3D12_COMPARISON_FUNC_NEVER,      D3D12_COMPARISON_FUNC_GREATER,
      D3D12_COMPARISON_FUNC_EQUAL,      D3D12_COMPARISON_FUNC_GREATER_EQUAL,
      D3D12_COMPARISON_FUNC_LESS,       D3D12_COMPARISON_FUNC_NOT_EQUAL,
      D3D12_COMPARISON_FUNC_LESS_EQUAL, D3D12_COMPARISON_FUNC_ALWAYS};

  if (state.testenable)
  {
    depthdc.DepthEnable = TRUE;
    depthdc.DepthWriteMask =
        state.updateenable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
    depthdc.DepthFunc = d3dCmpFuncs[state.func];
  }
  else
  {
    // if the test is disabled write is disabled too
    depthdc.DepthEnable = FALSE;
    depthdc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  }

  return depthdc;
}

HRESULT StateCache::GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc,
                                                    ID3D12PipelineState** pso)
{
  auto it = m_pso_map.find(*pso_desc);

  if (it == m_pso_map.end())
  {
    // Not found, create new PSO.

    ID3D12PipelineState* new_pso = nullptr;
    HRESULT hr = D3D::device12->CreateGraphicsPipelineState(pso_desc, IID_PPV_ARGS(&new_pso));

    if (FAILED(hr))
    {
      return hr;
    }

    m_pso_map[*pso_desc] = new_pso;
    *pso = new_pso;
  }
  else
  {
    *pso = it->second;
  }

  return S_OK;
}

HRESULT StateCache::GetPipelineStateObjectFromCache(
    SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology,
    const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid)
{
  auto it = m_small_pso_map.find(*pso_desc);

  if (it == m_small_pso_map.end())
  {
    // Not found, create new PSO.

    // RootSignature, SampleMask, SampleDesc, NumRenderTargets, RTVFormats, DSVFormat
    // never change so they are set in constructor and forgotten.
    m_current_pso_desc.GS = pso_desc->gs_bytecode;
    m_current_pso_desc.PS = pso_desc->ps_bytecode;
    m_current_pso_desc.VS = pso_desc->vs_bytecode;
    m_current_pso_desc.BlendState = GetDesc12(pso_desc->blend_state);
    m_current_pso_desc.DepthStencilState = GetDesc12(pso_desc->depth_stencil_state);
    m_current_pso_desc.RasterizerState = GetDesc12(pso_desc->rasterizer_state);
    m_current_pso_desc.PrimitiveTopologyType = topology;
    m_current_pso_desc.InputLayout = pso_desc->input_layout->GetActiveInputLayout12();

    ID3D12PipelineState* new_pso = nullptr;
    HRESULT hr =
        D3D::device12->CreateGraphicsPipelineState(&m_current_pso_desc, IID_PPV_ARGS(&new_pso));

    if (FAILED(hr))
    {
      return hr;
    }

    m_small_pso_map[*pso_desc] = new_pso;
    *pso = new_pso;

    // This contains all of the information needed to reconstruct a PSO at startup.
    SmallPsoDiskDesc disk_desc = {};
    disk_desc.blend_state_hex = pso_desc->blend_state.hex;
    disk_desc.depth_stencil_state_hex = pso_desc->depth_stencil_state.hex;
    disk_desc.rasterizer_state_hex = pso_desc->rasterizer_state.hex;
    disk_desc.ps_uid = *ps_uid;
    disk_desc.vs_uid = *vs_uid;
    disk_desc.gs_uid = *gs_uid;
    disk_desc.vertex_declaration = pso_desc->input_layout->GetVertexDeclaration();
    disk_desc.topology = topology;

    // This shouldn't fail.. but if it does, don't cache to disk.
    ID3DBlob* psoBlob = nullptr;
    hr = new_pso->GetCachedBlob(&psoBlob);

    if (SUCCEEDED(hr))
    {
      s_pso_disk_cache.Append(disk_desc, reinterpret_cast<const u8*>(psoBlob->GetBufferPointer()),
                              static_cast<u32>(psoBlob->GetBufferSize()));
      psoBlob->Release();
    }
  }
  else
  {
    *pso = it->second;
  }

  return S_OK;
}

void StateCache::OnMSAASettingsChanged()
{
  for (auto& it : m_small_pso_map)
  {
    SAFE_RELEASE(it.second);
  }
  m_small_pso_map.clear();

  // Update sample count for new PSOs being created
  gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}

void StateCache::Clear()
{
  for (auto& it : m_pso_map)
  {
    SAFE_RELEASE(it.second);
  }
  m_pso_map.clear();

  for (auto& it : m_small_pso_map)
  {
    SAFE_RELEASE(it.second);
  }
  m_small_pso_map.clear();

  s_pso_disk_cache.Sync();
  s_pso_disk_cache.Close();
}

}  // namespace DX12