diff --git a/data/resources/shaders/reshade/Shaders/DisplayDepth.fx b/data/resources/shaders/reshade/Shaders/DisplayDepth.fx new file mode 100644 index 000000000..98e11d96e --- /dev/null +++ b/data/resources/shaders/reshade/Shaders/DisplayDepth.fx @@ -0,0 +1,427 @@ +/* + DisplayDepth by CeeJay.dk (with many updates and additions by the Reshade community) + + Visualizes the depth buffer. The distance of pixels determine their brightness. + Close objects are dark. Far away objects are bright. + Use this to configure the depth input preprocessor definitions (RESHADE_DEPTH_INPUT_*). +*/ + +#include "ReShade.fxh" + +// -- Basic options -- +#if RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN +#define TEXT_UPSIDE_DOWN "1" +#define TEXT_UPSIDE_DOWN_ALTER "0" +#else +#define TEXT_UPSIDE_DOWN "0" +#define TEXT_UPSIDE_DOWN_ALTER "1" +#endif +#if RESHADE_DEPTH_INPUT_IS_REVERSED +#define TEXT_REVERSED "1" +#define TEXT_REVERSED_ALTER "0" +#else +#define TEXT_REVERSED "0" +#define TEXT_REVERSED_ALTER "1" +#endif +#if RESHADE_DEPTH_INPUT_IS_LOGARITHMIC +#define TEXT_LOGARITHMIC "1" +#define TEXT_LOGARITHMIC_ALTER "0" +#else +#define TEXT_LOGARITHMIC "0" +#define TEXT_LOGARITHMIC_ALTER "1" +#endif + +// "ui_text" was introduced in ReShade 4.5, so cannot show instructions in older versions + +uniform int iUIPresentType < + ui_label = "Present type"; + ui_label_ja_jp = "画面効果"; + ui_type = "combo"; + ui_items = "Depth map\0Normal map\0Show both (Vertical 50/50)\0"; + ui_items_ja_jp = "深度マップ\0法線マップ\0両方を表示 (左右分割)\0"; +#if __RESHADE__ < 40500 + ui_tooltip = +#else + ui_text = +#endif + "The right settings need to be set in the dialog that opens after clicking the \"Edit global preprocessor definitions\" button above.\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN is currently set to " TEXT_UPSIDE_DOWN ".\n" + "If the Depth map is shown upside down set it to " TEXT_UPSIDE_DOWN_ALTER ".\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_REVERSED is currently set to " TEXT_REVERSED ".\n" + "If close objects in the 
Depth map are bright and far ones are dark set it to " TEXT_REVERSED_ALTER ".\n" + "Also try this if you can see the normals, but the depth view is all black.\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_LOGARITHMIC is currently set to " TEXT_LOGARITHMIC ".\n" + "If the Normal map has banding artifacts (extra stripes) set it to " TEXT_LOGARITHMIC_ALTER "."; + ui_text_ja_jp = +#if ADDON_ADJUST_DEPTH + "Adjust Depthアドオンのインストールを検出しました。\n" + "'設定に保存して反映する'ボタンをクリックすると、このエフェクトで調節した全ての変数が共通設定に反映されます。\n" + "または、上の'プリプロセッサの定義を編集'ボタンをクリックした後に開くダイアログで直接編集する事もできます。"; +#else + "調節が終わったら、上の'プリプロセッサの定義を編集'ボタンをクリックした後に開くダイアログに入力する必要があります。\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWNは現在" TEXT_UPSIDE_DOWN "に設定されています。\n" + "深度マップが上下逆さまに表示されている場合は" TEXT_UPSIDE_DOWN_ALTER "に変更して下さい。\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_REVERSEDは現在" TEXT_REVERSED "に設定されています。\n" + "画面効果が深度マップのとき、近くの形状がより白く、遠くの形状がより黒い場合は" TEXT_REVERSED_ALTER "に変更して下さい。\n" + "また、法線マップで形が判別出来るが、深度マップが真っ暗に見えるという場合も、この設定の変更を試して下さい。\n" + "\n" + "RESHADE_DEPTH_INPUT_IS_LOGARITHMICは現在" TEXT_LOGARITHMIC "に設定されています。\n" + "画面効果に実際のレンダリングと合致しない縞模様がある場合は" TEXT_LOGARITHMIC_ALTER "に変更して下さい。"; +#endif + ui_tooltip_ja_jp = + "'深度マップ'は、形状の遠近を白黒で表現します。正しい見え方では、近くの形状ほど黒く、遠くの形状ほど白くなります。\n" + "'法線マップ'は、形状を滑らかに表現します。正しい見え方では、全体的に青緑風で、地平線を見たときに地面が緑掛かった色合いになります。\n" + "'両方を表示 (左右分割)'が選択された場合は、左に法線マップ、右に深度マップを表示します。"; +> = 2; + +uniform bool bUIShowOffset < + ui_label = "Blend Depth map into the image (to help with finding the right offset)"; + ui_label_ja_jp = "透かし比較"; + ui_tooltip_ja_jp = "補正作業を支援するために、画面効果を半透過で適用します。"; +> = false; + +uniform bool bUIUseLivePreview < + ui_category = "Preview settings"; + ui_category_ja_jp = "基本的な補正"; +#if __RESHADE__ <= 50902 + ui_category_closed = true; +#elif !ADDON_ADJUST_DEPTH + ui_category_toggle = true; +#endif + ui_label = "Show live preview and ignore preprocessor definitions"; + ui_label_ja_jp = "プリプロセッサの定義を無視 (補正プレビューをオン)"; + ui_tooltip = "Enable this to preview with the current preset settings instead of the 
global preprocessor settings."; + ui_tooltip_ja_jp = + "共通設定に保存されたプリプロセッサの定義ではなく、これより下のプレビュー設定を使用するには、これを有効にします。\n" +#if ADDON_ADJUST_DEPTH + "設定の準備が出来たら、'設定に保存して反映する'ボタンをクリックしてから、このチェックボックスをオフにして下さい。" +#else + "設定の準備が出来たら、上の'プリプロセッサの定義を編集'ボタンをクリックした後に開くダイアログに入力して下さい。" +#endif + "\n\n" + "プレビューをオンにした場合と比較して画面効果がまったく同じになれば、正しく設定が反映されています。"; +> = false; + +#if __RESHADE__ <= 50902 +uniform int iUIUpsideDown < +#else +uniform bool iUIUpsideDown < +#endif + ui_category = "Preview settings"; + ui_label = "Upside Down"; + ui_label_ja_jp = "深度バッファの上下反転を修正"; +#if __RESHADE__ <= 50902 + ui_type = "combo"; + ui_items = "Off\0On\0"; +#endif + ui_text_ja_jp = + "\n" +#if ADDON_ADJUST_DEPTH + "項目にカーソルを合わせると、設定が必要な状況の説明が表示されます。" +#else + "項目にカーソルを合わせると、設定が必要な状況の説明と、プリプロセッサの定義が表示されます。" +#endif + ; + ui_tooltip_ja_jp = + "深度マップが上下逆さまに表示されている場合は変更して下さい。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN=値\n" + "定義値は次の通りです。オンの場合は1、オフの場合は0を指定して下さい。\n" + "RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN=1\n" + "RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN=0" +#endif + ; +> = RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN; + +#if __RESHADE__ <= 50902 +uniform int iUIReversed < +#else +uniform bool iUIReversed < +#endif + ui_category = "Preview settings"; + ui_label = "Reversed"; + ui_label_ja_jp = "深度バッファの奥行反転を修正"; +#if __RESHADE__ <= 50902 + ui_type = "combo"; + ui_items = "Off\0On\0"; +#endif + ui_tooltip_ja_jp = + "画面効果が深度マップのとき、近くの形状が明るく、遠くの形状が暗い場合は変更して下さい。\n" + "また、法線マップで形が判別出来るが、深度マップが真っ暗に見えるという場合も、この設定の変更を試して下さい。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_INPUT_IS_REVERSED=値\n" + "定義値は次の通りです。オンの場合は1、オフの場合は0を指定して下さい。\n" + "RESHADE_DEPTH_INPUT_IS_REVERSED=1\n" + "RESHADE_DEPTH_INPUT_IS_REVERSED=0" +#endif + ; +> = RESHADE_DEPTH_INPUT_IS_REVERSED; + +#if __RESHADE__ <= 50902 +uniform int iUILogarithmic < +#else +uniform bool iUILogarithmic < +#endif + ui_category = 
"Preview settings"; + ui_label = "Logarithmic"; + ui_label_ja_jp = "深度バッファを対数分布として扱うように修正"; +#if __RESHADE__ <= 50902 + ui_type = "combo"; + ui_items = "Off\0On\0"; +#endif + ui_tooltip = "Change this setting if the displayed surface normals have stripes in them."; + ui_tooltip_ja_jp = + "画面効果に実際のゲーム画面と合致しない縞模様がある場合は変更して下さい。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_INPUT_IS_LOGARITHMIC=値\n" + "定義値は次の通りです。オンの場合は1、オフの場合は0を指定して下さい。\n" + "RESHADE_DEPTH_INPUT_IS_LOGARITHMIC=1\n" + "RESHADE_DEPTH_INPUT_IS_LOGARITHMIC=0" +#endif + ; +> = RESHADE_DEPTH_INPUT_IS_LOGARITHMIC; + +// -- Advanced options -- + +uniform float2 fUIScale < + ui_category = "Preview settings"; + ui_label = "Scale"; + ui_label_ja_jp = "拡大率"; + ui_type = "drag"; + ui_text = + "\n" + " * Advanced options\n" + "\n" + "The following settings also need to be set using \"Edit global preprocessor definitions\" above in order to take effect.\n" + "You can preview how they will affect the Depth map using the controls below.\n" + "\n" + "It is rarely necessary to change these though, as their defaults fit almost all games.\n\n"; + ui_text_ja_jp = + "\n" + " * その他の補正 (不定形またはその他)\n" + "\n" + "これより下は、深度バッファが不定形など、特別なケース向けの設定です。\n" + "通常はこれより上の'基本的な補正'のみでほとんどのゲームに適合します。\n" + "また、これらの設定は画質の向上にはまったく役に立ちません。\n\n"; + ui_tooltip = + "Best use 'Present type'->'Depth map' and enable 'Offset' in the options below to set the scale.\n" + "Use these values for:\nRESHADE_DEPTH_INPUT_X_SCALE=\nRESHADE_DEPTH_INPUT_Y_SCALE=\n" + "\n" + "If you know the right resolution of the games depth buffer then this scale value is simply the ratio\n" + "between the correct resolution and the resolution Reshade thinks it is.\n" + "For example:\n" + "If it thinks the resolution is 1920 x 1080, but it's really 1280 x 720 then the right scale is (1.5 , 1.5)\n" + "because 1920 / 1280 is 1.5 and 1080 / 720 is also 1.5, so 1.5 is the right scale for both the x and the y"; + 
ui_tooltip_ja_jp = + "深度バッファの解像度がクライアント解像度と異なる場合に変更して下さい。\n" + "このスケール値は、深度バッファの解像度とクライアント解像度との単純な比率になります。\n" + "深度バッファの解像度が1280×720でクライアント解像度が1920×1080の場合、横の比率が1920÷1280、縦の比率が1080÷720となります。\n" + "計算した結果を設定すると、値はそれぞれX_SCALE=1.5、Y_SCALE=1.5となります。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_INPUT_X_SCALE=横の値\n" + "RESHADE_DEPTH_INPUT_Y_SCALE=縦の値\n" + "定義値は次の通りです。横の値はX_SCALE、縦の値はY_SCALEに指定して下さい。\n" + "RESHADE_DEPTH_INPUT_X_SCALE=1.0\n" + "RESHADE_DEPTH_INPUT_Y_SCALE=1.0" +#endif + ; + ui_min = 0.0; ui_max = 2.0; + ui_step = 0.001; +> = float2(RESHADE_DEPTH_INPUT_X_SCALE, RESHADE_DEPTH_INPUT_Y_SCALE); + +uniform int2 iUIOffset < + ui_category = "Preview settings"; + ui_label = "Offset"; + ui_label_ja_jp = "位置オフセット"; + ui_type = "slider"; + ui_tooltip = + "Best use 'Present type'->'Depth map' and enable 'Offset' in the options below to set the offset in pixels.\n" + "Use these values for:\nRESHADE_DEPTH_INPUT_X_PIXEL_OFFSET=\nRESHADE_DEPTH_INPUT_Y_PIXEL_OFFSET="; + ui_tooltip_ja_jp = + "深度バッファにレンダリングされた物体の形状が画面効果と重なり合っていない場合に変更して下さい。\n" + "この値は、ピクセル単位で指定します。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_INPUT_X_PIXEL_OFFSET=横の値\n" + "RESHADE_DEPTH_INPUT_Y_PIXEL_OFFSET=縦の値\n" + "定義値は次の通りです。横の値はX_PIXEL_OFFSET、縦の値はY_PIXEL_OFFSETに指定して下さい。\n" + "RESHADE_DEPTH_INPUT_X_PIXEL_OFFSET=0.0\n" + "RESHADE_DEPTH_INPUT_Y_PIXEL_OFFSET=0.0" +#endif + ; + ui_min = -BUFFER_SCREEN_SIZE; + ui_max = BUFFER_SCREEN_SIZE; + ui_step = 1; +> = int2(RESHADE_DEPTH_INPUT_X_PIXEL_OFFSET, RESHADE_DEPTH_INPUT_Y_PIXEL_OFFSET); + +uniform float fUIFarPlane < + ui_category = "Preview settings"; + ui_label = "Far Plane"; + ui_label_ja_jp = "遠点距離"; + ui_type = "drag"; + ui_tooltip = + "RESHADE_DEPTH_LINEARIZATION_FAR_PLANE=\n" + "Changing this value is not necessary in most cases."; + ui_tooltip_ja_jp = + "深度マップの色合いが距離感と合致しない、法線マップの表面が平面に見える、などの場合に変更して下さい。\n" + 
"遠点距離を1000に設定すると、ゲームの描画距離が1000メートルであると見なします。\n\n" + "このプレビュー画面はあくまでプレビューであり、ほとんどの場合、深度バッファは深度マップの色数より遥かに高い精度で表現されています。\n" + "例えば、10m前後の距離の形状が純粋な黒に見えるからという理由で値を変更しないで下さい。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_LINEARIZATION_FAR_PLANE=値\n" + "定義値は次の通りです。\n" + "RESHADE_DEPTH_LINEARIZATION_FAR_PLANE=1000.0" +#endif + ; + ui_min = 0.0; ui_max = 1000.0; + ui_step = 0.1; +> = RESHADE_DEPTH_LINEARIZATION_FAR_PLANE; + +uniform float fUIDepthMultiplier < + ui_category = "Preview settings"; + ui_label = "Multiplier"; + ui_label_ja_jp = "深度乗数"; + ui_type = "drag"; + ui_tooltip = "RESHADE_DEPTH_MULTIPLIER="; + ui_tooltip_ja_jp = + "特定のエミュレータソフトウェアにおける深度バッファを修正するため、特別に追加された変数です。\n" + "この値は僅かな変更でも計算式を破壊するため、設定すべき値を知らない場合は変更しないで下さい。" +#if !ADDON_ADJUST_DEPTH + "\n\n" + "定義名は次の通りです。文字は完全に一致する必要があり、半角大文字の英字とアンダーバーを用いなければなりません。\n" + "RESHADE_DEPTH_MULTIPLIER=値\n" + "定義値は次の通りです。\n" + "RESHADE_DEPTH_MULTIPLIER=1.0" +#endif + ; + ui_min = 0.0; ui_max = 1000.0; + ui_step = 0.001; +> = RESHADE_DEPTH_MULTIPLIER; + +float GetLinearizedDepth(float2 texcoord) +{ + if (!bUIUseLivePreview) + { + return ReShade::GetLinearizedDepth(texcoord); + } + else + { + if (iUIUpsideDown) // RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN + texcoord.y = 1.0 - texcoord.y; + + texcoord.x /= fUIScale.x; // RESHADE_DEPTH_INPUT_X_SCALE + texcoord.y /= fUIScale.y; // RESHADE_DEPTH_INPUT_Y_SCALE + texcoord.x -= iUIOffset.x * BUFFER_RCP_WIDTH; // RESHADE_DEPTH_INPUT_X_PIXEL_OFFSET + texcoord.y += iUIOffset.y * BUFFER_RCP_HEIGHT; // RESHADE_DEPTH_INPUT_Y_PIXEL_OFFSET + + float depth = tex2Dlod(ReShade::DepthBuffer, float4(texcoord, 0, 0)).x * fUIDepthMultiplier; + + const float C = 0.01; + if (iUILogarithmic) // RESHADE_DEPTH_INPUT_IS_LOGARITHMIC + depth = (exp(depth * log(C + 1.0)) - 1.0) / C; + + if (iUIReversed) // RESHADE_DEPTH_INPUT_IS_REVERSED + depth = 1.0 - depth; + + const float N = 1.0; + depth /= fUIFarPlane - depth * (fUIFarPlane - N); + + 
return depth; + } +} + +float3 GetScreenSpaceNormal(float2 texcoord) +{ + float3 offset = float3(BUFFER_PIXEL_SIZE, 0.0); + float2 posCenter = texcoord.xy; + float2 posNorth = posCenter - offset.zy; + float2 posEast = posCenter + offset.xz; + + float3 vertCenter = float3(posCenter - 0.5, 1) * GetLinearizedDepth(posCenter); + float3 vertNorth = float3(posNorth - 0.5, 1) * GetLinearizedDepth(posNorth); + float3 vertEast = float3(posEast - 0.5, 1) * GetLinearizedDepth(posEast); + + return normalize(cross(vertCenter - vertNorth, vertCenter - vertEast)) * 0.5 + 0.5; +} + +void PS_DisplayDepth(in float4 position : SV_Position, in float2 texcoord : TEXCOORD, out float3 color : SV_Target) +{ + float3 depth = GetLinearizedDepth(texcoord).xxx; + float3 normal = GetScreenSpaceNormal(texcoord); + + // Ordered dithering +#if 1 + const float dither_bit = 8.0; // Number of bits per channel. Should be 8 for most monitors. + // Calculate grid position + float grid_position = frac(dot(texcoord, (BUFFER_SCREEN_SIZE * float2(1.0 / 16.0, 10.0 / 36.0)) + 0.25)); + // Calculate how big the shift should be (a quarter of one quantization step at dither_bit bits per channel) + float dither_shift = 0.25 * (1.0 / (pow(2, dither_bit) - 1.0)); + // Shift the individual colors differently, thus making it even harder to see the dithering pattern + float3 dither_shift_RGB = float3(dither_shift, -dither_shift, dither_shift); // Subpixel dithering + // Modify shift according to grid position. 
+ dither_shift_RGB = lerp(2.0 * dither_shift_RGB, -2.0 * dither_shift_RGB, grid_position); + depth += dither_shift_RGB; +#endif + + color = depth; + if (iUIPresentType == 1) + color = normal; + if (iUIPresentType == 2) + color = lerp(normal, depth, step(BUFFER_WIDTH * 0.5, position.x)); + + if (bUIShowOffset) + { + float3 color_orig = tex2D(ReShade::BackBuffer, texcoord).rgb; + + // Blend depth and back buffer color with 'overlay' so the offset is more noticeable + color = lerp(2 * color * color_orig, 1.0 - 2.0 * (1.0 - color) * (1.0 - color_orig), max(color.r, max(color.g, color.b)) < 0.5 ? 0.0 : 1.0); + } +} + +technique DisplayDepth < + ui_tooltip = + "This shader helps you set the right preprocessor settings for depth input.\n" + "To set the settings click on 'Edit global preprocessor definitions' and set them there - not in this shader.\n" + "The settings will then take effect for all shaders, including this one.\n" + "\n" + "By default calculated normals and depth are shown side by side.\n" + "Normals (on the left) should look smooth and the ground should be greenish when looking at the horizon.\n" + "Depth (on the right) should show close objects as dark and use gradually brighter shades the further away objects are.\n"; + ui_tooltip_ja_jp = + "これは、深度バッファの入力をReShade側の計算式に合わせる調節をするための、設定作業の支援に特化した特殊な扱いのエフェクトです。\n" + "初期状態では「両方を表示」が選択されており、左に法線マップ、右に深度マップが表示されます。\n" + "\n" + "法線マップ(左側)は、形状を滑らかに表現します。正しい設定では、全体的に青緑風で、地平線を見たときに地面が緑を帯びた色になります。\n" + "深度マップ(右側)は、形状の遠近を白黒で表現します。正しい設定では、近くの形状ほど黒く、遠くの形状ほど白くなります。\n" + "\n" +#if ADDON_ADJUST_DEPTH + "設定を完了するには、DisplayDepth.fxエフェクトの変数の一覧にある'設定に保存して反映する'ボタンをクリックして下さい。\n" +#else + "設定を完了するには、エフェクト変数の編集画面にある'プリプロセッサの定義を編集'ボタンをクリックした後に開くダイアログに入力して下さい。\n" +#endif + "すると、インストール先のゲームに対して共通の設定として保存され、他のプリセットでも正しく表示されるようになります。"; +> + +{ + pass + { + VertexShader = PostProcessVS; + PixelShader = PS_DisplayDepth; + } +} diff --git a/data/resources/shaders/reshade/Shaders/ReShade.fxh 
b/data/resources/shaders/reshade/Shaders/ReShade.fxh index 1cd8205d6..bb7348197 100644 --- a/data/resources/shaders/reshade/Shaders/ReShade.fxh +++ b/data/resources/shaders/reshade/Shaders/ReShade.fxh @@ -1,3 +1,7 @@ +/* + * SPDX-License-Identifier: CC0-1.0 + */ + #pragma once #if !defined(__RESHADE__) || __RESHADE__ < 30000 @@ -105,6 +109,7 @@ namespace ReShade } // Vertex shader generating a triangle covering the entire screen +// See also https://www.reddit.com/r/gamedev/comments/2j17wk/a_slightly_faster_bufferless_vertex_shader_trick/ void PostProcessVS(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD) { texcoord.x = (id == 2) ? 2.0 : 0.0; diff --git a/data/resources/shaders/reshade/Shaders/ReShadeUI.fxh b/data/resources/shaders/reshade/Shaders/ReShadeUI.fxh index 6f3a41184..0cc016e31 100644 --- a/data/resources/shaders/reshade/Shaders/ReShadeUI.fxh +++ b/data/resources/shaders/reshade/Shaders/ReShadeUI.fxh @@ -7,27 +7,27 @@ #define RESHADE_VERSION(major,minor,build) (10000 * (major) + 100 * (minor) + (build)) #define SUPPORTED_VERSION(major,minor,build) (__RESHADE__ >= RESHADE_VERSION(major,minor,build)) -// Since 3.0.0 +// >= 3.0.0 // Commit current in-game user interface status // https://github.com/crosire/reshade/commit/302bacc49ae394faedc2e29a296c1cebf6da6bb2#diff-82cf230afdb2a0d5174111e6f17548a5R1183 // Added various GUI related uniform variable annotations // https://reshade.me/forum/releases/2341-3-0 #define __UNIFORM_INPUT_ANY ui_type = "input"; -#define __UNIFORM_INPUT_BOOL1 __UNIFORM_INPUT_ANY // It is unsupported on all version -#define __UNIFORM_INPUT_BOOL2 __UNIFORM_INPUT_ANY // It is unsupported on all version -#define __UNIFORM_INPUT_BOOL3 __UNIFORM_INPUT_ANY // It is unsupported on all version -#define __UNIFORM_INPUT_BOOL4 __UNIFORM_INPUT_ANY // It is unsupported on all version -#define __UNIFORM_INPUT_INT1 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information 
-#define __UNIFORM_INPUT_INT2 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_INT3 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_INT4 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_FLOAT1 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_FLOAT2 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_FLOAT3 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_INPUT_FLOAT4 __UNIFORM_INPUT_ANY // If it was not supported in someday or now, please add information +#define __UNIFORM_INPUT_BOOL1 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_BOOL2 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_BOOL3 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_BOOL4 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_INT1 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_INT2 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_INT3 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_INT4 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_FLOAT1 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_FLOAT2 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_FLOAT3 __UNIFORM_INPUT_ANY +#define __UNIFORM_INPUT_FLOAT4 __UNIFORM_INPUT_ANY -// Since 4.0.1 +// >= 4.0.1 // Change slider widget to be used with new "slider" instead of a "drag" type annotation // https://github.com/crosire/reshade/commit/746229f31cd6f311a3e72a543e4f1f23faa23f11#diff-59405a313bd8cbfb0ca6dd633230e504R1701 // Changed slider widget to be used with < ui_type = "slider"; > instead of < ui_type = "drag"; > @@ -35,7 +35,7 @@ #if SUPPORTED_VERSION(4,0,1) #define __UNIFORM_DRAG_ANY ui_type = "drag"; -// Since 4.0.0 +// >= 4.0.0 // Rework statistics tab and add drag widgets back // 
https://github.com/crosire/reshade/commit/1b2c38795f00efd66c007da1f483f1441b230309 // Changed drag widget to a slider widget (old one is still available via < ui_type = "drag2"; >) @@ -43,7 +43,7 @@ #elif SUPPORTED_VERSION(4,0,0) #define __UNIFORM_DRAG_ANY ui_type = "drag2"; -// Since 3.0.0 +// >= 3.0.0 // Commit current in-game user interface status // https://github.com/crosire/reshade/commit/302bacc49ae394faedc2e29a296c1cebf6da6bb2#diff-82cf230afdb2a0d5174111e6f17548a5R1187 // Added various GUI related uniform variable annotations @@ -52,20 +52,20 @@ #define __UNIFORM_DRAG_ANY ui_type = "drag"; #endif -#define __UNIFORM_DRAG_BOOL1 __UNIFORM_DRAG_ANY // It is unsupported on all version -#define __UNIFORM_DRAG_BOOL2 __UNIFORM_DRAG_ANY // It is unsupported on all version -#define __UNIFORM_DRAG_BOOL3 __UNIFORM_DRAG_ANY // It is unsupported on all version -#define __UNIFORM_DRAG_BOOL4 __UNIFORM_DRAG_ANY // It is unsupported on all version -#define __UNIFORM_DRAG_INT1 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_INT2 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_INT3 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_INT4 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_FLOAT1 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_FLOAT2 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_FLOAT3 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_DRAG_FLOAT4 __UNIFORM_DRAG_ANY // If it was not supported in someday or now, please add information +#define __UNIFORM_DRAG_BOOL1 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_BOOL2 __UNIFORM_DRAG_ANY 
+#define __UNIFORM_DRAG_BOOL3 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_BOOL4 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_INT1 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_INT2 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_INT3 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_INT4 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_FLOAT1 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_FLOAT2 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_FLOAT3 __UNIFORM_DRAG_ANY +#define __UNIFORM_DRAG_FLOAT4 __UNIFORM_DRAG_ANY -// Since 4.0.1 +// >= 4.0.1 // Change slider widget to be used with new "slider" instead of a "drag" type annotation // https://github.com/crosire/reshade/commit/746229f31cd6f311a3e72a543e4f1f23faa23f11#diff-59405a313bd8cbfb0ca6dd633230e504R1699 // Changed slider widget to be used with < ui_type = "slider"; > instead of < ui_type = "drag"; > @@ -73,7 +73,7 @@ #if SUPPORTED_VERSION(4,0,1) #define __UNIFORM_SLIDER_ANY ui_type = "slider"; -// Since 4.0.0 +// >= 4.0.0 // Rework statistics tab and add drag widgets back // https://github.com/crosire/reshade/commit/1b2c38795f00efd66c007da1f483f1441b230309 // Changed drag widget to a slider widget (old one is still available via < ui_type = "drag2"; >) @@ -84,20 +84,20 @@ #define __UNIFORM_SLIDER_ANY __UNIFORM_DRAG_ANY #endif -#define __UNIFORM_SLIDER_BOOL1 __UNIFORM_SLIDER_ANY // It is unsupported on all version -#define __UNIFORM_SLIDER_BOOL2 __UNIFORM_SLIDER_ANY // It is unsupported on all version -#define __UNIFORM_SLIDER_BOOL3 __UNIFORM_SLIDER_ANY // It is unsupported on all version -#define __UNIFORM_SLIDER_BOOL4 __UNIFORM_SLIDER_ANY // It is unsupported on all version -#define __UNIFORM_SLIDER_INT1 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_INT2 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_INT3 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define 
__UNIFORM_SLIDER_INT4 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_FLOAT1 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_FLOAT2 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_FLOAT3 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_SLIDER_FLOAT4 __UNIFORM_SLIDER_ANY // If it was not supported in someday or now, please add information +#define __UNIFORM_SLIDER_BOOL1 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_BOOL2 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_BOOL3 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_BOOL4 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_INT1 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_INT2 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_INT3 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_INT4 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_FLOAT1 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_FLOAT2 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_FLOAT3 __UNIFORM_SLIDER_ANY +#define __UNIFORM_SLIDER_FLOAT4 __UNIFORM_SLIDER_ANY -// Since 3.0.0 +// >= 3.0.0 // Add combo box display type for uniform variables and fix displaying of integer variable under Direct3D 9 // https://github.com/crosire/reshade/commit/b025bfae5f7343509ec0cacf6df0cff537c499f2#diff-82cf230afdb2a0d5174111e6f17548a5R1631 // Added various GUI related uniform variable annotations @@ -105,19 +105,19 @@ #define __UNIFORM_COMBO_ANY ui_type = "combo"; // __UNIFORM_COMBO_BOOL1 -#define __UNIFORM_COMBO_BOOL2 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_BOOL3 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_BOOL4 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_INT1 __UNIFORM_COMBO_ANY // If it was not supported in 
someday or now, please add information -#define __UNIFORM_COMBO_INT2 __UNIFORM_COMBO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_COMBO_INT3 __UNIFORM_COMBO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_COMBO_INT4 __UNIFORM_COMBO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_COMBO_FLOAT1 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_FLOAT2 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_FLOAT3 __UNIFORM_COMBO_ANY // It is unsupported on all version -#define __UNIFORM_COMBO_FLOAT4 __UNIFORM_COMBO_ANY // It is unsupported on all version +#define __UNIFORM_COMBO_BOOL2 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_BOOL3 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_BOOL4 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_INT1 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_INT2 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_INT3 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_INT4 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_FLOAT1 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_FLOAT2 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_FLOAT3 __UNIFORM_COMBO_ANY +#define __UNIFORM_COMBO_FLOAT4 __UNIFORM_COMBO_ANY -// Since 4.0.0 (but the ui_items force set "Off\0On\0"), and if less than it force converted to checkbox +// >= 4.0.0 // Add option to display boolean values as combo box instead of checkbox // https://github.com/crosire/reshade/commit/aecb757c864c9679e77edd6f85a1521c49e489c1#diff-59405a313bd8cbfb0ca6dd633230e504R1147 // https://github.com/crosire/reshade/blob/v4.0.0/source/gui.cpp @@ -125,7 +125,7 @@ // https://reshade.me/forum/releases/4772-4-0 #define __UNIFORM_COMBO_BOOL1 __UNIFORM_COMBO_ANY -// Since 4.0.0 +// >= 4.0.0 // Cleanup GUI code and rearrange some widgets // 
https://github.com/crosire/reshade/commit/6751f7bd50ea7c0556cf0670f10a4b4ba912ee7d#diff-59405a313bd8cbfb0ca6dd633230e504R1711 // Added radio button widget (via < ui_type = "radio"; ui_items = "Button 1\0Button 2\0...\0"; >) @@ -136,48 +136,46 @@ #define __UNIFORM_RADIO_ANY __UNIFORM_COMBO_ANY #endif -#define __UNIFORM_RADIO_BOOL1 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_BOOL2 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_BOOL3 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_BOOL4 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_INT1 __UNIFORM_RADIO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_RADIO_INT2 __UNIFORM_RADIO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_RADIO_INT3 __UNIFORM_RADIO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_RADIO_INT4 __UNIFORM_RADIO_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_RADIO_FLOAT1 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_FLOAT2 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_FLOAT3 __UNIFORM_RADIO_ANY // It is unsupported on all version -#define __UNIFORM_RADIO_FLOAT4 __UNIFORM_RADIO_ANY // It is unsupported on all version +#define __UNIFORM_RADIO_BOOL1 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_BOOL2 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_BOOL3 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_BOOL4 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_INT1 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_INT2 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_INT3 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_INT4 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_FLOAT1 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_FLOAT2 __UNIFORM_RADIO_ANY +#define 
__UNIFORM_RADIO_FLOAT3 __UNIFORM_RADIO_ANY +#define __UNIFORM_RADIO_FLOAT4 __UNIFORM_RADIO_ANY -// Since 4.1.0 +// >= 4.1.0 // Fix floating point uniforms with unknown "ui_type" not showing up in UI // https://github.com/crosire/reshade/commit/50e5bf44dfc84bc4220c2b9f19d5f50c7a0fda66#diff-59405a313bd8cbfb0ca6dd633230e504R1788 // Fixed floating point uniforms with unknown "ui_type" not showing up in UI // https://reshade.me/forum/releases/5021-4-1 #define __UNIFORM_COLOR_ANY ui_type = "color"; -// Since 3.0.0 +// >= 3.0.0 // Move technique list to preset configuration file // https://github.com/crosire/reshade/blob/84bba3aa934c1ebe4c6419b69dfe1690d9ab9d34/source/runtime.cpp#L1328 // Added various GUI related uniform variable annotations // https://reshade.me/forum/releases/2341-3-0 -// If empty, these versions before 4.1.0 are decide that the type is color from the number of components - -#define __UNIFORM_COLOR_BOOL1 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_BOOL2 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_BOOL3 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_BOOL4 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_INT1 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_INT2 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_INT3 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_INT4 __UNIFORM_COLOR_ANY // It is unsupported on all version +#define __UNIFORM_COLOR_BOOL1 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_BOOL2 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_BOOL3 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_BOOL4 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_INT1 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_INT2 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_INT3 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_INT4 __UNIFORM_COLOR_ANY // 
__UNIFORM_COLOR_FLOAT1 -#define __UNIFORM_COLOR_FLOAT2 __UNIFORM_COLOR_ANY // It is unsupported on all version -#define __UNIFORM_COLOR_FLOAT3 __UNIFORM_COLOR_ANY // If it was not supported in someday or now, please add information -#define __UNIFORM_COLOR_FLOAT4 __UNIFORM_COLOR_ANY // If it was not supported in someday or now, please add information +#define __UNIFORM_COLOR_FLOAT2 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_FLOAT3 __UNIFORM_COLOR_ANY +#define __UNIFORM_COLOR_FLOAT4 __UNIFORM_COLOR_ANY -// Since 4.2.0 +// >= 4.2.0 // Add alpha slider widget for single component uniform variables (#86) // https://github.com/crosire/reshade/commit/87a740a8e3c4dcda1dd4eeec8d5cff7fa35fe829#diff-59405a313bd8cbfb0ca6dd633230e504R1820 // Added alpha slider widget for single component uniform variables @@ -188,7 +186,7 @@ #define __UNIFORM_COLOR_FLOAT1 __UNIFORM_SLIDER_ANY #endif -// Since 4.3.0 +// >= 4.3.0 // Add new "list" GUI widget (#103) // https://github.com/crosire/reshade/commit/515287d20ce615c19cf3d4c21b49f83896f04ddc#diff-59405a313bd8cbfb0ca6dd633230e504R1894 // Added new "list" GUI widget @@ -200,17 +198,17 @@ #endif // __UNIFORM_LIST_BOOL1 -#define __UNIFORM_LIST_BOOL2 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_BOOL3 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_BOOL4 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_INT1 __UNIFORM_LIST_ANY // Supported in 4.3.0 -#define __UNIFORM_LIST_INT2 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_INT3 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_INT4 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_FLOAT1 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_FLOAT2 __UNIFORM_LIST_ANY // Not supported in all versions -#define __UNIFORM_LIST_FLOAT3 __UNIFORM_LIST_ANY // Not supported in all versions -#define 
__UNIFORM_LIST_FLOAT4 __UNIFORM_LIST_ANY // Not supported in all versions +#define __UNIFORM_LIST_BOOL2 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_BOOL3 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_BOOL4 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_INT1 __UNIFORM_LIST_ANY // >= 4.3.0 +#define __UNIFORM_LIST_INT2 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_INT3 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_INT4 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_FLOAT1 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_FLOAT2 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_FLOAT3 __UNIFORM_LIST_ANY +#define __UNIFORM_LIST_FLOAT4 __UNIFORM_LIST_ANY -// For compatible with ComboBox +// For compatible with 'combo' #define __UNIFORM_LIST_BOOL1 __UNIFORM_COMBO_ANY diff --git a/data/resources/shaders/reshade/Shaders/UIMask.fx b/data/resources/shaders/reshade/Shaders/UIMask.fx new file mode 100644 index 000000000..f27b7cc6a --- /dev/null +++ b/data/resources/shaders/reshade/Shaders/UIMask.fx @@ -0,0 +1,289 @@ +/* + Simple UIMask shader by luluco250 + + I have no idea why this was never ported back to ReShade 3.0 from 2.0, + but if you missed it, here it is. + + It doesn't feature the auto mask from the original shader. + + It does feature a new multi-channnel masking feature. UI masks can now contain + separate 'modes' within each of the three color channels. + + For example, you can have the regular hud on the red channel (the default one), + a mask for an inventory screen on the green channel and a mask for a quest menu + on the blue channel. You can then use keyboard keys to toggle each channel on or off. + + Multiple channels can be active at once, they'll just add up to mask the image. + + Simple/legacy masks are not affected by this, they'll work just as you'd expect, + so you can still make simple black and white masks that use all color channels, it'll + be no different than just having it on a single channel. 
+ + Tips: + + --You can adjust how much it will affect your HUD by changing "Mask Intensity". + + --You don't actually need to place the UIMask_Bottom technique at the bottom of + your shader pipeline, if you have any effects that don't necessarily affect + the visibility of the HUD you can place it before that. + For instance, if you use color correction shaders like LUT, you might want + to place UIMask_Bottom just before that. + + --Preprocessor flags: + --UIMASK_MULTICHANNEL: + Enables having up to three different masks on each color channel. + + --Refer to this page for keycodes: + https://msdn.microsoft.com/en-us/library/windows/desktop/dd375731(v=vs.85).aspx + + --To make a custom mask: + + 1-Take a screenshot of your game with the HUD enabled, + preferrably with any effects disabled for maximum visibility. + + 2-Open the screenshot with your preferred image editor program, I use GIMP. + + 3-Make a background white layer if there isn't one already. + Be sure to leave it behind your actual screenshot for the while. + + 4-Make an empty layer for the mask itself, you can call it "mask". + + 5-Having selected the mask layer, paint the places where HUD constantly is, + such as health bars, important messages, minimaps etc. + + 6-Delete or make your screenshot layer invisible. + + 7-Before saving your mask, let's do some gaussian blurring to improve it's look and feel: + For every step of blurring you want to do, make a new layer, such as: + Mask - Blur16x16 + Mask - Blur8x8 + Mask - Blur4x4 + Mask - Blur2x2 + Mask - NoBlur + You should use your image editor's default gaussian blurring filter, if there is one. + This avoids possible artifacts and makes the mask blend more easily on the eyes. + You may not need this if your mask is accurate enough and/or the HUD is simple enough. + + 8-Now save the final image with a unique name such as "MyUIMask.png" in your textures folder. 
+ + 9-Set the preprocessor definition UIMASK_TEXTURE to the unique name of your image, with quotes. + You're done! + + + MIT Licensed: + + Copyright (c) 2017 Lucas Melo + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +//#region Preprocessor + +#include "ReShade.fxh" +#include "ReShadeUI.fxh" + +#ifndef UIMASK_MULTICHANNEL + #define UIMASK_MULTICHANNEL 0 +#endif + +#if !UIMASK_MULTICHANNEL + #define TEXFORMAT R8 +#else + #define TEXFORMAT RGBA8 +#endif + +#ifndef UIMASK_TEXTURE + #define UIMASK_TEXTURE "UIMask.png" +#endif + +//#endregion + +namespace UIMask +{ + +//#region Uniforms + +uniform int _Help +< + ui_label = " "; + ui_text = + "For more detailed instructions, see the text at the top of this " + "effect's shader file (UIMask.fx).\n" + "\n" + "Available preprocessor definitions:\n" + " UIMASK_MULTICHANNEL:\n" + " If set to 1, each of the RGB color channels in the texture is " + "treated as a separate mask.\n" + "\n" + "How to create a mask:\n" + "\n" + "1. 
Take a screenshot with the game's UI appearing.\n" + "2. Open the screenshot in an image editor, GIMP or Photoshop are " + "recommended.\n" + "3. Create a new layer over the screenshot layer, fill it with black.\n" + "4. Reduce the layer opacity so you can see the screenshot layer " + "below.\n" + "5. Cover the UI with white to mask it from effects. The stronger the " + "mask white color, the more opaque the mask will be.\n" + "6. Set the mask layer opacity back to 100%.\n" + "7. Save the image in one of your texture folders, making sure to " + "use a unique name such as: \"MyUIMask.png\"\n" + "8. Set the preprocessor definition UIMASK_TEXTURE to the name of " + "your image, with quotes: \"MyUIMask.png\"\n" + ; + ui_category = "Help"; + ui_category_closed = true; + ui_type = "radio"; +>; + +uniform float fMask_Intensity +< + __UNIFORM_SLIDER_FLOAT1 + + ui_label = "Mask Intensity"; + ui_tooltip = + "How much to mask effects from affecting the original image.\n" + "\nDefault: 1.0"; + ui_min = 0.0; + ui_max = 1.0; + ui_step = 0.001; +> = 1.0; + +uniform bool bDisplayMask < + ui_label = "Display Mask"; + ui_tooltip = + "Display the mask texture.\n" + "Useful for testing multiple channels or simply the mask itself.\n" + "\nDefault: Off"; +> = false; + +#if UIMASK_MULTICHANNEL + +uniform bool bToggleRed < + ui_label = "Toggle Red Channel"; + ui_tooltip = "Toggle UI masking for the red channel.\n" + "Right click to assign a hotkey.\n" + "\nDefault: On"; +> = true; + +uniform bool bToggleGreen < + ui_label = "Toggle Green Channel"; + ui_tooltip = "Toggle UI masking for the green channel.\n" + "Right click to assign a hotkey." + "\nDefault: On"; +> = true; + +uniform bool bToggleBlue < + ui_label = "Toggle Blue Channel"; + ui_tooltip = "Toggle UI masking for the blue channel.\n" + "Right click to assign a hotkey." 
+ "\nDefault: On"; +> = true; + +#endif + +//#endregion + +//#region Textures + +texture BackupTex +{ + Width = BUFFER_WIDTH; + Height = BUFFER_HEIGHT; +}; +sampler Backup +{ + Texture = BackupTex; +}; + +texture MaskTex +{ + Width = BUFFER_WIDTH; + Height = BUFFER_HEIGHT; + Format = TEXFORMAT; +}; +sampler Mask +{ + Texture = MaskTex; +}; + +//#endregion + +//#region Shaders + +float4 BackupPS(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_Target { + return tex2D(ReShade::BackBuffer, uv); +} + +float4 MainPS(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_Target { + float4 color = tex2D(ReShade::BackBuffer, uv); + float4 backup = tex2D(Backup, uv); + + #if !UIMASK_MULTICHANNEL + float mask = tex2D(Mask, uv).r; + #else + float3 mask_rgb = tex2D(Mask, uv).rgb; + + // This just works, it basically adds masking with each channel that has + // been toggled. + float mask = saturate( + 1.0 - dot(1.0 - mask_rgb, + float3(bToggleRed, bToggleGreen, bToggleBlue))); + #endif + + color = lerp(color, backup, mask * fMask_Intensity); + color = bDisplayMask ? mask : color; + + return color; +} + +//#endregion + +//#region Techniques + +technique UIMask_Top +< + ui_tooltip = "Place this *above* the effects to be masked."; +> +{ + pass + { + VertexShader = PostProcessVS; + PixelShader = BackupPS; + RenderTarget = BackupTex; + } +} + +technique UIMask_Bottom +< + ui_tooltip = + "Place this *below* the effects to be masked.\n" + "If you want to add a toggle key for the effect, set it to this one."; +> +{ + pass + { + VertexShader = PostProcessVS; + PixelShader = MainPS; + } +} + +//#endregion + +} // Namespace. 
diff --git a/dep/reshadefx/include/effect_codegen.hpp b/dep/reshadefx/include/effect_codegen.hpp index 6419214a8..1029cabe2 100644 --- a/dep/reshadefx/include/effect_codegen.hpp +++ b/dep/reshadefx/include/effect_codegen.hpp @@ -16,6 +16,8 @@ namespace reshadefx /// class codegen { + friend class parser; + public: /// /// Virtual destructor to guarantee that memory of the implementations deriving from this interface is properly destroyed. @@ -23,12 +25,21 @@ namespace reshadefx virtual ~codegen() {} /// - /// Writes result of the code generation to the specified . + /// Gets the module describing the generated code. /// - /// Target module to fill. - virtual void write_result(module &module) = 0; + const effect_module &module() const { return _module; } - public: + /// + /// Finalizes and returns the generated code for the entire module (all entry points). + /// + virtual std::basic_string finalize_code() const = 0; + /// + /// Finalizes and returns the generated code for the specified entry point (and no other entry points). + /// + /// Name of the entry point function to generate code for. + virtual std::basic_string finalize_code_for_entry_point(const std::string &entry_point_name) const = 0; + + protected: /// /// An opaque ID referring to a SSA value or basic block. /// @@ -40,14 +51,14 @@ namespace reshadefx /// Source location matching this definition (for debugging). /// Description of the type. /// New SSA ID of the type. - virtual id define_struct(const location &loc, struct_info &info) = 0; + virtual id define_struct(const location &loc, struct_type &info) = 0; /// /// Defines a new texture binding. /// /// Source location matching this definition (for debugging). /// Description of the texture object. /// New SSA ID of the binding. - virtual id define_texture(const location &loc, texture_info &info) = 0; + virtual id define_texture(const location &loc, texture &info) = 0; /// /// Defines a new sampler binding. 
/// @@ -55,7 +66,7 @@ namespace reshadefx /// Description of the texture this sampler object references. /// Description of the sampler object. /// New SSA ID of the binding. - virtual id define_sampler(const location &loc, const texture_info &tex_info, sampler_info &info) = 0; + virtual id define_sampler(const location &loc, const texture &tex_info, sampler &info) = 0; /// /// Defines a new storage binding. /// @@ -63,14 +74,14 @@ namespace reshadefx /// Description of the texture this storage object references. /// Description of the storage object. /// New SSA ID of the binding. - virtual id define_storage(const location &loc, const texture_info &tex_info, storage_info &info) = 0; + virtual id define_storage(const location &loc, const texture &tex_info, storage &info) = 0; /// /// Defines a new uniform variable. /// /// Source location matching this definition (for debugging). /// Description of the uniform variable. /// New SSA ID of the variable. - virtual id define_uniform(const location &loc, uniform_info &info) = 0; + virtual id define_uniform(const location &loc, uniform &info) = 0; /// /// Defines a new variable. /// @@ -82,26 +93,25 @@ namespace reshadefx /// New SSA ID of the variable. virtual id define_variable(const location &loc, const type &type, std::string name = std::string(), bool global = false, id initializer_value = 0) = 0; /// - /// Defines a new function and its function parameters and make it current. Any code added after this call is added to this function. + /// Defines a new function and its function parameters and make it current. + /// Any code added after this call is added to this function. /// /// Source location matching this definition (for debugging). /// Description of the function. /// New SSA ID of the function. - virtual id define_function(const location &loc, function_info &info) = 0; + virtual id define_function(const location &loc, function &info) = 0; /// /// Defines a new effect technique. 
/// /// Source location matching this definition (for debugging). /// Description of the technique. - void define_technique(technique_info &&info) { _module.techniques.push_back(std::move(info)); } + void define_technique(technique &&info) { _module.techniques.push_back(std::move(info)); } /// /// Makes a function a shader entry point. /// - /// Function to use as entry point. May be overwritten to point to a new unique function for this entry point. - /// Shader type (vertex, pixel or compute shader). - /// Number of local threads it this is a compute entry point. - virtual void define_entry_point(function_info &function, shader_type type, int num_threads[3] = nullptr) = 0; + /// Function to use as entry point. May be overwritten to point to a new uniquely generated function. + virtual void define_entry_point(function &function) = 0; /// /// Resolves the access chain and add a load operation to the output. @@ -131,6 +141,19 @@ namespace reshadefx /// Actual constant data to convert into a SSA ID. /// New SSA ID with the constant value. virtual id emit_constant(const type &type, const constant &data) = 0; + id emit_constant(const type &data_type, uint32_t value) + { + // Create a constant value of the specified type + constant data = {}; // Initialize to zero, so that components not set below still have a defined value for lookup via std::memcmp + for (unsigned int i = 0; i < data_type.components(); ++i) + { + if (data_type.is_integral()) + data.as_uint[i] = value; + else + data.as_float[i] = static_cast(value); + } + return emit_constant(data_type, data); + } /// /// Adds an unary operation to the output (built-in operation with one argument). @@ -222,7 +245,7 @@ namespace reshadefx /// /// Returns if code is currently added to a function. /// - virtual bool is_in_function() const { return is_in_block(); } + bool is_in_function() const { return _current_function != nullptr; } /// /// Creates a new basic block. 
@@ -272,93 +295,96 @@ namespace reshadefx /// ID of the basic block to jump to when the condition is false. /// ID of the current basic block. virtual id leave_block_and_branch_conditional(id condition, id true_target, id false_target) = 0; + /// /// Leaves the current function. Any code added after this call is added in the global scope. /// virtual void leave_function() = 0; + /// + /// Recalculates sampler and storage bindings to take as little binding space as possible for each entry point. + /// + virtual void optimize_bindings(); + /// /// Looks up an existing struct type. /// /// SSA ID of the type to find. /// Reference to the struct description. - const struct_info &get_struct(id id) const + const struct_type &get_struct(id id) const { return *std::find_if(_structs.begin(), _structs.end(), - [id](const auto &it) { return it.definition == id; }); + [id](const struct_type &info) { return info.id == id; }); } /// /// Looks up an existing texture binding. /// /// SSA ID of the texture binding to find. /// Reference to the texture description. - texture_info &get_texture(id id) + texture &get_texture(id id) { return *std::find_if(_module.textures.begin(), _module.textures.end(), - [id](const auto &it) { return it.id == id; }); + [id](const texture &info) { return info.id == id; }); } /// /// Looks up an existing sampler binding. /// /// SSA ID of the sampler binding to find. /// Reference to the sampler description. - const sampler_info &get_sampler(id id) const + const sampler &get_sampler(id id) const { return *std::find_if(_module.samplers.begin(), _module.samplers.end(), - [id](const auto &it) { return it.id == id; }); + [id](const sampler &info) { return info.id == id; }); } /// /// Looks up an existing storage binding. /// /// SSA ID of the storage binding to find. /// Reference to the storage description. 
- const storage_info &get_storage(id id) const + const storage &get_storage(id id) const { return *std::find_if(_module.storages.begin(), _module.storages.end(), - [id](const auto &it) { return it.id == id; }); + [id](const storage &info) { return info.id == id; }); } /// /// Looks up an existing function definition. /// /// SSA ID of the function variable to find. /// Reference to the function description. - function_info &get_function(id id) + function &get_function(id id) { return *std::find_if(_functions.begin(), _functions.end(), - [id](const auto &it) { return it->definition == id; })->get(); + [id](const std::unique_ptr &info) { return info->id == id; })->get(); + } + function &get_function(const std::string &unique_name) + { + return *std::find_if(_functions.begin(), _functions.end(), + [&unique_name](const std::unique_ptr &info) { return info->unique_name == unique_name; })->get(); } - protected: id make_id() { return _next_id++; } - static uint32_t align_up(uint32_t size, uint32_t alignment) - { - alignment -= 1; - return ((size + alignment) & ~alignment); - } - static uint32_t align_up(uint32_t size, uint32_t alignment, uint32_t elements) - { - return align_up(size, alignment) * (elements - 1) + size; - } + effect_module _module; + std::vector _structs; + std::vector> _functions; - reshadefx::module _module; - std::vector _structs; - std::vector> _functions; id _next_id = 1; id _last_block = 0; id _current_block = 0; + function *_current_function = nullptr; }; /// /// Creates a back-end implementation for GLSL code generation. /// + /// GLSL version to insert at the beginning of the file. /// Generate GLSL ES code instead of core OpenGL. /// Generate GLSL for OpenGL or for Vulkan. /// Whether to append debug information like line directives to the generated code. /// Whether to convert uniform variables to specialization constants. /// Use real 16-bit types for the minimum precision types "min16int", "min16uint" and "min16float". 
/// Insert code to flip the Y component of the output position in vertex shaders. - codegen *create_codegen_glsl(bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types = false, bool flip_vert_y = false); + codegen *create_codegen_glsl(unsigned version, bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types = false, bool flip_vert_y = false); /// /// Creates a back-end implementation for HLSL code generation. /// diff --git a/dep/reshadefx/include/effect_expression.hpp b/dep/reshadefx/include/effect_expression.hpp index 058977d8e..7278833da 100644 --- a/dep/reshadefx/include/effect_expression.hpp +++ b/dep/reshadefx/include/effect_expression.hpp @@ -15,7 +15,7 @@ namespace reshadefx /// struct type { - enum datatype : uint8_t + enum datatype : uint32_t { t_void, t_bool, @@ -101,6 +101,8 @@ namespace reshadefx bool is_function() const { return base == t_function; } bool is_array() const { return array_length != 0; } + bool is_bounded_array() const { return is_array() && array_length != 0xFFFFFFFF; } + bool is_unbounded_array() const { return array_length == 0xFFFFFFFF; } bool is_scalar() const { return is_numeric() && !is_matrix() && !is_vector() && !is_array(); } bool is_vector() const { return is_numeric() && rows > 1 && cols == 1; } bool is_matrix() const { return is_numeric() && rows >= 1 && cols > 1; } @@ -109,27 +111,27 @@ namespace reshadefx unsigned int components() const { return rows * cols; } unsigned int texture_dimension() const { return base >= t_texture1d && base <= t_storage3d_float ? 
((base - t_texture1d) % 3) + 1 : 0; } - friend inline bool operator==(const type &lhs, const type &rhs) + friend bool operator==(const type &lhs, const type &rhs) { - return lhs.base == rhs.base && lhs.rows == rhs.rows && lhs.cols == rhs.cols && lhs.array_length == rhs.array_length && lhs.definition == rhs.definition; + return lhs.base == rhs.base && lhs.rows == rhs.rows && lhs.cols == rhs.cols && lhs.array_length == rhs.array_length && lhs.struct_definition == rhs.struct_definition; } - friend inline bool operator!=(const type &lhs, const type &rhs) + friend bool operator!=(const type &lhs, const type &rhs) { return !operator==(lhs, rhs); } // Underlying base type ('int', 'float', ...) - datatype base = t_void; + datatype base : 8; // Number of rows if this is a vector type - unsigned int rows = 0; + uint32_t rows : 4; // Number of columns if this is a matrix type - unsigned int cols = 0; + uint32_t cols : 4; // Bit mask of all the qualifiers decorating the type - unsigned int qualifiers = 0; - // Negative if an unsized array, otherwise the number of elements if this is an array type - int array_length = 0; + uint32_t qualifiers : 16; + // Number of elements if this is an array type, 0xFFFFFFFF if it is an unsized array + uint32_t array_length; // ID of the matching struct if this is a struct type - uint32_t definition = 0; + uint32_t struct_definition; }; /// @@ -168,8 +170,8 @@ namespace reshadefx op_type op; reshadefx::type from, to; - uint32_t index = 0; - signed char swizzle[4] = {}; + uint32_t index; + signed char swizzle[4]; }; uint32_t base = 0; diff --git a/dep/reshadefx/include/effect_module.hpp b/dep/reshadefx/include/effect_module.hpp index 6c4d00dfe..5644cd290 100644 --- a/dep/reshadefx/include/effect_module.hpp +++ b/dep/reshadefx/include/effect_module.hpp @@ -7,14 +7,48 @@ #include "effect_expression.hpp" #include -#include namespace reshadefx { /// - /// A list of supported texture types. + /// Describes an annotation attached to a variable. 
/// - enum class texture_type + struct annotation + { + reshadefx::type type = {}; + std::string name; + reshadefx::constant value = {}; + }; + + /// + /// Describes a struct member or parameter. + /// + struct member_type + { + reshadefx::type type = {}; + uint32_t id = 0; + std::string name; + std::string semantic; + reshadefx::location location; + bool has_default_value = false; + reshadefx::constant default_value = {}; + }; + + /// + /// Describes a struct type defined in effect code. + /// + struct struct_type + { + uint32_t id = 0; + std::string name; + std::string unique_name; + std::vector member_list; + }; + + /// + /// Available texture types. + /// + enum class texture_type : uint8_t { texture_1d = 1, texture_2d = 2, @@ -22,9 +56,9 @@ namespace reshadefx }; /// - /// A list of supported texture formats. + /// Available texture formats. /// - enum class texture_format + enum class texture_format : uint8_t { unknown, @@ -46,9 +80,46 @@ namespace reshadefx }; /// - /// A filtering type used for texture lookups. + /// Describes the properties of a object. /// - enum class filter_mode + struct texture_desc + { + uint32_t width = 1; + uint32_t height = 1; + uint16_t depth = 1; + uint16_t levels = 1; + texture_type type = texture_type::texture_2d; + texture_format format = texture_format::rgba8; + }; + + /// + /// Describes a texture object defined in effect code. + /// + struct texture : texture_desc + { + uint32_t id = 0; + std::string name; + std::string unique_name; + std::string semantic; + std::vector annotations; + bool render_target = false; + bool storage_access = false; + }; + + /// + /// Describes the binding of a object. + /// + struct texture_binding + { + uint32_t binding = 0; + std::string texture_name; + bool srgb = false; + }; + + /// + /// Texture filtering modes available for texture sampling operations. 
+ /// + enum class filter_mode : uint8_t { min_mag_mip_point = 0, min_mag_point_mip_linear = 0x1, @@ -57,13 +128,14 @@ namespace reshadefx min_linear_mag_mip_point = 0x10, min_linear_mag_point_mip_linear = 0x11, min_mag_linear_mip_point = 0x14, - min_mag_mip_linear = 0x15 + min_mag_mip_linear = 0x15, + anisotropic = 0x55 }; /// - /// Specifies behavior of sampling with texture coordinates outside an image. + /// Sampling behavior at texture coordinates outside the bounds of a texture resource. /// - enum class texture_address_mode + enum class texture_address_mode : uint8_t { wrap = 1, mirror = 2, @@ -72,9 +144,117 @@ namespace reshadefx }; /// - /// Specifies RGB or alpha blending operations. + /// Describes the properties of a object. /// - enum class pass_blend_op : uint8_t + struct sampler_desc + { + filter_mode filter = filter_mode::min_mag_mip_linear; + texture_address_mode address_u = texture_address_mode::clamp; + texture_address_mode address_v = texture_address_mode::clamp; + texture_address_mode address_w = texture_address_mode::clamp; + float min_lod = -3.402823466e+38f; + float max_lod = +3.402823466e+38f; // FLT_MAX + float lod_bias = 0.0f; + }; + + /// + /// Describes a texture sampler object defined in effect code. + /// + struct sampler : sampler_desc + { + reshadefx::type type = {}; + uint32_t id = 0; + std::string name; + std::string unique_name; + std::string texture_name; + std::vector annotations; + bool srgb = false; + }; + + /// + /// Describes the binding of a object. + /// + struct sampler_binding : sampler_desc + { + uint32_t binding = 0; + }; + + /// + /// Describes the properties of a object. + /// + struct storage_desc + { + uint16_t level = 0; + }; + + /// + /// Describes a texture storage object defined in effect code. + /// + struct storage : storage_desc + { + reshadefx::type type = {}; + uint32_t id = 0; + std::string name; + std::string unique_name; + std::string texture_name; + }; + + /// + /// Describes the binding of a object. 
+ /// + struct storage_binding : storage_desc + { + uint32_t binding = 0; + std::string texture_name; + }; + + /// + /// Describes a uniform variable defined in effect code. + /// + struct uniform + { + reshadefx::type type = {}; + std::string name; + uint32_t size = 0; + uint32_t offset = 0; + std::vector annotations; + bool has_initializer_value = false; + reshadefx::constant initializer_value = {}; + }; + + /// + /// Type of a shader entry point. + /// + enum class shader_type + { + unknown, + vertex, + pixel, + compute + }; + + /// + /// Describes a function defined in effect code. + /// + struct function + { + reshadefx::type return_type = {}; + uint32_t id = 0; + std::string name; + std::string unique_name; + std::string return_semantic; + std::vector parameter_list; + shader_type type = shader_type::unknown; + int num_threads[3] = {}; + std::vector referenced_samplers; + std::vector referenced_storages; + std::vector referenced_functions; + }; + + /// + /// Color or alpha blending operations. + /// + enum class blend_op : uint8_t { add = 1, subtract, @@ -84,9 +264,9 @@ namespace reshadefx }; /// - /// Specifies blend factors, which modulate values between the pixel shader output and render target. + /// Blend factors in color or alpha blending operations, which modulate values between the pixel shader output and render target. /// - enum class pass_blend_factor : uint8_t + enum class blend_factor : uint8_t { zero = 0, one = 1, @@ -101,9 +281,9 @@ namespace reshadefx }; /// - /// Specifies the stencil operations that can be performed during depth-stencil testing. + /// Stencil operations that can be performed during depth-stencil testing. /// - enum class pass_stencil_op : uint8_t + enum class stencil_op : uint8_t { zero = 0, keep, @@ -116,9 +296,9 @@ namespace reshadefx }; /// - /// Specifies comparison options for depth-stencil testing. + /// Comparison operations for depth-stencil testing. 
/// - enum class pass_stencil_func : uint8_t + enum class stencil_func : uint8_t { never, less, @@ -143,205 +323,70 @@ namespace reshadefx }; /// - /// A struct type defined in the effect code. + /// Describes a render pass with all its state info. /// - struct struct_info - { - std::string name; - std::string unique_name; - std::vector member_list; - uint32_t definition = 0; - }; - - /// - /// A struct field defined in the effect code. - /// - struct struct_member_info - { - reshadefx::type type; - std::string name; - std::string semantic; - reshadefx::location location; - uint32_t definition = 0; - }; - - /// - /// An annotation attached to a variable. - /// - struct annotation - { - reshadefx::type type; - std::string name; - reshadefx::constant value; - }; - - /// - /// A texture defined in the effect code. - /// - struct texture_info - { - uint32_t id = 0; - uint32_t binding = 0; - std::string name; - std::string semantic; - std::string unique_name; - std::vector annotations; - texture_type type = texture_type::texture_2d; - uint32_t width = 1; - uint32_t height = 1; - uint16_t depth = 1; - uint16_t levels = 1; - texture_format format = texture_format::rgba8; - bool render_target = false; - bool storage_access = false; - }; - - /// - /// A texture sampler defined in the effect code. - /// - struct sampler_info - { - uint32_t id = 0; - uint32_t binding = 0; - uint32_t texture_binding = 0; - std::string name; - reshadefx::type type; - std::string unique_name; - std::string texture_name; - std::vector annotations; - filter_mode filter = filter_mode::min_mag_mip_linear; - texture_address_mode address_u = texture_address_mode::clamp; - texture_address_mode address_v = texture_address_mode::clamp; - texture_address_mode address_w = texture_address_mode::clamp; - float min_lod = -3.402823466e+38f; - float max_lod = +3.402823466e+38f; // FLT_MAX - float lod_bias = 0.0f; - uint8_t srgb = false; - }; - - /// - /// A texture storage object defined in the effect code. 
- /// - struct storage_info - { - uint32_t id = 0; - uint32_t binding = 0; - std::string name; - reshadefx::type type; - std::string unique_name; - std::string texture_name; - uint16_t level = 0; - }; - - /// - /// An uniform variable defined in the effect code. - /// - struct uniform_info - { - std::string name; - reshadefx::type type; - uint32_t size = 0; - uint32_t offset = 0; - std::vector annotations; - bool has_initializer_value = false; - reshadefx::constant initializer_value; - }; - - /// - /// Type of a shader entry point. - /// - enum class shader_type - { - vs, - ps, - cs, - }; - - /// - /// A shader entry point function. - /// - struct entry_point - { - std::string name; - shader_type type; - }; - - /// - /// A function defined in the effect code. - /// - struct function_info - { - uint32_t definition; - std::string name; - std::string unique_name; - reshadefx::type return_type; - std::string return_semantic; - std::vector parameter_list; - std::unordered_set referenced_samplers; - std::unordered_set referenced_storages; - }; - - /// - /// A render pass with all its state info. 
- /// - struct pass_info + struct pass { std::string name; std::string render_target_names[8] = {}; std::string vs_entry_point; std::string ps_entry_point; std::string cs_entry_point; - uint8_t generate_mipmaps = true; - uint8_t clear_render_targets = false; - uint8_t srgb_write_enable = false; - uint8_t blend_enable[8] = { false, false, false, false, false, false, false, false }; - uint8_t stencil_enable = false; - uint8_t color_write_mask[8] = { 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF }; + bool generate_mipmaps = true; + bool clear_render_targets = false; + bool blend_enable[8] = { false, false, false, false, false, false, false, false }; + blend_factor source_color_blend_factor[8] = { blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one }; + blend_factor dest_color_blend_factor[8] = { blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero }; + blend_op color_blend_op[8] = { blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add }; + blend_factor source_alpha_blend_factor[8] = { blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one, blend_factor::one }; + blend_factor dest_alpha_blend_factor[8] = { blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero, blend_factor::zero }; + blend_op alpha_blend_op[8] = { blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add, blend_op::add }; + bool srgb_write_enable = false; + uint8_t render_target_write_mask[8] = { 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF }; + bool stencil_enable = false; uint8_t stencil_read_mask = 0xFF; uint8_t stencil_write_mask = 0xFF; - 
pass_blend_op blend_op[8] = { pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add }; - pass_blend_op blend_op_alpha[8] = { pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add, pass_blend_op::add }; - pass_blend_factor src_blend[8] = { pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one }; - pass_blend_factor dest_blend[8] = { pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero }; - pass_blend_factor src_blend_alpha[8] = { pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one, pass_blend_factor::one }; - pass_blend_factor dest_blend_alpha[8] = { pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero, pass_blend_factor::zero }; - pass_stencil_func stencil_comparison_func = pass_stencil_func::always; - uint32_t stencil_reference_value = 0; - pass_stencil_op stencil_op_pass = pass_stencil_op::keep; - pass_stencil_op stencil_op_fail = pass_stencil_op::keep; - pass_stencil_op stencil_op_depth_fail = pass_stencil_op::keep; - uint32_t num_vertices = 3; + stencil_func stencil_comparison_func = stencil_func::always; + stencil_op stencil_pass_op = stencil_op::keep; + stencil_op stencil_fail_op = stencil_op::keep; + stencil_op stencil_depth_fail_op = stencil_op::keep; primitive_topology topology = primitive_topology::triangle_list; + uint32_t stencil_reference_value = 0; + 
uint32_t num_vertices = 3; uint32_t viewport_width = 0; uint32_t viewport_height = 0; uint32_t viewport_dispatch_z = 1; - std::vector samplers; - std::vector storages; + + // Bindings specific for the code generation target (in case of combined texture and sampler, 'texture_bindings' and 'sampler_bindings' will be the same size and point to the same bindings, otherwise they are independent) + std::vector texture_bindings; + std::vector sampler_bindings; + std::vector storage_bindings; }; /// /// A collection of passes that make up an effect. /// - struct technique_info + struct technique { std::string name; - std::vector passes; + std::vector passes; std::vector annotations; }; /// /// In-memory representation of an effect file. /// - struct module + struct effect_module { - std::vector code; + std::vector> entry_points; - std::vector entry_points; - std::vector textures; - std::vector samplers; - std::vector storages; - std::vector uniforms, spec_constants; - std::vector techniques; + std::vector textures; + std::vector samplers; + std::vector storages; + + std::vector uniforms; + std::vector spec_constants; + std::vector techniques; uint32_t total_uniform_size = 0; uint32_t num_texture_bindings = 0; diff --git a/dep/reshadefx/include/effect_parser.hpp b/dep/reshadefx/include/effect_parser.hpp index f073b4790..159a4a0d4 100644 --- a/dep/reshadefx/include/effect_parser.hpp +++ b/dep/reshadefx/include/effect_parser.hpp @@ -58,14 +58,14 @@ namespace reshadefx bool peek_multary_op(unsigned int &precedence) const; bool accept_assignment_op(); - void parse_top(bool &parse_success); + bool parse_top(bool &parse_success); bool parse_struct(); - bool parse_function(type type, std::string name); + bool parse_function(type type, std::string name, shader_type stype, int num_threads[3]); bool parse_variable(type type, std::string name, bool global = false); bool parse_technique(); - bool parse_technique_pass(pass_info &info); + bool parse_technique_pass(pass &info); bool 
parse_type(type &type); - bool parse_array_size(type &type); + bool parse_array_length(type &type); bool parse_expression(expression &expression); bool parse_expression_unary(expression &expression); bool parse_expression_multary(expression &expression, unsigned int precedence = 0); @@ -74,15 +74,16 @@ namespace reshadefx bool parse_statement(bool scoped); bool parse_statement_block(bool scoped); - codegen *_codegen = nullptr; std::string _errors; - token _token, _token_next, _token_backup; std::unique_ptr _lexer; - size_t _lexer_backup_offset = 0; + class codegen *_codegen = nullptr; + + token _token; + token _token_next; + token _token_backup; std::vector _loop_break_target_stack; std::vector _loop_continue_target_stack; - reshadefx::function_info *_current_function = nullptr; }; } diff --git a/dep/reshadefx/include/effect_preprocessor.hpp b/dep/reshadefx/include/effect_preprocessor.hpp index aa416d4e8..7a4d57651 100644 --- a/dep/reshadefx/include/effect_preprocessor.hpp +++ b/dep/reshadefx/include/effect_preprocessor.hpp @@ -154,17 +154,16 @@ namespace reshadefx void expand_macro(const std::string &name, const macro ¯o, const std::vector &arguments); void create_macro_replacement_list(macro ¯o); - bool _success = true; include_file_exists_callback _file_exists_cb; include_read_file_callback _read_file_cb; std::string _output, _errors; - std::string _current_token_raw_data; - reshadefx::token _token; - location _output_location; std::vector _input_stack; size_t _next_input_index = 0; size_t _current_input_index = 0; + reshadefx::token _token; + std::string _current_token_raw_data; + reshadefx::location _output_location; std::vector _if_stack; diff --git a/dep/reshadefx/include/effect_symbol_table.hpp b/dep/reshadefx/include/effect_symbol_table.hpp index 0dfcf2793..983ae8785 100644 --- a/dep/reshadefx/include/effect_symbol_table.hpp +++ b/dep/reshadefx/include/effect_symbol_table.hpp @@ -42,7 +42,7 @@ namespace reshadefx uint32_t id = 0; reshadefx::type type = {}; 
reshadefx::constant constant = {}; - const reshadefx::function_info *function = nullptr; + const reshadefx::function *function = nullptr; }; struct scoped_symbol : symbol { diff --git a/dep/reshadefx/include/effect_token.hpp b/dep/reshadefx/include/effect_token.hpp index e4bb633c9..4b8e7b244 100644 --- a/dep/reshadefx/include/effect_token.hpp +++ b/dep/reshadefx/include/effect_token.hpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace reshadefx { @@ -246,7 +247,7 @@ namespace reshadefx }; std::string literal_as_string; - inline operator tokenid() const { return id; } + operator tokenid() const { return id; } static std::string id_to_name(tokenid id); }; diff --git a/dep/reshadefx/src/effect_codegen_glsl.cpp b/dep/reshadefx/src/effect_codegen_glsl.cpp index 3b0caaa08..9eab3ca9a 100644 --- a/dep/reshadefx/src/effect_codegen_glsl.cpp +++ b/dep/reshadefx/src/effect_codegen_glsl.cpp @@ -5,22 +5,43 @@ #include "effect_parser.hpp" #include "effect_codegen.hpp" -#include // signbit, isinf, isnan -#include // snprintf +#include // std::isinf, std::isnan, std::signbit #include -#include // std::find_if, std::max +#include // std::memcmp +#include // std::from_chars, std::to_chars +#include // std::find, std::find_if, std::max #include #include +#include #include using namespace reshadefx; namespace { + +inline char to_digit(unsigned int value) +{ + assert(value < 10); + return '0' + static_cast(value); +} + +inline uint32_t align_up(uint32_t size, uint32_t alignment) +{ + alignment -= 1; + return ((size + alignment) & ~alignment); +} + class codegen_glsl final : public codegen { public: - codegen_glsl(bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) - : _gles(gles), _debug_info(debug_info), _vulkan_semantics(vulkan_semantics), _uniforms_to_spec_constants(uniforms_to_spec_constants), _enable_16bit_types(enable_16bit_types), _flip_vert_y(flip_vert_y) + codegen_glsl(unsigned version, 
bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) : + _glsl_version(version), + _gles(gles), + _debug_info(debug_info), + _vulkan_semantics(vulkan_semantics), + _uniforms_to_spec_constants(uniforms_to_spec_constants), + _enable_16bit_types(enable_16bit_types), + _flip_vert_y(flip_vert_y) { // Create default block and reserve a memory block to avoid frequent reallocations std::string &block = _blocks.emplace(0, std::string()).first->second; @@ -40,36 +61,42 @@ private: expression, }; - std::string _ubo_block; - std::string _compute_block; - std::unordered_map _names; - std::unordered_map _blocks; + unsigned _glsl_version = 0; bool _gles = false; bool _debug_info = false; bool _vulkan_semantics = false; bool _uniforms_to_spec_constants = false; bool _enable_16bit_types = false; bool _flip_vert_y = false; - bool _enable_control_flow_attributes = false; + + std::unordered_map _names; + std::unordered_map _blocks; + std::string _ubo_block; + std::string _compute_block; + std::string _current_function_declaration; + std::unordered_map _remapped_sampler_variables; std::unordered_map _semantic_to_location; + std::vector> _constant_lookup; // Only write compatibility intrinsics to result if they are actually in use bool _uses_fmod = false; bool _uses_componentwise_or = false; bool _uses_componentwise_and = false; bool _uses_componentwise_cond = false; + bool _uses_control_flow_attributes = false; - void write_result(module &module) override + std::string finalize_preamble() const { - module = std::move(_module); + std::string preamble = "#version " + std::to_string(_glsl_version) + (_gles ? " es\n" : ((_glsl_version >= 330) ? 
" core\n" : "\n")); - std::string preamble; + if (_gles) + preamble += "precision highp float;\nprecision highp int;\nprecision highp sampler2D;\n"; if (_enable_16bit_types) // GL_NV_gpu_shader5, GL_AMD_gpu_shader_half_float or GL_EXT_shader_16bit_storage preamble += "#extension GL_NV_gpu_shader5 : require\n"; - if (_enable_control_flow_attributes) + if (_uses_control_flow_attributes) preamble += "#extension GL_EXT_control_flow_attributes : enable\n"; if (_uses_fmod) @@ -95,6 +122,9 @@ private: "vec2 compCond(bvec2 cond, vec2 a, vec2 b) { return vec2(cond.x ? a.x : b.x, cond.y ? a.y : b.y); }\n" "vec3 compCond(bvec3 cond, vec3 a, vec3 b) { return vec3(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z); }\n" "vec4 compCond(bvec4 cond, vec4 a, vec4 b) { return vec4(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z, cond.w ? a.w : b.w); }\n" + "bvec2 compCond(bvec2 cond, bvec2 a, bvec2 b) { return bvec2(cond.x ? a.x : b.x, cond.y ? a.y : b.y); }\n" + "bvec3 compCond(bvec3 cond, bvec3 a, bvec3 b) { return bvec3(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z); }\n" + "bvec4 compCond(bvec4 cond, bvec4 a, bvec4 b) { return bvec4(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z, cond.w ? a.w : b.w); }\n" "ivec2 compCond(bvec2 cond, ivec2 a, ivec2 b) { return ivec2(cond.x ? a.x : b.x, cond.y ? a.y : b.y); }\n" "ivec3 compCond(bvec3 cond, ivec3 a, ivec3 b) { return ivec3(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z); }\n" "ivec4 compCond(bvec4 cond, ivec4 a, ivec4 b) { return ivec4(cond.x ? a.x : b.x, cond.y ? a.y : b.y, cond.z ? a.z : b.z, cond.w ? 
a.w : b.w); }\n" @@ -114,10 +144,120 @@ private: } } - module.code.assign(preamble.begin(), preamble.end()); + return preamble; + } - const std::string &main_block = _blocks.at(0); - module.code.insert(module.code.end(), main_block.begin(), main_block.end()); + std::string finalize_code() const override + { + std::string code = finalize_preamble(); + + // Add sampler definitions + for (const sampler &info : _module.samplers) + code += _blocks.at(info.id); + + // Add storage definitions + for (const storage &info : _module.storages) + code += _blocks.at(info.id); + + // Add global definitions (struct types, global variables, ...) + code += _blocks.at(0); + + // Add function definitions + for (const std::unique_ptr &func : _functions) + { + const bool is_entry_point = func->unique_name[0] == 'E'; + if (is_entry_point) + code += "#ifdef " + func->unique_name + '\n'; + + code += _blocks.at(func->id); + + if (is_entry_point) + code += "#endif\n"; + } + + return code; + } + std::string finalize_code_for_entry_point(const std::string &entry_point_name) const override + { + const auto entry_point_it = std::find_if(_functions.begin(), _functions.end(), + [&entry_point_name](const std::unique_ptr &func) { + return func->unique_name == entry_point_name; + }); + if (entry_point_it == _functions.end()) + return {}; + const function &entry_point = *entry_point_it->get(); + + std::string code = finalize_preamble(); + + if (entry_point.type != shader_type::pixel) + code += + // OpenGL does not allow using 'discard' in the vertex shader profile + "#define discard\n" + // 'dFdx', 'dFdx' and 'fwidth' too are only available in fragment shaders + "#define dFdx(x) x\n" + "#define dFdy(y) y\n" + "#define fwidth(p) p\n"; + + if (entry_point.type != shader_type::compute) + code += + // OpenGL does not allow using 'shared' in vertex/fragment shader profile + "#define shared\n" + "#define atomicAdd(a, b) a\n" + "#define atomicAnd(a, b) a\n" + "#define atomicOr(a, b) a\n" + "#define 
atomicXor(a, b) a\n" + "#define atomicMin(a, b) a\n" + "#define atomicMax(a, b) a\n" + "#define atomicExchange(a, b) a\n" + "#define atomicCompSwap(a, b, c) a\n" + // Barrier intrinsics are only available in compute shaders + "#define barrier()\n" + "#define memoryBarrier()\n" + "#define groupMemoryBarrier()\n"; + + const auto replace_binding = + [](std::string &code, uint32_t binding) { + const size_t beg = code.find("layout(binding = ") + 17; + const size_t end = code.find_first_of("),", beg); + code.replace(beg, end - beg, std::to_string(binding)); + }; + + // Add referenced sampler definitions + for (uint32_t binding = 0; binding < entry_point.referenced_samplers.size(); ++binding) + { + if (entry_point.referenced_samplers[binding] == 0) + continue; + + std::string block_code = _blocks.at(entry_point.referenced_samplers[binding]); + replace_binding(block_code, binding); + code += block_code; + } + + // Add referenced storage definitions + for (uint32_t binding = 0; binding < entry_point.referenced_storages.size(); ++binding) + { + if (entry_point.referenced_storages[binding] == 0) + continue; + + std::string block_code = _blocks.at(entry_point.referenced_storages[binding]); + replace_binding(block_code, binding); + code += block_code; + } + + // Add global definitions (struct types, global variables, ...) 
+ code += _blocks.at(0); + + // Add referenced function definitions + for (const std::unique_ptr &func : _functions) + { + if (func->id != entry_point.id && + std::find(entry_point.referenced_functions.begin(), entry_point.referenced_functions.end(), func->id) == entry_point.referenced_functions.end()) + continue; + + code += _blocks.at(func->id); + } + + return code; } template @@ -161,9 +301,9 @@ private: break; case type::t_bool: if (type.cols > 1) - s += "mat" + std::to_string(type.rows) + 'x' + std::to_string(type.cols); + s += "mat", s += to_digit(type.rows), s += 'x', s += to_digit(type.cols); else if (type.rows > 1) - s += "bvec" + std::to_string(type.rows); + s += "bvec", s += to_digit(type.rows); else s += "bool"; break; @@ -172,7 +312,7 @@ private: { assert(type.cols == 1); if (type.rows > 1) - s += "i16vec" + std::to_string(type.rows); + s += "i16vec", s += to_digit(type.rows); else s += "int16_t"; break; @@ -182,9 +322,9 @@ private: [[fallthrough]]; case type::t_int: if (type.cols > 1) - s += "mat" + std::to_string(type.rows) + 'x' + std::to_string(type.cols); + s += "mat", s += to_digit(type.rows), s += 'x', s += to_digit(type.cols); else if (type.rows > 1) - s += "ivec" + std::to_string(type.rows); + s += "ivec", s += to_digit(type.rows); else s += "int"; break; @@ -193,7 +333,7 @@ private: { assert(type.cols == 1); if (type.rows > 1) - s += "u16vec" + std::to_string(type.rows); + s += "u16vec", s += to_digit(type.rows); else s += "uint16_t"; break; @@ -203,9 +343,9 @@ private: [[fallthrough]]; case type::t_uint: if (type.cols > 1) - s += "mat" + std::to_string(type.rows) + 'x' + std::to_string(type.cols); + s += "mat", s += to_digit(type.rows), s += 'x', s += to_digit(type.cols); else if (type.rows > 1) - s += "uvec" + std::to_string(type.rows); + s += "uvec", s += to_digit(type.rows); else s += "uint"; break; @@ -214,7 +354,7 @@ private: { assert(type.cols == 1); if (type.rows > 1) - s += "f16vec" + std::to_string(type.rows); + s += "f16vec", s += 
to_digit(type.rows); else s += "float16_t"; break; @@ -224,14 +364,14 @@ private: [[fallthrough]]; case type::t_float: if (type.cols > 1) - s += "mat" + std::to_string(type.rows) + 'x' + std::to_string(type.cols); + s += "mat", s += to_digit(type.rows), s += 'x', s += to_digit(type.cols); else if (type.rows > 1) - s += "vec" + std::to_string(type.rows); + s += "vec", s += to_digit(type.rows); else s += "float"; break; case type::t_struct: - s += id_to_name(type.definition); + s += id_to_name(type.struct_definition); break; case type::t_sampler1d_int: s += "isampler1D"; @@ -309,37 +449,40 @@ private: assert(false); } } - void write_constant(std::string &s, const type &type, const constant &data) const + void write_constant(std::string &s, const type &data_type, const constant &data) const { - if (type.is_array()) + if (data_type.is_array()) { - auto elem_type = type; + assert(data_type.is_bounded_array()); + + type elem_type = data_type; elem_type.array_length = 0; write_type(s, elem_type); - s += '[' + std::to_string(type.array_length) + "]("; + s += '[' + std::to_string(data_type.array_length) + "]("; - for (int i = 0; i < type.array_length; ++i) + for (unsigned int a = 0; a < data_type.array_length; ++a) { - write_constant(s, elem_type, i < static_cast(data.array_data.size()) ? data.array_data[i] : constant()); - - if (i < type.array_length - 1) - s += ", "; + write_constant(s, elem_type, a < static_cast(data.array_data.size()) ? 
data.array_data[a] : constant {}); + s += ", "; } + // Remove trailing ", " + s.erase(s.size() - 2); + s += ')'; return; } // There can only be numeric constants - assert(type.is_numeric()); + assert(data_type.is_numeric()); - if (!type.is_scalar()) - write_type(s, type), s += '('; + if (!data_type.is_scalar()) + write_type(s, data_type), s += '('; - for (unsigned int i = 0, components = type.components(); i < components; ++i) + for (unsigned int i = 0; i < data_type.components(); ++i) { - switch (type.base) + switch (data_type.base) { case type::t_bool: s += data.as_uint[i] ? "true" : "false"; @@ -362,22 +505,34 @@ private: s += std::signbit(data.as_float[i]) ? "1.0/0.0/*inf*/" : "-1.0/0.0/*-inf*/"; break; } + { +#ifdef _MSC_VER + char temp[64]; + const std::to_chars_result res = std::to_chars(temp, temp + sizeof(temp), data.as_float[i], std::chars_format::scientific, 8); + if (res.ec == std::errc()) + s.append(temp, res.ptr); + else + assert(false); +#else std::ostringstream ss; ss.imbue(std::locale::classic()); - ss << data.as_float[i]; + ss << std::scientific << data.as_float[i]; s += ss.str(); +#endif } break; default: assert(false); } - if (i < components - 1) - s += ", "; + s += ", "; } - if (!type.is_scalar()) + // Remove trailing ", " + s.erase(s.size() - 2); + + if (!data_type.is_scalar()) s += ')'; } void write_location(std::string &s, const location &loc) const @@ -404,7 +559,7 @@ private: s += "r32i"; break; case texture_format::r32u: - s += "r32u"; + s += "r32ui"; break; case texture_format::r32f: s += "r32f"; @@ -464,21 +619,17 @@ private: if constexpr (naming_type != naming::reserved) name = escape_name(std::move(name)); if constexpr (naming_type == naming::general) - if (std::find_if(_names.begin(), _names.end(), [&name](const auto &it) { return it.second == name; }) != _names.end()) + if (std::find_if(_names.begin(), _names.end(), + [&name](const auto &names_it) { return names_it.second == name; }) != _names.end()) name += '_' + 
std::to_string(id); // Append a numbered suffix if the name already exists _names[id] = std::move(name); } - uint32_t semantic_to_location(const std::string &semantic, uint32_t max_array_length = 1) + uint32_t semantic_to_location(const std::string &semantic, uint32_t max_attributes = 1) { - if (semantic.compare(0, 5, "COLOR") == 0) - return std::strtoul(semantic.c_str() + 5, nullptr, 10); - if (semantic.compare(0, 9, "SV_TARGET") == 0) - return std::strtoul(semantic.c_str() + 9, nullptr, 10); - - if (const auto it = _semantic_to_location.find(semantic); - it != _semantic_to_location.end()) - return it->second; + if (const auto location_it = _semantic_to_location.find(semantic); + location_it != _semantic_to_location.end()) + return location_it->second; // Extract the semantic index from the semantic name (e.g. 2 for "TEXCOORD2") size_t digit_index = semantic.size() - 1; @@ -486,13 +637,18 @@ private: digit_index--; digit_index++; - const uint32_t semantic_digit = std::strtoul(semantic.c_str() + digit_index, nullptr, 10); const std::string semantic_base = semantic.substr(0, digit_index); + uint32_t semantic_digit = 0; + std::from_chars(semantic.c_str() + digit_index, semantic.c_str() + semantic.size(), semantic_digit); + + if (semantic_base == "COLOR" || semantic_base == "SV_TARGET") + return semantic_digit; + uint32_t location = static_cast(_semantic_to_location.size()); // Now create adjoining location indices for all possible semantic indices belonging to this semantic name - for (uint32_t a = 0; a < semantic_digit + max_array_length; ++a) + for (uint32_t a = 0; a < semantic_digit + max_attributes; ++a) { const auto insert = _semantic_to_location.emplace(semantic_base + std::to_string(a), location + a); if (!insert.second) @@ -543,14 +699,14 @@ private: // Remove duplicated underscore symbols from name which can occur due to namespaces but are not allowed in GLSL for (size_t pos = 0; (pos = name.find("__", pos)) != std::string::npos;) - name.replace(pos, 2, 
"_"); + name.replace(pos, 2, "_x"); return name; } std::string semantic_to_builtin(std::string name, const std::string &semantic, shader_type stype) const { if (semantic == "SV_POSITION") - return stype == shader_type::ps ? "gl_FragCoord" : "gl_Position"; + return stype == shader_type::pixel ? "gl_FragCoord" : "gl_Position"; if (semantic == "SV_POINTSIZE") return "gl_PointSize"; if (semantic == "SV_DEPTH") @@ -582,10 +738,10 @@ private: block.insert(block.begin(), '\t'); } - id define_struct(const location &loc, struct_info &info) override + id define_struct(const location &loc, struct_type &info) override { - info.definition = make_id(); - define_name(info.definition, info.unique_name); + const id res = info.id = make_id(); + define_name(res, info.unique_name); _structs.push_back(info); @@ -593,9 +749,9 @@ private: write_location(code, loc); - code += "struct " + id_to_name(info.definition) + "\n{\n"; + code += "struct " + id_to_name(res) + "\n{\n"; - for (const struct_member_info &member : info.member_list) + for (const member_type &member : info.member_list) { code += '\t'; write_type(code, member.type); // GLSL does not allow interpolation attributes on struct members @@ -611,70 +767,64 @@ private: code += "};\n"; - return info.definition; + return res; } - id define_texture(const location &, texture_info &info) override + id define_texture(const location &, texture &info) override { - info.id = make_id(); - info.binding = ~0u; + const id res = info.id = make_id(); _module.textures.push_back(info); - return info.id; + return res; } - id define_sampler(const location &loc, const texture_info &, sampler_info &info) override + id define_sampler(const location &loc, const texture &, sampler &info) override { - info.id = make_id(); - info.binding = _module.num_sampler_bindings++; - info.texture_binding = ~0u; // Unset texture bindings + const id res = info.id = create_block(); + define_name(res, info.unique_name); - define_name(info.id, info.unique_name); - - 
std::string &code = _blocks.at(_current_block); + std::string &code = _blocks.at(res); write_location(code, loc); - code += "layout("; + // Default to a binding index equivalent to the entry in the sampler list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.samplers.size()); + + code += "layout(binding = " + std::to_string(default_binding); if (_vulkan_semantics) - code += "set = 1, "; -#if 0 - code += "binding = " + std::to_string(info.binding); -#else - code += "binding = /*SAMPLER:" + info.unique_name + "*/0"; -#endif + code += ", set = 1"; code += ") uniform "; write_type(code, info.type); - code += ' ' + id_to_name(info.id) + ";\n"; + code += ' ' + id_to_name(res) + ";\n"; _module.samplers.push_back(info); - return info.id; + return res; } - id define_storage(const location &loc, const texture_info &tex_info, storage_info &info) override + id define_storage(const location &loc, const texture &tex_info, storage &info) override { - info.id = make_id(); - info.binding = _module.num_storage_bindings++; + const id res = info.id = create_block(); + define_name(res, info.unique_name); - define_name(info.id, info.unique_name); - - std::string &code = _blocks.at(_current_block); + std::string &code = _blocks.at(res); write_location(code, loc); - code += "layout(binding = " + std::to_string(info.binding) + ", "; + // Default to a binding index equivalent to the entry in the storage list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.storages.size()); + + code += "layout(binding = " + std::to_string(default_binding) + ", "; write_texture_format(code, tex_info.format); code += ") uniform "; write_type(code, info.type); - code += ' ' + id_to_name(info.id) + ";\n"; + code += ' ' + id_to_name(res) + ";\n"; _module.storages.push_back(info); - return info.id; + return res; } - id 
define_uniform(const location &loc, uniform_info &info) override + id define_uniform(const location &loc, uniform &info) override { const id res = make_id(); - define_name(res, info.name); if (_uniforms_to_spec_constants && info.has_initializer_value) @@ -689,7 +839,8 @@ private: assert(!info.type.has(type::q_static) && !info.type.has(type::q_const)); - code += "const "; + if (!_gles) + code += "const "; write_type(code, info.type); code += ' ' + id_to_name(res) + " = "; if (!info.type.is_scalar()) @@ -708,7 +859,7 @@ private: // according to rules (1), (2), and (3), and rounded up to the base alignment of a four-component vector. // 7. If the member is a row-major matrix with C columns and R rows, the matrix is stored identically to an array of R row vectors with C components each, according to rule (4). // 8. If the member is an array of S row-major matrices with C columns and R rows, the matrix is stored identically to a row of S*R row vectors with C components each, according to rule (4). - uint32_t alignment = (info.type.rows == 3 ? 4 /* (3) */ : info.type.rows /* (2)*/) * 4 /* (1)*/; + uint32_t alignment = (info.type.rows == 3 ? 
4 /* (3) */ : info.type.rows /* (2) */) * 4 /* (1) */; info.size = info.type.rows * 4; if (info.type.is_matrix()) @@ -746,11 +897,22 @@ private: } id define_variable(const location &loc, const type &type, std::string name, bool global, id initializer_value) override { + // Constant variables with a constant initializer can just point to the initializer SSA variable, since they cannot be modified anyway, thus saving an unnecessary assignment + if (initializer_value != 0 && type.has(type::q_const) && + std::find_if(_constant_lookup.begin(), _constant_lookup.end(), + [initializer_value](const auto &x) { + return initializer_value == std::get<2>(x); + }) != _constant_lookup.end()) + return initializer_value; + const id res = make_id(); // GLSL does not allow local sampler variables, so try to remap those if (!global && type.is_sampler()) - return (_remapped_sampler_variables[res] = 0), res; + { + _remapped_sampler_variables[res] = 0; + return res; + } if (!name.empty()) define_name(res, name); @@ -762,9 +924,6 @@ private: if (!global) code += '\t'; - if (initializer_value != 0 && (type.has(type::q_const) && !_gles)) - code += "const "; - write_type(code, type); code += ' ' + id_to_name(res); @@ -778,87 +937,98 @@ private: return res; } - id define_function(const location &loc, function_info &info) override + id define_function(const location &loc, function &info) override { - return define_function(loc, info, false); - } + const id res = info.id = make_id(); - id define_function(const location &loc, function_info &info, bool is_entry_point) - { - info.definition = make_id(); - - // Name is used in other places like the "ENTRY_POINT" defines, so escape it here + // Name is used in other places like the entry point defines, so escape it here info.unique_name = escape_name(info.unique_name); + assert(!info.unique_name.empty() && (info.unique_name[0] == 'F' || info.unique_name[0] == 'E')); + const bool is_entry_point = info.unique_name[0] == 'E'; if (!is_entry_point) - 
define_name(info.definition, info.unique_name); + define_name(res, info.unique_name); else - define_name(info.definition, "main"); + define_name(res, "main"); - std::string &code = _blocks.at(_current_block); + assert(_current_block == 0 && (_current_function_declaration.empty() || is_entry_point)); + std::string &code = _current_function_declaration; write_location(code, loc); write_type(code, info.return_type); - code += ' ' + id_to_name(info.definition) + '('; + code += ' ' + id_to_name(res) + '('; assert(info.parameter_list.empty() || !is_entry_point); - for (size_t i = 0, num_params = info.parameter_list.size(); i < num_params; ++i) + for (member_type ¶m : info.parameter_list) { - auto ¶m = info.parameter_list[i]; - - param.definition = make_id(); - define_name(param.definition, param.name); + param.id = make_id(); + define_name(param.id, param.name); code += '\n'; write_location(code, param.location); code += '\t'; write_type(code, param.type); // GLSL does not allow interpolation attributes on function parameters - code += ' ' + id_to_name(param.definition); + code += ' ' + id_to_name(param.id); if (param.type.is_array()) code += '[' + std::to_string(param.type.array_length) + ']'; - if (i < num_params - 1) - code += ','; + code += ','; } + // Remove trailing comma + if (!info.parameter_list.empty()) + code.pop_back(); + code += ")\n"; - _functions.push_back(std::make_unique(info)); + _functions.push_back(std::make_unique(info)); + _current_function = _functions.back().get(); - return info.definition; + return res; } - void define_entry_point(function_info &func, shader_type stype, int num_threads[3]) override + void define_entry_point(function &func) override { - // Modify entry point name so each thread configuration is made separate - if (stype == shader_type::cs) - func.unique_name = 'E' + func.unique_name + - '_' + std::to_string(num_threads[0]) + - '_' + std::to_string(num_threads[1]) + - '_' + std::to_string(num_threads[2]); + 
assert(!func.unique_name.empty() && func.unique_name[0] == 'F'); + func.unique_name[0] = 'E'; - if (const auto it = std::find_if(_module.entry_points.begin(), _module.entry_points.end(), - [&func](const auto &ep) { return ep.name == func.unique_name; }); - it != _module.entry_points.end()) + // Modify entry point name so each thread configuration is made separate + if (func.type == shader_type::compute) + func.unique_name += + '_' + std::to_string(func.num_threads[0]) + + '_' + std::to_string(func.num_threads[1]) + + '_' + std::to_string(func.num_threads[2]); + + if (std::find_if(_module.entry_points.begin(), _module.entry_points.end(), + [&func](const std::pair &entry_point) { + return entry_point.first == func.unique_name; + }) != _module.entry_points.end()) return; - _module.entry_points.push_back({ func.unique_name, stype }); + _module.entry_points.emplace_back(func.unique_name, func.type); - _blocks.at(0) += "#ifdef ENTRY_POINT_" + func.unique_name + '\n'; - if (stype == shader_type::cs) - _blocks.at(0) += "layout(local_size_x = " + std::to_string(num_threads[0]) + - ", local_size_y = " + std::to_string(num_threads[1]) + - ", local_size_z = " + std::to_string(num_threads[2]) + ") in;\n"; + assert(_current_function_declaration.empty()); + if (func.type == shader_type::compute) + _current_function_declaration += + "layout(local_size_x = " + std::to_string(func.num_threads[0]) + + ", local_size_y = " + std::to_string(func.num_threads[1]) + + ", local_size_z = " + std::to_string(func.num_threads[2]) + ") in;\n"; - function_info entry_point; + // Generate the glue entry point function + function entry_point = func; + entry_point.referenced_functions.push_back(func.id); + + // Change function signature to 'void main()' entry_point.return_type = { type::t_void }; + entry_point.return_semantic.clear(); + entry_point.parameter_list.clear(); std::unordered_map semantic_to_varying_variable; - const auto create_varying_variable = [this, stype, 
&semantic_to_varying_variable](type type, unsigned int extra_qualifiers, const std::string &name, const std::string &semantic) { + const auto create_varying_variable = [this, stype = func.type, &semantic_to_varying_variable](type type, unsigned int extra_qualifiers, const std::string &name, const std::string &semantic) { // Skip built in variables if (!semantic_to_builtin(std::string(), semantic, stype).empty()) return; @@ -875,29 +1045,24 @@ private: if (type.is_boolean()) type.base = type::t_float; - std::string &code = _blocks.at(_current_block); + const uint32_t location = semantic_to_location(semantic, std::max(1u, type.array_length)); - const int array_length = std::max(1, type.array_length); - const uint32_t location = semantic_to_location(semantic, array_length); - - for (int a = 0; a < array_length; ++a) + for (unsigned int a = 0; a < std::max(1u, type.array_length); ++a) { - code += "layout(location = " + std::to_string(location + a) + ") "; - write_type(code, type); - code += ' '; - code += escape_name(type.is_array() ? - name + '_' + std::to_string(a) : - name); - code += ";\n"; + _current_function_declaration += "layout(location = " + std::to_string(location + a) + ") "; + write_type(_current_function_declaration, type); + _current_function_declaration += ' '; + _current_function_declaration += escape_name(type.is_array() ? 
name + '_' + std::to_string(a) : name); + _current_function_declaration += ";\n"; } }; - // Translate function parameters to input/output variables + // Translate return value to output variable if (func.return_type.is_struct()) { - const struct_info &definition = get_struct(func.return_type.definition); + const struct_type &definition = get_struct(func.return_type.struct_definition); - for (const struct_member_info &member : definition.member_list) + for (const member_type &member : definition.member_list) create_varying_variable(member.type, type::q_out, "_return_" + member.name, member.semantic); } else if (!func.return_type.is_void()) @@ -905,91 +1070,96 @@ private: create_varying_variable(func.return_type, type::q_out, "_return", func.return_semantic); } - const auto num_params = func.parameter_list.size(); - for (size_t i = 0; i < num_params; ++i) + // Translate function parameters to input/output variables + for (const member_type ¶m : func.parameter_list) { - type param_type = func.parameter_list[i].type; + type param_type = param.type; param_type.qualifiers &= ~type::q_inout; // Create separate input/output variables for "inout" parameters (since "inout" is not valid on those in GLSL) - if (func.parameter_list[i].type.has(type::q_in)) + if (param.type.has(type::q_in)) { // Flatten structure parameters if (param_type.is_struct()) { - const struct_info &definition = get_struct(param_type.definition); + const struct_type &definition = get_struct(param_type.struct_definition); - for (int a = 0, array_length = std::max(1, param_type.array_length); a < array_length; a++) - for (const struct_member_info &member : definition.member_list) - create_varying_variable(member.type, param_type.qualifiers | type::q_in, "_in_param" + std::to_string(i) + '_' + std::to_string(a) + '_' + member.name, member.semantic); + for (unsigned int a = 0, array_length = std::max(1u, param_type.array_length); a < array_length; a++) + { + for (const member_type &member : 
definition.member_list) + create_varying_variable(member.type, param_type.qualifiers | type::q_in, "_in_" + id_to_name(param.id) + '_' + std::to_string(a) + '_' + member.name, member.semantic); + } } else { - create_varying_variable(param_type, type::q_in, "_in_param" + std::to_string(i), func.parameter_list[i].semantic); + create_varying_variable(param_type, type::q_in, "_in_" + id_to_name(param.id), param.semantic); } } - if (func.parameter_list[i].type.has(type::q_out)) + if (param.type.has(type::q_out)) { if (param_type.is_struct()) { - const struct_info &definition = get_struct(param_type.definition); + const struct_type &definition = get_struct(param_type.struct_definition); - for (int a = 0, array_length = std::max(1, param_type.array_length); a < array_length; a++) - for (const struct_member_info &member : definition.member_list) - create_varying_variable(member.type, param_type.qualifiers | type::q_out, "_out_param" + std::to_string(i) + '_' + std::to_string(a) + '_' + member.name, member.semantic); + for (unsigned int a = 0, array_length = std::max(1u, param_type.array_length); a < array_length; a++) + { + for (const member_type &member : definition.member_list) + create_varying_variable(member.type, param_type.qualifiers | type::q_out, "_out_" + id_to_name(param.id) + '_' + std::to_string(a) + '_' + member.name, member.semantic); + } } else { - create_varying_variable(param_type, type::q_out, "_out_param" + std::to_string(i), func.parameter_list[i].semantic); + create_varying_variable(param_type, type::q_out, "_out_" + id_to_name(param.id), param.semantic); } } } - // Translate return value to output variable - define_function({}, entry_point, true); + define_function({}, entry_point); enter_block(create_block()); std::string &code = _blocks.at(_current_block); // Handle input parameters - for (size_t i = 0; i < num_params; ++i) + for (const member_type ¶m : func.parameter_list) { - const type ¶m_type = func.parameter_list[i].type; - - if 
(param_type.has(type::q_in)) + if (param.type.has(type::q_in)) { + const std::string param_name = id_to_name(param.id); + // Create local array element variables - for (int a = 0, array_length = std::max(1, param_type.array_length); a < array_length; a++) + for (unsigned int a = 0, array_length = std::max(1u, param.type.array_length); a < array_length; a++) { - if (param_type.is_struct()) + if (param.type.is_struct()) { // Build struct from separate member input variables code += '\t'; - write_type(code, param_type); + write_type(code, param.type); code += ' '; - code += escape_name(param_type.is_array() ? - "_in_param" + std::to_string(i) + '_' + std::to_string(a) : - "_in_param" + std::to_string(i)); + code += escape_name(param.type.is_array() ? "_in_" + param_name + '_' + std::to_string(a) : "_in_" + param_name); code += " = "; - write_type(code, param_type); + write_type(code, param.type); code += '('; - const struct_info &definition = get_struct(param_type.definition); + const struct_type &struct_definition = get_struct(param.type.struct_definition); - for (const struct_member_info &member : definition.member_list) + for (const member_type &member : struct_definition.member_list) { - std::string in_param_name = "_in_param" + std::to_string(i) + '_' + std::to_string(a) + '_' + member.name; - if (const auto it = semantic_to_varying_variable.find(member.semantic); - it != semantic_to_varying_variable.end() && it->second != in_param_name) - in_param_name = it->second; + std::string in_param_name; + { + if (const auto it = semantic_to_varying_variable.find(member.semantic); + it != semantic_to_varying_variable.end()) + in_param_name = it->second; + else + in_param_name = "_in_" + param_name + '_' + std::to_string(a) + '_' + member.name; + } if (member.type.is_array()) { write_type(code, member.type); code += "[]("; - for (int b = 0; b < member.type.array_length; b++) + for (unsigned int b = 0; b < member.type.array_length; b++) { // OpenGL does not allow varying of 
type boolean, so need to cast here if (member.type.is_boolean()) @@ -1003,10 +1173,12 @@ private: if (member.type.is_boolean()) code += ')'; - if (b < member.type.array_length - 1) - code += ", "; + code += ", "; } + // Remove trailing ", " + code.erase(code.size() - 2); + code += ')'; } else @@ -1017,7 +1189,7 @@ private: code += '('; } - code += semantic_to_builtin(std::move(in_param_name), member.semantic, stype); + code += semantic_to_builtin(std::move(in_param_name), member.semantic, func.type); if (member.type.is_boolean() || (_gles && member.type.is_integral())) code += ')'; @@ -1027,34 +1199,31 @@ private: } // There can be no empty structs, so can assume that the last two characters are always ", " - code.pop_back(); - code.pop_back(); + code.erase(code.size() - 2); code += ");\n"; } - else if (const auto it = semantic_to_varying_variable.find(func.parameter_list[i].semantic); - it != semantic_to_varying_variable.end() && it->second != "_in_param" + std::to_string(i)) + else + if (const auto it = semantic_to_varying_variable.find(param.semantic); + it != semantic_to_varying_variable.end() && + it->second != "_in_" + id_to_name(param.id)) { // Create local variables for duplicated semantics (since no input/output variable is created for those, see 'create_varying_variable') code += '\t'; - write_type(code, param_type); + write_type(code, param.type); code += ' '; - code += escape_name(param_type.is_array() ? - "_in_param" + std::to_string(i) + '_' + std::to_string(a) : - "_in_param" + std::to_string(i)); + code += escape_name(param.type.is_array() ? "_in_" + id_to_name(param.id) + '_' + std::to_string(a) : "_in_" + id_to_name(param.id)); code += " = "; - if (param_type.is_boolean()) + if (param.type.is_boolean()) { - write_type(code, param_type); + write_type(code, param.type); code += '('; } - code += escape_name(param_type.is_array() ? - it->second + '_' + std::to_string(a) : - it->second); + code += escape_name(param.type.is_array() ? 
it->second + '_' + std::to_string(a) : it->second); - if (param_type.is_boolean()) + if (param.type.is_boolean()) code += ')'; code += ";\n"; @@ -1064,55 +1233,57 @@ private: // Create local parameter variables which are used as arguments in the entry point function call below code += '\t'; - write_type(code, param_type); + write_type(code, param.type); code += ' '; - code += escape_name("_param" + std::to_string(i)); - if (param_type.is_array()) - code += '[' + std::to_string(param_type.array_length) + ']'; + code += id_to_name(param.id); + if (param.type.is_array()) + code += '[' + std::to_string(param.type.array_length) + ']'; // Initialize those local variables with the input value if existing // Parameters with only an "out" qualifier are written to by the entry point function, so do not need to be initialized - if (param_type.has(type::q_in)) + if (param.type.has(type::q_in)) { code += " = "; // Build array from separate array element variables - if (param_type.is_array()) + if (param.type.is_array()) { - write_type(code, param_type); + write_type(code, param.type); code += "[]("; - for (int a = 0; a < param_type.array_length; ++a) + for (unsigned int a = 0; a < param.type.array_length; ++a) { // OpenGL does not allow varying of type boolean, so need to cast here - if (param_type.is_boolean()) + if (param.type.is_boolean()) { - write_type(code, param_type); + write_type(code, param.type); code += '('; } - code += escape_name("_in_param" + std::to_string(i) + '_' + std::to_string(a)); + code += escape_name("_in_" + id_to_name(param.id) + '_' + std::to_string(a)); - if (param_type.is_boolean()) + if (param.type.is_boolean()) code += ')'; - if (a < param_type.array_length - 1) - code += ", "; + code += ", "; } + // Remove trailing ", " + code.erase(code.size() - 2); + code += ')'; } else { - if (param_type.is_boolean() || (_gles && param_type.is_integral())) + if (param.type.is_boolean() || (_gles && param.type.is_integral())) { - write_type(code, param_type); + 
write_type(code, param.type); code += '('; } - code += semantic_to_builtin("_in_param" + std::to_string(i), func.parameter_list[i].semantic, stype); + code += semantic_to_builtin("_in_" + id_to_name(param.id), param.semantic, func.type); - if (param_type.is_boolean() || (_gles && param_type.is_integral())) + if (param.type.is_boolean() || (_gles && param.type.is_integral())) code += ')'; } } @@ -1130,48 +1301,77 @@ private: // All other output types can write to the output variable directly else if (!func.return_type.is_void()) { - code += semantic_to_builtin("_return", func.return_semantic, stype); + code += semantic_to_builtin("_return", func.return_semantic, func.type); code += " = "; } // Call the function this entry point refers to - code += id_to_name(func.definition) + '('; + code += id_to_name(func.id) + '('; - for (size_t i = 0; i < num_params; ++i) + for (const member_type ¶m : func.parameter_list) { - code += "_param" + std::to_string(i); - - if (i < num_params - 1) - code += ", "; + code += id_to_name(param.id); + code += ", "; } + // Remove trailing ", " + if (!func.parameter_list.empty()) + code.erase(code.size() - 2); + code += ");\n"; // Handle output parameters - for (size_t i = 0; i < num_params; ++i) + for (const member_type ¶m : func.parameter_list) { - const type ¶m_type = func.parameter_list[i].type; - if (!param_type.has(type::q_out)) - continue; - - if (param_type.is_struct()) + if (param.type.has(type::q_out)) { - const struct_info &definition = get_struct(param_type.definition); + const std::string param_name = id_to_name(param.id); - // Split out struct fields into separate output variables again - for (int a = 0, array_length = std::max(1, param_type.array_length); a < array_length; a++) + if (param.type.is_struct()) { - for (const struct_member_info &member : definition.member_list) + const struct_type &definition = get_struct(param.type.struct_definition); + + // Split out struct fields into separate output variables again + for 
(unsigned int a = 0, array_length = std::max(1u, param.type.array_length); a < array_length; a++) { - if (member.type.is_array()) + for (const member_type &member : definition.member_list) { - for (int b = 0; b < member.type.array_length; b++) + if (member.type.is_array()) + { + for (unsigned int b = 0; b < member.type.array_length; b++) + { + code += '\t'; + code += escape_name("_out_" + param_name + '_' + std::to_string(a) + '_' + member.name + '_' + std::to_string(b)); + code += " = "; + + // OpenGL does not allow varying of type boolean, so need to cast here + if (member.type.is_boolean()) + { + type varying_type = member.type; + varying_type.base = type::t_float; + write_type(code, varying_type); + code += '('; + } + + code += param_name; + if (param.type.is_array()) + code += '[' + std::to_string(a) + ']'; + code += '.'; + code += member.name; + code += '[' + std::to_string(b) + ']'; + + if (member.type.is_boolean()) + code += ')'; + + code += ";\n"; + } + } + else { code += '\t'; - code += escape_name("_out_param" + std::to_string(i) + '_' + std::to_string(a) + '_' + member.name + '_' + std::to_string(b)); + code += semantic_to_builtin("_out_" + param_name + '_' + std::to_string(a) + '_' + member.name, member.semantic, func.type); code += " = "; - // OpenGL does not allow varying of type boolean, so need to cast here if (member.type.is_boolean()) { type varying_type = member.type; @@ -1180,12 +1380,11 @@ private: code += '('; } - code += escape_name("_param" + std::to_string(i)); - if (param_type.is_array()) + code += param_name; + if (param.type.is_array()) code += '[' + std::to_string(a) + ']'; code += '.'; code += member.name; - code += '[' + std::to_string(b) + ']'; if (member.type.is_boolean()) code += ')'; @@ -1193,58 +1392,30 @@ private: code += ";\n"; } } - else - { - code += '\t'; - code += semantic_to_builtin("_out_param" + std::to_string(i) + '_' + std::to_string(a) + '_' + member.name, member.semantic, stype); - code += " = "; - - if 
(member.type.is_boolean()) - { - type varying_type = member.type; - varying_type.base = type::t_float; - write_type(code, varying_type); - code += '('; - } - - code += escape_name("_param" + std::to_string(i)); - if (param_type.is_array()) - code += '[' + std::to_string(a) + ']'; - code += '.'; - code += member.name; - - if (member.type.is_boolean()) - code += ')'; - - code += ";\n"; - } } } - } - else - { - if (param_type.is_array()) + else if (param.type.is_array()) { // Split up array output into individual array elements again - for (int a = 0; a < param_type.array_length; a++) + for (unsigned int a = 0; a < param.type.array_length; a++) { code += '\t'; - code += escape_name("_out_param" + std::to_string(i) + '_' + std::to_string(a)); + code += escape_name("_out_" + param_name + '_' + std::to_string(a)); code += " = "; // OpenGL does not allow varying of type boolean, so need to cast here - if (param_type.is_boolean()) + if (param.type.is_boolean()) { - type varying_type = param_type; + type varying_type = param.type; varying_type.base = type::t_float; write_type(code, varying_type); code += '('; } - code += escape_name("_param" + std::to_string(i)); + code += param_name; code += '[' + std::to_string(a) + ']'; - if (param_type.is_boolean()) + if (param.type.is_boolean()) code += ')'; code += ";\n"; @@ -1253,20 +1424,20 @@ private: else { code += '\t'; - code += semantic_to_builtin("_out_param" + std::to_string(i), func.parameter_list[i].semantic, stype); + code += semantic_to_builtin("_out_" + param_name, param.semantic, func.type); code += " = "; - if (param_type.is_boolean()) + if (param.type.is_boolean()) { - type varying_type = param_type; + type varying_type = param.type; varying_type.base = type::t_float; write_type(code, varying_type); code += '('; } - code += escape_name("_param" + std::to_string(i)); + code += param_name; - if (param_type.is_boolean()) + if (param.type.is_boolean()) code += ')'; code += ";\n"; @@ -1277,24 +1448,22 @@ private: // Handle 
return struct output variables if (func.return_type.is_struct()) { - const struct_info &definition = get_struct(func.return_type.definition); + const struct_type &struct_definition = get_struct(func.return_type.struct_definition); - for (const struct_member_info &member : definition.member_list) + for (const member_type &member : struct_definition.member_list) { code += '\t'; - code += semantic_to_builtin("_return_" + member.name, member.semantic, stype); + code += semantic_to_builtin("_return_" + member.name, member.semantic, func.type); code += " = _return." + escape_name(member.name) + ";\n"; } } // Add code to flip the output vertically - if (_flip_vert_y && stype == shader_type::vs) + if (_flip_vert_y && func.type == shader_type::vertex) code += "\tgl_Position.y = -gl_Position.y;\n"; leave_block_and_return(0); leave_function(); - - _blocks.at(0) += "#endif\n"; } id emit_load(const expression &exp, bool force_new_id) override @@ -1308,7 +1477,7 @@ private: std::string type, expr_code = id_to_name(exp.base); - for (const auto &op : exp.chain) + for (const expression::operation &op : exp.chain) { switch (op.op) { @@ -1319,7 +1488,7 @@ private: break; case expression::operation::op_member: expr_code += '.'; - expr_code += escape_name(get_struct(op.from.definition).member_list[op.index].name); + expr_code += escape_name(get_struct(op.from.struct_definition).member_list[op.index].name); break; case expression::operation::op_dynamic_index: // For matrices this will extract a column, but that is fine, since they are initialized column-wise too @@ -1338,10 +1507,14 @@ private: { if (op.swizzle[1] < 0) { - const int row = (op.swizzle[0] % 4); - const int col = (op.swizzle[0] - row) / 4; + const char row = (op.swizzle[0] % 4); + const char col = (op.swizzle[0] - row) / 4; - expr_code += '[' + std::to_string(row) + "][" + std::to_string(col) + ']'; + expr_code += '['; + expr_code += to_digit(row); + expr_code += "]["; + expr_code += to_digit(col); + expr_code += ']'; } 
else { @@ -1374,7 +1547,7 @@ private: else { expr_code += '.'; - for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) + for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) expr_code += "xyzw"[op.swizzle[i]]; } } @@ -1423,13 +1596,13 @@ private: code += '\t' + id_to_name(exp.base); - for (const auto &op : exp.chain) + for (const expression::operation &op : exp.chain) { switch (op.op) { case expression::operation::op_member: code += '.'; - code += escape_name(get_struct(op.from.definition).member_list[op.index].name); + code += escape_name(get_struct(op.from.struct_definition).member_list[op.index].name); break; case expression::operation::op_dynamic_index: code += "[int(" + id_to_name(op.index) + ")]"; @@ -1442,10 +1615,14 @@ private: { if (op.swizzle[1] < 0) { - const int row = (op.swizzle[0] % 4); - const int col = (op.swizzle[0] - row) / 4; + const char row = (op.swizzle[0] % 4); + const char col = (op.swizzle[0] - row) / 4; - code += '[' + std::to_string(row) + "][" + std::to_string(col) + ']'; + code += '['; + code += '1' + row - 1; + code += "]["; + code += '1' + col - 1; + code += ']'; } else { @@ -1457,7 +1634,7 @@ private: else { code += '.'; - for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) + for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) code += "xyzw"[op.swizzle[i]]; } break; @@ -1474,33 +1651,46 @@ private: code += id_to_name(value) + ";\n"; } - id emit_constant(const type &type, const constant &data) override + id emit_constant(const type &data_type, const constant &data) override { const id res = make_id(); - if (type.is_array() || type.is_struct()) + if (data_type.is_array() || data_type.is_struct()) { - assert(type.has(type::q_const)); + assert(data_type.has(type::q_const)); - std::string &code = _blocks.at(_current_block); + if (const auto it = std::find_if(_constant_lookup.begin(), _constant_lookup.end(), + [&data_type, &data](const std::tuple &x) { + if (!(std::get<0>(x) == data_type && std::memcmp(&std::get<1>(x).as_uint[0], 
&data.as_uint[0], sizeof(uint32_t) * 16) == 0 && std::get<1>(x).array_data.size() == data.array_data.size())) + return false; + for (size_t i = 0; i < data.array_data.size(); ++i) + if (std::memcmp(&std::get<1>(x).array_data[i].as_uint[0], &data.array_data[i].as_uint[0], sizeof(uint32_t) * 16) != 0) + return false; + return true; + }); + it != _constant_lookup.end()) + return std::get<2>(*it); // Reuse existing constant instead of duplicating the definition + else if (data_type.is_array()) + _constant_lookup.push_back({ data_type, data, res }); - code += '\t'; + // Put constant variable into global scope, so that it can be reused in different blocks + std::string &code = _blocks.at(0); // GLSL requires constants to be initialized, but struct initialization is not supported right now - if (!type.is_struct()) + if (!data_type.is_struct()) code += "const "; - write_type(code, type); + write_type(code, data_type); code += ' ' + id_to_name(res); // Array constants need to be stored in a constant variable as they cannot be used in-place - if (type.is_array()) - code += '[' + std::to_string(type.array_length) + ']'; + if (data_type.is_array()) + code += '[' + std::to_string(data_type.array_length) + ']'; // Struct initialization is not supported right now - if (!type.is_struct()) { + if (!data_type.is_struct()) { code += " = "; - write_constant(code, type, data); + write_constant(code, data_type, data); } code += ";\n"; @@ -1508,7 +1698,7 @@ private: } std::string code; - write_constant(code, type, data); + write_constant(code, data_type, data); define_name(res, std::move(code)); return res; @@ -1548,7 +1738,7 @@ private: return res; } - id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &type, id lhs, id rhs) override + id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &exp_type, id lhs, id rhs) override { const id res = make_id(); @@ -1576,7 +1766,7 @@ private: break; case tokenid::star: case 
tokenid::star_equal: - if (type.is_matrix()) + if (exp_type.is_matrix()) intrinsic = "matrixCompMult"; else operator_code = '*'; @@ -1587,7 +1777,7 @@ private: break; case tokenid::percent: case tokenid::percent_equal: - if (type.is_floating_point()) + if (exp_type.is_floating_point()) intrinsic = "fmodHLSL", _uses_fmod = true; else @@ -1614,51 +1804,51 @@ private: operator_code = ">>"; break; case tokenid::pipe_pipe: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "compOr", _uses_componentwise_or = true; else operator_code = "||"; break; case tokenid::ampersand_ampersand: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "compAnd", _uses_componentwise_and = true; else operator_code = "&&"; break; case tokenid::less: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "lessThan"; else operator_code = '<'; break; case tokenid::less_equal: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "lessThanEqual"; else operator_code = "<="; break; case tokenid::greater: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "greaterThan"; else operator_code = '>'; break; case tokenid::greater_equal: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "greaterThanEqual"; else operator_code = ">="; break; case tokenid::equal_equal: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "equal"; else operator_code = "=="; break; case tokenid::exclaim_equal: - if (type.is_vector()) + if (exp_type.is_vector()) intrinsic = "notEqual"; else operator_code = "!="; @@ -1732,14 +1922,16 @@ private: code += id_to_name(function) + '('; - for (size_t i = 0, num_args = args.size(); i < num_args; ++i) + for (const expression &arg : args) { - code += id_to_name(args[i].base); - - if (i < num_args - 1) - code += ", "; + code += id_to_name(arg.base); + code += ", "; } + // Remove trailing ", " + if (!args.empty()) + code.erase(code.size() - 2); + code += ");\n"; return res; @@ -1783,11 +1975,11 @@ private: 
return res; } - id emit_construct(const location &loc, const type &type, const std::vector &args) override + id emit_construct(const location &loc, const type &res_type, const std::vector &args) override { #ifndef NDEBUG - for (const auto &arg : args) - assert((arg.type.is_scalar() || type.is_array()) && arg.chain.empty() && arg.base != 0); + for (const expression &arg : args) + assert((arg.type.is_scalar() || res_type.is_array()) && arg.chain.empty() && arg.base != 0); #endif const id res = make_id(); @@ -1797,29 +1989,31 @@ private: write_location(code, loc); code += '\t'; - write_type(code, type); + write_type(code, res_type); code += ' ' + id_to_name(res); - if (type.is_array()) - code += '[' + std::to_string(type.array_length) + ']'; + if (res_type.is_array()) + code += '[' + std::to_string(res_type.array_length) + ']'; code += " = "; - write_type(code, type); + write_type(code, res_type); - if (type.is_array()) - code += '[' + std::to_string(type.array_length) + ']'; + if (res_type.is_array()) + code += '[' + std::to_string(res_type.array_length) + ']'; code += '('; - for (size_t i = 0, num_args = args.size(); i < num_args; ++i) + for (const expression &arg : args) { - code += id_to_name(args[i].base); - - if (i < num_args - 1) - code += ", "; + code += id_to_name(arg.base); + code += ", "; } + // Remove trailing ", " + if (!args.empty()) + code.erase(code.size() - 2); + code += ");\n"; return res; @@ -1843,7 +2037,7 @@ private: if (flags != 0 && !_gles) { - _enable_control_flow_attributes = true; + _uses_control_flow_attributes = true; code += "#if GL_EXT_control_flow_attributes\n\t[["; if ((flags & 0x1) == 0x1) @@ -1872,7 +2066,7 @@ private: _blocks.erase(true_statement_block); _blocks.erase(false_statement_block); } - id emit_phi(const location &loc, id condition_value, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &type) override + id emit_phi(const location &loc, id condition_value, id 
condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &res_type) override { assert(condition_value != 0 && condition_block != 0 && true_value != 0 && true_statement_block != 0 && false_value != 0 && false_statement_block != 0); @@ -1889,7 +2083,7 @@ private: code += _blocks.at(condition_block); code += '\t'; - write_type(code, type); + write_type(code, res_type); code += ' ' + id_to_name(res) + ";\n"; write_location(code, loc); @@ -1927,7 +2121,7 @@ private: std::string attributes; if (flags != 0 && !_gles) { - _enable_control_flow_attributes = true; + _uses_control_flow_attributes = true; attributes += "#if GL_EXT_control_flow_attributes\n\t[["; if ((flags & 0x1) == 0x1) @@ -1945,8 +2139,8 @@ private: if (condition_block == 0) { // Convert the last SSA variable initializer to an assignment statement - auto pos_assign = continue_data.rfind(condition_name); - auto pos_prev_assign = continue_data.rfind('\t', pos_assign); + const size_t pos_assign = continue_data.rfind(condition_name); + const size_t pos_prev_assign = continue_data.rfind('\t', pos_assign); continue_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); // We need to add the continue block to all "continue" statements as well @@ -1974,9 +2168,9 @@ private: if (std::count(condition_data.begin(), condition_data.end(), '\n') == 1) { // Convert SSA variable initializer back to a condition expression - auto pos_assign = condition_data.find('='); + const size_t pos_assign = condition_data.find('='); condition_data.erase(0, pos_assign + 2); - auto pos_semicolon = condition_data.rfind(';'); + const size_t pos_semicolon = condition_data.rfind(';'); condition_data.erase(pos_semicolon); condition_name = std::move(condition_data); @@ -1989,8 +2183,8 @@ private: increase_indentation_level(condition_data); // Convert the last SSA variable initializer to an assignment statement - auto pos_assign = condition_data.rfind(condition_name); - auto 
pos_prev_assign = condition_data.rfind('\t', pos_assign); + const size_t pos_assign = condition_data.rfind(condition_name); + const size_t pos_prev_assign = condition_data.rfind('\t', pos_assign); condition_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); } @@ -2116,7 +2310,7 @@ private: code += "\tdiscard;\n"; - const auto &return_type = _functions.back()->return_type; + const type &return_type = _current_function->return_type; if (!return_type.is_void()) { // Add a return statement to exit functions in case discard is the last control flow statement @@ -2133,7 +2327,7 @@ private: return 0; // Skip implicit return statement - if (!_functions.back()->return_type.is_void() && value == 0) + if (!_current_function->return_type.is_void() && value == 0) return set_block(0); std::string &code = _blocks.at(_current_block); @@ -2182,14 +2376,17 @@ private: } void leave_function() override { - assert(_last_block != 0); + assert(_current_function != nullptr && _last_block != 0); - _blocks.at(0) += "{\n" + _blocks.at(_last_block) + "}\n"; + _blocks.emplace(_current_function->id, _current_function_declaration + "{\n" + _blocks.at(_last_block) + "}\n"); + + _current_function = nullptr; + _current_function_declaration.clear(); } }; } // namespace -codegen *reshadefx::create_codegen_glsl(bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) +codegen *reshadefx::create_codegen_glsl(unsigned version, bool gles, bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) { - return new codegen_glsl(gles, vulkan_semantics, debug_info, uniforms_to_spec_constants, enable_16bit_types, flip_vert_y); + return new codegen_glsl(version, gles, vulkan_semantics, debug_info, uniforms_to_spec_constants, enable_16bit_types, flip_vert_y); } diff --git a/dep/reshadefx/src/effect_codegen_hlsl.cpp b/dep/reshadefx/src/effect_codegen_hlsl.cpp index 
2bf3681cd..41b3f50f2 100644 --- a/dep/reshadefx/src/effect_codegen_hlsl.cpp +++ b/dep/reshadefx/src/effect_codegen_hlsl.cpp @@ -5,22 +5,36 @@ #include "effect_parser.hpp" #include "effect_codegen.hpp" -#include // std::signbit, std::isinf, std::isnan +#include // std::isinf, std::isnan, std::signbit #include // std::tolower -#include // std::snprintf #include -#include // stricmp -#include // std::find_if, std::max +#include // stricmp, std::memcmp +#include // std::from_chars, std::to_chars +#include // std::equal, std::find, std::find_if, std::max #include #include using namespace reshadefx; +inline char to_digit(unsigned int value) +{ + assert(value < 10); + return '0' + static_cast(value); +} + +inline uint32_t align_up(uint32_t size, uint32_t alignment, uint32_t elements) +{ + alignment -= 1; + return ((size + alignment) & ~alignment) * (elements - 1) + size; +} + class codegen_hlsl final : public codegen { public: - codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants) - : _shader_model(shader_model), _debug_info(debug_info), _uniforms_to_spec_constants(uniforms_to_spec_constants) + codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants) : + _shader_model(shader_model), + _debug_info(debug_info), + _uniforms_to_spec_constants(uniforms_to_spec_constants) { // Create default block and reserve a memory block to avoid frequent reallocations std::string &block = _blocks.emplace(0, std::string()).first->second; @@ -38,24 +52,134 @@ private: expression, }; - std::string _cbuffer_block; - std::string _current_location; - std::unordered_map _names; - std::unordered_map _blocks; unsigned int _shader_model = 0; bool _debug_info = false; bool _uniforms_to_spec_constants = false; - std::unordered_map _remapped_semantics; + + std::unordered_map _names; + std::unordered_map _blocks; + std::string _cbuffer_block; + std::string _current_location; + std::string _current_function_declaration; + + std::string 
_remapped_semantics[15]; + std::vector> _constant_lookup; +#if 0 + std::vector _sampler_lookup; +#endif // Only write compatibility intrinsics to result if they are actually in use bool _uses_bitwise_cast = false; + bool _uses_bitwise_intrinsics = false; - void write_result(module &module) override + void optimize_bindings() override { - module = std::move(_module); + codegen::optimize_bindings(); +#if 0 + if (_shader_model < 40) + return; + + _module.num_sampler_bindings = static_cast(_sampler_lookup.size()); + + for (technique &tech : _module.techniques) + for (pass &pass : tech.passes) + pass.sampler_bindings.assign(_sampler_lookup.begin(), _sampler_lookup.end()); +#endif + } + + std::string finalize_preamble() const + { std::string preamble; +#define IMPLEMENT_INTRINSIC_FALLBACK_ASINT(n) \ + "int" #n " __asint(float" #n " v) {" \ + "float" #n " e = 0;" \ + "float" #n " f = frexp(v, e) * 2 - 1;" /* frexp does not include sign bit in HLSL, so can use as is */ \ + "float" #n " m = ldexp(f, 23);" \ + "return (v == 0) ? 0 : (v < 0 ? 2147483648 : 0) + (" /* Zero (does not handle negative zero) */ \ + /* isnan(v) ? 2147483647 : */ /* NaN */ \ + /* isinf(v) ? 2139095040 : */ /* Infinity */ \ + "ldexp(e + 126, 23) + m);" \ + "}" +#define IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(n) \ + "int" #n " __asuint(float" #n " v) { return __asint(v); }" +#define IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(n) \ + "float" #n " __asfloat(int" #n " v) {" \ + "float" #n " m = v % exp2(23);" \ + "float" #n " f = ldexp(m, -23);" \ + "float" #n " e = floor(ldexp(v, -23) % 256);" \ + "return (v > 2147483647 ? -1 : 1) * (" \ + /* e == 0 ? ldexp(f, -126) : */ /* Denormalized */ \ + /* e == 255 ? (m == 0 ? 
1.#INF : -1.#IND) : */ /* Infinity and NaN */ \ + "ldexp(1 + f, e - 127));" \ + "}" + + // See https://graphics.stanford.edu/%7Eseander/bithacks.html#CountBitsSetParallel +#define IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(n) \ + "uint" #n " __countbits(uint" #n " v) {" \ + "v = v - ((v >> 1) & 0x55555555);" \ + "v = (v & 0x33333333) + ((v >> 2) & 0x33333333);" \ + "v = (v + (v >> 4)) & 0x0F0F0F0F;" \ + "v *= 0x01010101;" \ + "return v >> 24;" \ + "}" +#define IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(n) \ + "uint" #n " __countbits(uint" #n " v) {" \ + "uint" #n " c = 0;" \ + "while (any(v > 0)) {" \ + "c += v % 2;" \ + "v /= 2;" \ + "}" \ + "return c;" \ + "}" + + // See https://graphics.stanford.edu/%7Eseander/bithacks.html#ReverseParallel +#define IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(n) \ + "uint" #n " __reversebits(uint" #n " v) {" \ + "v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);" \ + "v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);" \ + "v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);" \ + "v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);" \ + "return (v >> 16) | (v << 16);" \ + "}" +#define IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(n) \ + "uint" #n " __reversebits(uint" #n " v) {" \ + "uint" #n " r = 0;" \ + "for (int i = 0; i < 32; i++) {" \ + "r *= 2;" \ + "r += floor(v % 2);" /* Shift the lowest remaining bit of 'v' into the result */ \ + "v /= 2;" \ + "}" \ + "return r;" \ + "}" + + // See https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightParallel +#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(n) \ + "uint" #n " __firstbitlow(uint" #n " v) {" \ + "uint" #n " c = (v != 0) ? 31 : 32;" \ + "v &= -int" #n "(v);" \ + "c = (v & 0x0000FFFF) ? c - 16 : c;" \ + "c = (v & 0x00FF00FF) ? c - 8 : c;" \ + "c = (v & 0x0F0F0F0F) ? c - 4 : c;" \ + "c = (v & 0x33333333) ? c - 2 : c;" \ + "c = (v & 0x55555555) ? 
c - 1 : c;" \ + "return c;" \ + "}" +#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(n) \ + "uint" #n " __firstbitlow(uint" #n " v) {" \ + "uint" #n " c = (v != 0) ? 31 : 32;" \ + "for (int i = 0; i < 32; i++) {" \ + "c = c > i && (v % 2) != 0 ? i : c;" \ + "v /= 2;" \ + "}" \ + "return c;" \ + "}" + + +#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(n) \ + "uint" #n " __firstbithigh(uint" #n " v) { return __firstbitlow(__reversebits(v)); }" + if (_shader_model >= 40) { preamble += @@ -72,10 +196,34 @@ private: "struct __sampler2D_float4 { Texture2D t; SamplerState s; };\n" "struct __sampler3D_float4 { Texture3D t; SamplerState s; };\n"; + if (_uses_bitwise_intrinsics && _shader_model < 50) + preamble += + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(4) "\n"; + if (!_cbuffer_block.empty()) { +#if 0 if (_shader_model >= 60) preamble += "[[vk::binding(0, 0)]] "; // Descriptor set 0 +#endif preamble += "cbuffer _Globals {\n" + _cbuffer_block + "};\n"; } @@ -90,48 +238,140 @@ private: if (_uses_bitwise_cast) preamble += - "int __asint(float v) {" - " if (v == 0) return 0;" // Zero (does not handle negative zero) - // if (isinf(v)) return v < 0 ? 
4286578688 : 2139095040; // Infinity - // if (isnan(v)) return 2147483647; // NaN (does not handle negative NaN) - " float e = 0;" - " float f = frexp(v, e) * 2 - 1;" // frexp does not include sign bit in HLSL, so can use as is - " float m = ldexp(f, 23);" - " return (v < 0 ? 2147483648 : 0) + ldexp(e + 126, 23) + m;" - "}\n" - "int2 __asint(float2 v) { return int2(__asint(v.x), __asint(v.y)); }\n" - "int3 __asint(float3 v) { return int3(__asint(v.x), __asint(v.y), __asint(v.z)); }\n" - "int4 __asint(float4 v) { return int4(__asint(v.x), __asint(v.y), __asint(v.z), __asint(v.w)); }\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASINT(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASINT(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASINT(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASINT(4) "\n" - "int __asuint(float v) { return __asint(v); }\n" - "int2 __asuint(float2 v) { return int2(__asint(v.x), __asint(v.y)); }\n" - "int3 __asuint(float3 v) { return int3(__asint(v.x), __asint(v.y), __asint(v.z)); }\n" - "int4 __asuint(float4 v) { return int4(__asint(v.x), __asint(v.y), __asint(v.z), __asint(v.w)); }\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(4) "\n" - "float __asfloat(int v) {" - " float m = v % exp2(23);" - " float f = ldexp(m, -23);" - " float e = floor(ldexp(v, -23) % 256);" - " return (v > 2147483647 ? -1 : 1) * (" - // e == 0 ? ldexp(f, -126) : // Denormalized - // e == 255 ? (m == 0 ? 
1.#INF : -1.#IND) : // Infinity and NaN - " ldexp(1 + f, e - 127));" - "}\n" - "float2 __asfloat(int2 v) { return float2(__asfloat(v.x), __asfloat(v.y)); }\n" - "float3 __asfloat(int3 v) { return float3(__asfloat(v.x), __asfloat(v.y), __asfloat(v.z)); }\n" - "float4 __asfloat(int4 v) { return float4(__asfloat(v.x), __asfloat(v.y), __asfloat(v.z), __asfloat(v.w)); }\n"; + IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(4) "\n"; + + if (_uses_bitwise_intrinsics) + preamble += + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(4) "\n" + + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(1) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(2) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(3) "\n" + IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(4) "\n"; if (!_cbuffer_block.empty()) + { preamble += _cbuffer_block; - - // Offsets were multiplied in 'define_uniform', so adjust total size here accordingly - module.total_uniform_size *= 4; + } } - module.code.assign(preamble.begin(), preamble.end()); + return preamble; + } - const std::string &main_block = _blocks.at(0); - module.code.insert(module.code.end(), main_block.begin(), main_block.end()); + std::string finalize_code() const override + { + std::string code = finalize_preamble(); + + // Add global 
definitions (struct types, global variables, sampler state declarations, ...) + code += _blocks.at(0); + + // Add texture and sampler definitions + for (const sampler &info : _module.samplers) + code += _blocks.at(info.id); + + // Add storage definitions + for (const storage &info : _module.storages) + code += _blocks.at(info.id); + + // Add function definitions + for (const std::unique_ptr &func : _functions) + code += _blocks.at(func->id); + + return code; + } + std::string finalize_code_for_entry_point(const std::string &entry_point_name) const override + { + const auto entry_point_it = std::find_if(_functions.begin(), _functions.end(), + [&entry_point_name](const std::unique_ptr &func) { + return func->unique_name == entry_point_name; + }); + if (entry_point_it == _functions.end()) + return {}; + const function &entry_point = *entry_point_it->get(); + + std::string code = finalize_preamble(); + + if (_shader_model < 40 && entry_point.type == shader_type::pixel) + // Overwrite position semantic in pixel shaders + code += "#define POSITION VPOS\n"; + + // Add global definitions (struct types, global variables, sampler state declarations, ...) 
+ code += _blocks.at(0); + + const auto replace_binding = + [](std::string &code, uint32_t binding) { + for (size_t start = 0;;) + { + const size_t pos = code.find(": register(", start); + if (pos == std::string::npos) + break; + const size_t beg = pos + 12; + const size_t end = code.find(')', beg); + const std::string replacement = std::to_string(binding); + code.replace(beg, end - beg, replacement); + start = beg + replacement.length(); + } + }; + + // Add referenced texture and sampler definitions + for (uint32_t binding = 0; binding < entry_point.referenced_samplers.size(); ++binding) + { + if (entry_point.referenced_samplers[binding] == 0) + continue; + + std::string block_code = _blocks.at(entry_point.referenced_samplers[binding]); + replace_binding(block_code, binding); + code += block_code; + } + + // Add referenced storage definitions + for (uint32_t binding = 0; binding < entry_point.referenced_storages.size(); ++binding) + { + if (entry_point.referenced_storages[binding] == 0) + continue; + + std::string block_code = _blocks.at(entry_point.referenced_storages[binding]); + replace_binding(block_code, binding); + code += block_code; + } + + // Add referenced function definitions + for (const std::unique_ptr &func : _functions) + { + if (func->id != entry_point.id && + std::find(entry_point.referenced_functions.begin(), entry_point.referenced_functions.end(), func->id) == entry_point.referenced_functions.end()) + continue; + + code += _blocks.at(func->id); + } + + return code; } template @@ -196,79 +436,79 @@ private: s += "float"; break; case type::t_struct: - s += id_to_name(type.definition); + s += id_to_name(type.struct_definition); return; case type::t_sampler1d_int: - s += "__sampler1D"; - if (_shader_model >= 40) - s += "_int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); - return; case type::t_sampler2d_int: - s += "__sampler2D"; - if (_shader_model >= 40) - s += "_int" + (type.rows > 1 ? 
std::to_string(type.rows) : std::string()); - return; case type::t_sampler3d_int: - s += "__sampler3D"; + s += "__sampler"; + s += to_digit(type.texture_dimension()); + s += 'D'; if (_shader_model >= 40) - s += "_int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); + { + s += "_int"; + if (type.rows > 1) + s += to_digit(type.rows); + } return; case type::t_sampler1d_uint: - s += "__sampler1D"; - if (_shader_model >= 40) - s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); - return; case type::t_sampler2d_uint: - s += "__sampler2D"; - if (_shader_model >= 40) - s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); - return; case type::t_sampler3d_uint: - s += "__sampler3D"; + s += "__sampler"; + s += to_digit(type.texture_dimension()); + s += 'D'; if (_shader_model >= 40) - s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); + { + s += "_uint"; + if (type.rows > 1) + s += to_digit(type.rows); + } return; case type::t_sampler1d_float: - s += "__sampler1D"; - if (_shader_model >= 40) - s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); - return; case type::t_sampler2d_float: - s += "__sampler2D"; - if (_shader_model >= 40) - s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); - return; case type::t_sampler3d_float: - s += "__sampler3D"; + s += "__sampler"; + s += to_digit(type.texture_dimension()); + s += 'D'; if (_shader_model >= 40) - s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); + { + s += "_float"; + if (type.rows > 1) + s += to_digit(type.rows); + } return; case type::t_storage1d_int: - s += "RWTexture1D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage2d_int: - s += "RWTexture2D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage3d_int: - s += "RWTexture3D 1 ? 
std::to_string(type.rows) : std::string()) + '>'; + s += "RWTexture"; + s += to_digit(type.texture_dimension()); + s += "D<"; + s += "int"; + if (type.rows > 1) + s += to_digit(type.rows); + s += '>'; return; case type::t_storage1d_uint: - s += "RWTexture1D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage2d_uint: - s += "RWTexture2D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage3d_uint: - s += "RWTexture3D 1 ? std::to_string(type.rows) : std::string()) + '>'; + s += "RWTexture"; + s += to_digit(type.texture_dimension()); + s += "D<"; + s += "uint"; + if (type.rows > 1) + s += to_digit(type.rows); + s += '>'; return; case type::t_storage1d_float: - s += "RWTexture1D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage2d_float: - s += "RWTexture2D 1 ? std::to_string(type.rows) : std::string()) + '>'; - return; case type::t_storage3d_float: - s += "RWTexture3D 1 ? std::to_string(type.rows) : std::string()) + '>'; + s += "RWTexture"; + s += to_digit(type.texture_dimension()); + s += "D<"; + s += "float"; + if (type.rows > 1) + s += to_digit(type.rows); + s += '>'; return; default: assert(false); @@ -276,49 +516,52 @@ private: } if (type.rows > 1) - s += std::to_string(type.rows); + s += to_digit(type.rows); if (type.cols > 1) - s += 'x' + std::to_string(type.cols); + s += 'x', s += to_digit(type.cols); } - void write_constant(std::string &s, const type &type, const constant &data) const + void write_constant(std::string &s, const type &data_type, const constant &data) const { - if (type.is_array()) + if (data_type.is_array()) { - auto elem_type = type; + assert(data_type.is_bounded_array()); + + type elem_type = data_type; elem_type.array_length = 0; s += "{ "; - for (int i = 0; i < type.array_length; ++i) + for (unsigned int a = 0; a < data_type.array_length; ++a) { - write_constant(s, elem_type, i < static_cast(data.array_data.size()) ? 
data.array_data[i] : constant()); - - if (i < type.array_length - 1) - s += ", "; + write_constant(s, elem_type, a < static_cast(data.array_data.size()) ? data.array_data[a] : constant {}); + s += ", "; } + // Remove trailing ", " + s.erase(s.size() - 2); + s += " }"; return; } - if (type.is_struct()) + if (data_type.is_struct()) { // The can only be zero initializer struct constants assert(data.as_uint[0] == 0); - s += '(' + id_to_name(type.definition) + ")0"; + s += '(' + id_to_name(data_type.struct_definition) + ")0"; return; } // There can only be numeric constants - assert(type.is_numeric()); + assert(data_type.is_numeric()); - if (!type.is_scalar()) - write_type(s, type), s += '('; + if (!data_type.is_scalar()) + write_type(s, data_type), s += '('; - for (unsigned int i = 0, components = type.components(); i < components; ++i) + for (unsigned int i = 0; i < data_type.components(); ++i) { - switch (type.base) + switch (data_type.base) { case type::t_bool: s += data.as_uint[i] ? "true" : "false"; @@ -342,21 +585,32 @@ private: break; } { +#ifdef _MSC_VER + char temp[64]; + const std::to_chars_result res = std::to_chars(temp, temp + sizeof(temp), data.as_float[i], std::chars_format::scientific, 8); + if (res.ec == std::errc()) + s.append(temp, res.ptr); + else + assert(false); +#else std::ostringstream ss; ss.imbue(std::locale::classic()); ss << data.as_float[i]; s += ss.str(); +#endif } break; default: assert(false); } - if (i < components - 1) - s += ", "; + s += ", "; } - if (!type.is_scalar()) + // Remove trailing ", " + s.erase(s.size() - 2); + + if (!data_type.is_scalar()) s += ')'; } template @@ -400,16 +654,14 @@ private: case texture_format::r32u: s += "uint"; break; - case texture_format::r8: - case texture_format::r16: - case texture_format::r16f: - case texture_format::r32f: - s += "float"; - break; default: assert(false); [[fallthrough]]; case texture_format::unknown: + case texture_format::r8: + case texture_format::r16: + case 
texture_format::r16f: + case texture_format::r32f: case texture_format::rg8: case texture_format::rg16: case texture_format::rg16f: @@ -442,17 +694,20 @@ private: return; // Filter out names that may clash with automatic ones name = escape_name(std::move(name)); if constexpr (naming_type == naming::general) - if (std::find_if(_names.begin(), _names.end(), [&name](const auto &it) { return it.second == name; }) != _names.end()) + if (std::find_if(_names.begin(), _names.end(), + [&name](const auto &names_it) { return names_it.second == name; }) != _names.end()) name += '_' + std::to_string(id); // Append a numbered suffix if the name already exists _names[id] = std::move(name); } - std::string convert_semantic(const std::string &semantic) + std::string convert_semantic(const std::string &semantic, uint32_t max_attributes = 1) { if (_shader_model < 40) { if (semantic == "SV_POSITION") return "POSITION"; // For pixel shaders this has to be "VPOS", so need to redefine that in post + if (semantic == "VPOS") + return "VPOS"; if (semantic == "SV_POINTSIZE") return "PSIZE"; if (semantic.compare(0, 9, "SV_TARGET") == 0) @@ -464,22 +719,41 @@ private: if (semantic == "SV_ISFRONTFACE") return "VFACE"; - if (semantic != "VPOS" && - semantic.compare(0, 5, "COLOR") != 0 && - semantic.compare(0, 6, "NORMAL") != 0 && - semantic.compare(0, 7, "TANGENT") != 0) - { - // Shader model 3 only supports a selected list of semantic names, so need to remap custom ones to that - if (const auto it = _remapped_semantics.find(semantic); - it != _remapped_semantics.end()) - return it->second; + size_t digit_index = semantic.size() - 1; + while (digit_index != 0 && semantic[digit_index] >= '0' && semantic[digit_index] <= '9') + digit_index--; + digit_index++; - // Legal semantic indices are between 0 and 15 - if (_remapped_semantics.size() < 15) + const std::string semantic_base = semantic.substr(0, digit_index); + + uint32_t semantic_digit = 0; + std::from_chars(semantic.c_str() + digit_index, 
semantic.c_str() + semantic.size(), semantic_digit); + + if (semantic_base == "TEXCOORD") + { + if (semantic_digit < 15) { - const std::string remapped_semantic = "TEXCOORD" + std::to_string(_remapped_semantics.size()) + " /* " + semantic + " */"; - _remapped_semantics.emplace(semantic, remapped_semantic); - return remapped_semantic; + assert(_remapped_semantics[semantic_digit].empty() || _remapped_semantics[semantic_digit] == semantic); // Mixing custom semantic names and multiple TEXCOORD indices is not supported + _remapped_semantics[semantic_digit] = semantic; + } + } + // Shader model 3 only supports a selected list of semantic names, so need to remap custom ones to that + else if ( + semantic_base != "COLOR" && + semantic_base != "NORMAL" && + semantic_base != "TANGENT" && + semantic_base != "BINORMAL") + { + // Legal semantic indices are between 0 and 15, but skip first entry in case both custom semantic names and the common TEXCOORD0 exist + for (int i = 1; i < 15; ++i) + { + if (_remapped_semantics[i].empty() || _remapped_semantics[i] == semantic) + { + for (uint32_t a = 0; a < max_attributes && i + a < 15; ++a) + _remapped_semantics[i + a] = semantic_base + std::to_string(semantic_digit + a); + + return "TEXCOORD" + std::to_string(i) + " /* " + semantic + " */"; + } } } } @@ -543,10 +817,10 @@ private: block.insert(block.begin(), '\t'); } - id define_struct(const location &loc, struct_info &info) override + id define_struct(const location &loc, struct_type &info) override { - info.definition = make_id(); - define_name(info.definition, info.unique_name); + const id res = info.id = make_id(); + define_name(res, info.unique_name); _structs.push_back(info); @@ -554,136 +828,133 @@ private: write_location(code, loc); - code += "struct " + id_to_name(info.definition) + "\n{\n"; + code += "struct " + id_to_name(res) + "\n{\n"; - for (const struct_member_info &member : info.member_list) + for (const member_type &member : info.member_list) { code += '\t'; 
write_type(code, member.type); // HLSL allows interpolation attributes on struct members, so handle this like a parameter code += ' ' + member.name; + if (member.type.is_array()) code += '[' + std::to_string(member.type.array_length) + ']'; + if (!member.semantic.empty()) - code += " : " + convert_semantic(member.semantic); + code += " : " + convert_semantic(member.semantic, std::max(1u, member.type.components() / 4) * std::max(1u, member.type.array_length)); + code += ";\n"; } code += "};\n"; - return info.definition; + return res; } - id define_texture(const location &loc, texture_info &info) override + id define_texture(const location &, texture &info) override { - info.id = make_id(); - info.binding = ~0u; - - define_name(info.id, info.unique_name); - -#if 0 - if (_shader_model >= 40) - { - info.binding = _module.num_texture_bindings; - _module.num_texture_bindings += 2; - - std::string &code = _blocks.at(_current_block); - - write_location(code, loc); - - if (_shader_model >= 60) - code += "[[vk::binding(" + std::to_string(info.binding + 0) + ", 2)]] "; // Descriptor set 2 - - code += "Texture" + std::to_string(static_cast(info.type)) + "D<"; - write_texture_format(code, info.format); - code += "> __" + info.unique_name + " : register(t" + std::to_string(info.binding + 0) + "); \n"; - - if (_shader_model >= 60) - code += "[[vk::binding(" + std::to_string(info.binding + 1) + ", 2)]] "; // Descriptor set 2 - - code += "Texture" + std::to_string(static_cast(info.type)) + "D<"; - write_texture_format(code, info.format); - code += "> __srgb" + info.unique_name + " : register(t" + std::to_string(info.binding + 1) + "); \n"; - } -#endif + const id res = info.id = make_id(); _module.textures.push_back(info); - return info.id; + return res; } - id define_sampler(const location &loc, const texture_info &tex_info, sampler_info &info) override + id define_sampler(const location &loc, const texture &tex_info, sampler &info) override { - info.id = make_id(); + const id res 
= info.id = create_block(); + define_name(res, info.unique_name); - define_name(info.id, info.unique_name); + std::string &code = _blocks.at(res); - std::string &code = _blocks.at(_current_block); + // Default to a register index equivalent to the entry in the sampler list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.samplers.size()); + uint32_t sampler_state_binding = 0; if (_shader_model >= 40) { #if 0 // Try and reuse a sampler binding with the same sampler description - const auto existing_sampler = std::find_if(_module.samplers.begin(), _module.samplers.end(), - [&info](const auto &it) { - return it.filter == info.filter && it.address_u == info.address_u && it.address_v == info.address_v && it.address_w == info.address_w && it.min_lod == info.min_lod && it.max_lod == info.max_lod && it.lod_bias == info.lod_bias; + const auto existing_sampler_it = std::find_if(_sampler_lookup.begin(), _sampler_lookup.end(), + [&info](const sampler_desc &existing_info) { + return + existing_info.filter == info.filter && + existing_info.address_u == info.address_u && + existing_info.address_v == info.address_v && + existing_info.address_w == info.address_w && + existing_info.min_lod == info.min_lod && + existing_info.max_lod == info.max_lod && + existing_info.lod_bias == info.lod_bias; }); - - if (existing_sampler != _module.samplers.end()) + if (existing_sampler_it != _sampler_lookup.end()) { - info.binding = existing_sampler->binding; + sampler_state_binding = existing_sampler_it->binding; } else { - info.binding = _module.num_sampler_bindings++; + sampler_state_binding = static_cast(_sampler_lookup.size()); + + sampler_binding s; + s.filter = info.filter; + s.address_u = info.address_u; + s.address_v = info.address_v; + s.address_w = info.address_w; + s.min_lod = info.min_lod; + s.max_lod = info.max_lod; + s.lod_bias = info.lod_bias; + s.binding = sampler_state_binding; + 
_sampler_lookup.push_back(std::move(s)); if (_shader_model >= 60) - code += "[[vk::binding(" + std::to_string(info.binding) + ", 1)]] "; // Descriptor set 1 + _blocks.at(0) += "[[vk::binding(" + std::to_string(sampler_state_binding) + ", 1)]] "; // Descriptor set 1 - code += "SamplerState __s" + std::to_string(info.binding) + " : register(s" + std::to_string(info.binding) + ");\n"; + _blocks.at(0) += "SamplerState __s" + std::to_string(sampler_state_binding) + " : register(s" + std::to_string(sampler_state_binding) + ");\n"; } - assert(info.srgb == 0 || info.srgb == 1); - info.texture_binding = tex_info.binding + info.srgb; // Offset binding by one to choose the SRGB variant + if (_shader_model >= 60) + code += "[[vk::binding(" + std::to_string(default_binding) + ", 2)]] "; // Descriptor set 2 - write_location(code, loc); - - code += "static const "; - write_type(code, info.type); - code += ' ' + id_to_name(info.id) + " = { " + (info.srgb ? "__srgb" : "__") + info.texture_name + ", __s" + std::to_string(info.binding) + " };\n"; -#else - info.binding = _module.num_sampler_bindings++; - info.texture_binding = ~0u; // Unset texture binding - - write_location(code, loc); - - const unsigned int texture_dimension = info.type.texture_dimension(); - code += "Texture" + std::to_string(texture_dimension) + "D<"; + code += "Texture"; + code += to_digit(static_cast(tex_info.type)); + code += "D<"; write_texture_format(code, tex_info.format); - code += "> __" + info.unique_name + "_t : register( t0); \n"; + code += "> __" + info.unique_name + "_t : register(t" + std::to_string(default_binding) + "); \n"; - code += "SamplerState __" + info.unique_name + "_s : register( s0);\n"; + write_location(code, loc); code += "static const "; write_type(code, info.type); - code += ' ' + id_to_name(info.id) + " = { __" + info.unique_name + "_t, __" + info.unique_name + "_s };\n"; + code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_t, __s" + 
std::to_string(sampler_state_binding) + " };\n"; +#else + code += "Texture"; + code += to_digit(static_cast(tex_info.type)); + code += "D<"; + write_texture_format(code, tex_info.format); + code += "> __" + info.unique_name + "_t : register(t" + std::to_string(default_binding) + "); \n"; + + code += "SamplerState __" + info.unique_name + "_s : register(s" + std::to_string(default_binding) + "); \n"; + + write_location(code, loc); + + code += "static const "; + write_type(code, info.type); + code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_t, __" + info.unique_name + "_s };\n"; #endif } else { - info.binding = _module.num_sampler_bindings++; - info.texture_binding = ~0u; // Unset texture binding - const unsigned int texture_dimension = info.type.texture_dimension(); - code += "sampler" + std::to_string(texture_dimension) + "D __" + info.unique_name + "_s : register(s" + std::to_string(info.binding) + ");\n"; + code += "sampler"; + code += to_digit(texture_dimension); + code += "D __" + info.unique_name + "_s : register(s" + std::to_string(default_binding) + ");\n"; write_location(code, loc); code += "static const "; write_type(code, info.type); - code += ' ' + id_to_name(info.id) + " = { __" + info.unique_name + "_s, float" + std::to_string(texture_dimension) + '('; + code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_s, float" + to_digit(texture_dimension) + '('; if (tex_info.semantic.empty()) { - code += "1.0 / " + std::to_string(tex_info.width); + code += "1.0 / " + std::to_string(tex_info.width); if (texture_dimension >= 2) code += ", 1.0 / " + std::to_string(tex_info.height); if (texture_dimension >= 3) @@ -700,38 +971,38 @@ private: _module.samplers.push_back(info); - return info.id; + return res; } - id define_storage(const location &loc, const texture_info &, storage_info &info) override + id define_storage(const location &loc, const texture &, storage &info) override { - info.id = make_id(); - info.binding = ~0u; + const id res 
= info.id = create_block(); + define_name(res, info.unique_name); - define_name(info.id, info.unique_name); + // Default to a register index equivalent to the entry in the storage list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.storages.size()); if (_shader_model >= 50) { - info.binding = _module.num_storage_bindings++; - - std::string &code = _blocks.at(_current_block); + std::string &code = _blocks.at(res); write_location(code, loc); +#if 0 if (_shader_model >= 60) - code += "[[vk::binding(" + std::to_string(info.binding) + ", 3)]] "; // Descriptor set 3 + code += "[[vk::binding(" + std::to_string(default_binding) + ", 3)]] "; // Descriptor set 3 +#endif write_type(code, info.type); - code += ' ' + info.unique_name + " : register(u" + std::to_string(info.binding) + ");\n"; + code += ' ' + info.unique_name + " : register(u" + std::to_string(default_binding) + ");\n"; } _module.storages.push_back(info); - return info.id; + return res; } - id define_uniform(const location &loc, uniform_info &info) override + id define_uniform(const location &loc, uniform &info) override { const id res = make_id(); - define_name(res, info.name); if (_uniforms_to_spec_constants && info.has_initializer_value) @@ -765,6 +1036,9 @@ private: if (info.type.is_array()) info.size = align_up(info.size, 16, info.type.array_length); + if (_shader_model < 40) + _module.total_uniform_size /= 4; + // Data is packed into 4-byte boundaries (see https://docs.microsoft.com/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules) // This is already guaranteed, since all types are at least 4-byte in size info.offset = _module.total_uniform_size; @@ -790,6 +1064,7 @@ private: // Simply put each uniform into a separate constant register in shader model 3 for now info.offset *= 4; + _module.total_uniform_size *= 4; } write_type(_cbuffer_block, type); @@ -814,6 +1089,14 @@ private: } id 
define_variable(const location &loc, const type &type, std::string name, bool global, id initializer_value) override { + // Constant variables with a constant initializer can just point to the initializer SSA variable, since they cannot be modified anyway, thus saving an unnecessary assignment + if (initializer_value != 0 && type.has(type::q_const) && + std::find_if(_constant_lookup.begin(), _constant_lookup.end(), + [initializer_value](const auto &x) { + return initializer_value == std::get<2>(x); + }) != _constant_lookup.end()) + return initializer_value; + const id res = make_id(); if (!name.empty()) @@ -826,9 +1109,6 @@ private: if (!global) code += '\t'; - if (initializer_value != 0 && type.has(type::q_const)) - code += "const "; - write_type(code, type); code += ' ' + id_to_name(res); @@ -842,42 +1122,43 @@ private: return res; } - id define_function(const location &loc, function_info &info) override + id define_function(const location &loc, function &info) override { - info.definition = make_id(); + const id res = info.id = make_id(); + define_name(res, info.unique_name); - define_name(info.definition, info.unique_name); - - std::string &code = _blocks.at(_current_block); + assert(_current_block == 0 && (_current_function_declaration.empty() || info.type != shader_type::unknown)); + std::string &code = _current_function_declaration; write_location(code, loc); write_type(code, info.return_type); - code += ' ' + id_to_name(info.definition) + '('; + code += ' ' + id_to_name(res) + '('; - for (size_t i = 0, num_params = info.parameter_list.size(); i < num_params; ++i) + for (member_type ¶m : info.parameter_list) { - auto ¶m = info.parameter_list[i]; - - param.definition = make_id(); - define_name(param.definition, param.name); + param.id = make_id(); + define_name(param.id, param.name); code += '\n'; write_location(code, param.location); code += '\t'; write_type(code, param.type); - code += ' ' + id_to_name(param.definition); + code += ' ' + 
id_to_name(param.id); if (param.type.is_array()) code += '[' + std::to_string(param.type.array_length) + ']'; if (!param.semantic.empty()) - code += " : " + convert_semantic(param.semantic); + code += " : " + convert_semantic(param.semantic, std::max(1u, param.type.cols / 4u) * std::max(1u, param.type.array_length)); - if (i < num_params - 1) - code += ','; + code += ','; } + // Remove trailing comma + if (!info.parameter_list.empty()) + code.pop_back(); + code += ')'; if (!info.return_semantic.empty()) @@ -885,49 +1166,54 @@ private: code += '\n'; - _functions.push_back(std::make_unique(info)); + _functions.push_back(std::make_unique(info)); + _current_function = _functions.back().get(); - return info.definition; + return res; } - void define_entry_point(function_info &func, shader_type stype, int num_threads[3]) override + void define_entry_point(function &func) override { // Modify entry point name since a new function is created for it below - if (stype == shader_type::cs) - func.unique_name = 'E' + func.unique_name + - '_' + std::to_string(num_threads[0]) + - '_' + std::to_string(num_threads[1]) + - '_' + std::to_string(num_threads[2]); - else if (_shader_model < 40) - func.unique_name = 'E' + func.unique_name; + assert(!func.unique_name.empty() && func.unique_name[0] == 'F'); + if (_shader_model < 40 || func.type == shader_type::compute) + func.unique_name[0] = 'E'; - if (const auto it = std::find_if(_module.entry_points.begin(), _module.entry_points.end(), - [&func](const auto &ep) { return ep.name == func.unique_name; }); - it != _module.entry_points.end()) + if (func.type == shader_type::compute) + func.unique_name += + '_' + std::to_string(func.num_threads[0]) + + '_' + std::to_string(func.num_threads[1]) + + '_' + std::to_string(func.num_threads[2]); + + if (std::find_if(_module.entry_points.begin(), _module.entry_points.end(), + [&func](const std::pair &entry_point) { + return entry_point.first == func.unique_name; + }) != _module.entry_points.end()) 
return; - _module.entry_points.push_back({ func.unique_name, stype }); + _module.entry_points.emplace_back(func.unique_name, func.type); // Only have to rewrite the entry point function signature in shader model 3 and for compute (to write "numthreads" attribute) - if (_shader_model >= 40 && stype != shader_type::cs) + if (_shader_model >= 40 && func.type != shader_type::compute) return; - auto entry_point = func; + function entry_point = func; + entry_point.referenced_functions.push_back(func.id); const auto is_color_semantic = [](const std::string &semantic) { return semantic.compare(0, 9, "SV_TARGET") == 0 || semantic.compare(0, 5, "COLOR") == 0; }; const auto is_position_semantic = [](const std::string &semantic) { return semantic == "SV_POSITION" || semantic == "POSITION"; }; - const auto ret = make_id(); + const id ret = make_id(); define_name(ret, "ret"); std::string position_variable_name; { - if (func.return_type.is_struct() && stype == shader_type::vs) + if (func.type == shader_type::vertex && func.return_type.is_struct()) { // If this function returns a struct which contains a position output, keep track of its member name - for (const struct_member_info &member : get_struct(func.return_type.definition).member_list) + for (const member_type &member : get_struct(func.return_type.struct_definition).member_list) if (is_position_semantic(member.semantic)) position_variable_name = id_to_name(ret) + '.' 
+ member.name; } @@ -939,18 +1225,18 @@ private: } if (is_position_semantic(func.return_semantic)) { - if (stype == shader_type::vs) + if (func.type == shader_type::vertex) // Keep track of the position output variable position_variable_name = id_to_name(ret); } } - for (struct_member_info ¶m : entry_point.parameter_list) + for (member_type ¶m : entry_point.parameter_list) { - if (param.type.is_struct() && stype == shader_type::vs) + if (func.type == shader_type::vertex && param.type.is_struct()) { - for (const struct_member_info &member : get_struct(param.type.definition).member_list) + for (const member_type &member : get_struct(param.type.struct_definition).member_list) if (is_position_semantic(member.semantic)) - position_variable_name = param.name + '.' + member.name; + position_variable_name = id_to_name(param.id) + '.' + member.name; } if (is_color_semantic(param.semantic)) @@ -959,20 +1245,21 @@ private: } if (is_position_semantic(param.semantic)) { - if (stype == shader_type::vs) + if (func.type == shader_type::vertex) // Keep track of the position output variable - position_variable_name = param.name; - else if (stype == shader_type::ps) + position_variable_name = id_to_name(param.id); + else if (func.type == shader_type::pixel) // Change the position input semantic in pixel shaders param.semantic = "VPOS"; } } - if (stype == shader_type::cs) - _blocks.at(_current_block) += "[numthreads(" + - std::to_string(num_threads[0]) + ", " + - std::to_string(num_threads[1]) + ", " + - std::to_string(num_threads[2]) + ")]\n"; + assert(_current_function_declaration.empty()); + if (func.type == shader_type::compute) + _current_function_declaration += "[numthreads(" + + std::to_string(func.num_threads[0]) + ", " + + std::to_string(func.num_threads[1]) + ", " + + std::to_string(func.num_threads[2]) + ")]\n"; define_function({}, entry_point); enter_block(create_block()); @@ -980,10 +1267,10 @@ private: std::string &code = _blocks.at(_current_block); // Clear all color 
output parameters so no component is left uninitialized - for (struct_member_info ¶m : entry_point.parameter_list) + for (const member_type ¶m : entry_point.parameter_list) { if (is_color_semantic(param.semantic)) - code += '\t' + param.name + " = float4(0.0, 0.0, 0.0, 0.0);\n"; + code += '\t' + id_to_name(param.id) + " = float4(0.0, 0.0, 0.0, 0.0);\n"; } code += '\t'; @@ -998,29 +1285,34 @@ private: } // Call the function this entry point refers to - code += id_to_name(func.definition) + '('; + code += id_to_name(func.id) + '('; - for (size_t i = 0, num_params = func.parameter_list.size(); i < num_params; ++i) + for (size_t i = 0; i < func.parameter_list.size(); ++i) { - code += func.parameter_list[i].name; + code += id_to_name(entry_point.parameter_list[i].id); - if (is_color_semantic(func.parameter_list[i].semantic)) + const member_type ¶m = func.parameter_list[i]; + + if (is_color_semantic(param.semantic)) { code += '.'; - for (unsigned int k = 0; k < func.parameter_list[i].type.rows; k++) - code += "xyzw"[k]; + for (unsigned int c = 0; c < param.type.rows; c++) + code += "xyzw"[c]; } - if (i < num_params - 1) - code += ", "; + code += ", "; } + // Remove trailing ", " + if (!entry_point.parameter_list.empty()) + code.erase(code.size() - 2); + code += ')'; // Cast the output value to a four-component vector if (is_color_semantic(func.return_semantic)) { - for (unsigned int i = 0; i < 4 - func.return_type.rows; i++) + for (unsigned int c = 0; c < (4 - func.return_type.rows); c++) code += ", 0.0"; code += ')'; } @@ -1028,7 +1320,7 @@ private: code += ";\n"; // Shift everything by half a viewport pixel to workaround the different half-pixel offset in D3D9 (https://aras-p.info/blog/2016/04/08/solving-dx9-half-pixel-offset/) - if (!position_variable_name.empty() && stype == shader_type::vs) // Check if we are in a vertex shader definition + if (func.type == shader_type::vertex && !position_variable_name.empty()) // Check if we are in a vertex shader definition code 
+= '\t' + position_variable_name + ".xy += __TEXEL_SIZE__ * " + position_variable_name + ".ww;\n"; leave_block_and_return(func.return_type.is_void() ? 0 : ret); @@ -1053,7 +1345,7 @@ private: std::string type, expr_code = id_to_name(exp.base); - for (const auto &op : exp.chain) + for (const expression::operation &op : exp.chain) { switch (op.op) { @@ -1065,7 +1357,7 @@ private: break; case expression::operation::op_member: expr_code += '.'; - expr_code += get_struct(op.from.definition).member_list[op.index].name; + expr_code += get_struct(op.from.struct_definition).member_list[op.index].name; break; case expression::operation::op_dynamic_index: expr_code += '[' + id_to_name(op.index) + ']'; @@ -1079,7 +1371,7 @@ private: break; case expression::operation::op_swizzle: expr_code += '.'; - for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) + for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) if (op.from.is_matrix()) expr_code += s_matrix_swizzles[op.swizzle[i]]; else @@ -1120,13 +1412,13 @@ private: "_m30", "_m31", "_m32", "_m33" }; - for (const auto &op : exp.chain) + for (const expression::operation &op : exp.chain) { switch (op.op) { case expression::operation::op_member: code += '.'; - code += get_struct(op.from.definition).member_list[op.index].name; + code += get_struct(op.from.struct_definition).member_list[op.index].name; break; case expression::operation::op_dynamic_index: code += '[' + id_to_name(op.index) + ']'; @@ -1136,7 +1428,7 @@ private: break; case expression::operation::op_swizzle: code += '.'; - for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) + for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) if (op.from.is_matrix()) code += s_matrix_swizzles[op.swizzle[i]]; else @@ -1148,30 +1440,44 @@ private: code += " = " + id_to_name(value) + ";\n"; } - id emit_constant(const type &type, const constant &data) override + id emit_constant(const type &data_type, const constant &data) override { const id res = make_id(); - if (type.is_array()) + 
if (data_type.is_array()) { - assert(type.has(type::q_const)); + assert(data_type.has(type::q_const)); - std::string &code = _blocks.at(_current_block); + if (const auto it = std::find_if(_constant_lookup.begin(), _constant_lookup.end(), + [&data_type, &data](const std::tuple &x) { + if (!(std::get<0>(x) == data_type && std::memcmp(&std::get<1>(x).as_uint[0], &data.as_uint[0], sizeof(uint32_t) * 16) == 0 && std::get<1>(x).array_data.size() == data.array_data.size())) + return false; + for (size_t i = 0; i < data.array_data.size(); ++i) + if (std::memcmp(&std::get<1>(x).array_data[i].as_uint[0], &data.array_data[i].as_uint[0], sizeof(uint32_t) * 16) != 0) + return false; + return true; + }); + it != _constant_lookup.end()) + return std::get<2>(*it); // Reuse existing constant instead of duplicating the definition + else + _constant_lookup.push_back({ data_type, data, res }); + + // Put constant variable into global scope, so that it can be reused in different blocks + std::string &code = _blocks.at(0); // Array constants need to be stored in a constant variable as they cannot be used in-place - code += '\t'; - code += "const "; - write_type(code, type); + code += "static const "; + write_type(code, data_type); code += ' ' + id_to_name(res); - code += '[' + std::to_string(type.array_length) + ']'; + code += '[' + std::to_string(data_type.array_length) + ']'; code += " = "; - write_constant(code, type, data); + write_constant(code, data_type, data); code += ";\n"; return res; } std::string code; - write_constant(code, type, data); + write_constant(code, data_type, data); define_name(res, std::move(code)); return res; @@ -1357,14 +1663,16 @@ private: code += id_to_name(function) + '('; - for (size_t i = 0, num_args = args.size(); i < num_args; ++i) + for (const expression &arg : args) { - code += id_to_name(args[i].base); - - if (i < num_args - 1) - code += ", "; + code += id_to_name(arg.base); + code += ", "; } + // Remove trailing ", " + if (!args.empty()) + 
code.erase(code.size() - 2); + code += ");\n"; return res; @@ -1390,17 +1698,7 @@ private: code += '\t'; - if (_shader_model >= 40 && ( - (intrinsic >= tex1Dsize0 && intrinsic <= tex3Dsize2) || - (intrinsic >= atomicAdd0 && intrinsic <= atomicCompareExchange1) || - (!(res_type.is_floating_point() || _shader_model >= 67) && (intrinsic >= tex1D0 && intrinsic <= tex3Dlod1)))) - { - // Implementation of the 'tex2Dsize' intrinsic passes the result variable into 'GetDimensions' as output argument - // Same with the atomic intrinsics, which use the last parameter to return the previous value of the target - write_type(code, res_type); - code += ' ' + id_to_name(res) + "; "; - } - else if (!res_type.is_void()) + if (!res_type.is_void()) { write_type(code, res_type); code += ' ' + id_to_name(res) + " = "; @@ -1418,11 +1716,11 @@ private: return res; } - id emit_construct(const location &loc, const type &type, const std::vector &args) override + id emit_construct(const location &loc, const type &res_type, const std::vector &args) override { #ifndef NDEBUG - for (const auto &arg : args) - assert((arg.type.is_scalar() || type.is_array()) && arg.chain.empty() && arg.base != 0); + for (const expression &arg : args) + assert((arg.type.is_scalar() || res_type.is_array()) && arg.chain.empty() && arg.base != 0); #endif const id res = make_id(); @@ -1432,28 +1730,30 @@ private: write_location(code, loc); code += '\t'; - write_type(code, type); + write_type(code, res_type); code += ' ' + id_to_name(res); - if (type.is_array()) - code += '[' + std::to_string(type.array_length) + ']'; + if (res_type.is_array()) + code += '[' + std::to_string(res_type.array_length) + ']'; code += " = "; - if (type.is_array()) + if (res_type.is_array()) code += "{ "; else - write_type(code, type), code += '('; + write_type(code, res_type), code += '('; - for (size_t i = 0, num_args = args.size(); i < num_args; ++i) + for (const expression &arg : args) { - code += id_to_name(args[i].base); - - if (i < 
num_args - 1) - code += ", "; + code += id_to_name(arg.base); + code += ", "; } - if (type.is_array()) + // Remove trailing ", " + if (!args.empty()) + code.erase(code.size() - 2); + + if (res_type.is_array()) code += " }"; else code += ')'; @@ -1500,7 +1800,7 @@ private: _blocks.erase(true_statement_block); _blocks.erase(false_statement_block); } - id emit_phi(const location &loc, id condition_value, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &type) override + id emit_phi(const location &loc, id condition_value, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &res_type) override { assert(condition_value != 0 && condition_block != 0 && true_value != 0 && true_statement_block != 0 && false_value != 0 && false_statement_block != 0); @@ -1517,7 +1817,7 @@ private: code += _blocks.at(condition_block); code += '\t'; - write_type(code, type); + write_type(code, res_type); code += ' ' + id_to_name(res) + ";\n"; write_location(code, loc); @@ -1564,8 +1864,8 @@ private: if (condition_block == 0) { // Convert the last SSA variable initializer to an assignment statement - auto pos_assign = continue_data.rfind(condition_name); - auto pos_prev_assign = continue_data.rfind('\t', pos_assign); + const size_t pos_assign = continue_data.rfind(condition_name); + const size_t pos_prev_assign = continue_data.rfind('\t', pos_assign); continue_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); // We need to add the continue block to all "continue" statements as well @@ -1594,7 +1894,7 @@ private: // Check 'condition_name' instead of 'condition_value' here to also catch cases where a constant boolean expression was passed in as loop condition bool use_break_statement_for_condition = (_shader_model < 40 && condition_name != "true") && std::find_if(_module.uniforms.begin(), _module.uniforms.end(), - [&](const uniform_info &info) { + [&](const 
uniform &info) { return condition_data.find(info.name) != std::string::npos || condition_name.find(info.name) != std::string::npos; }) != _module.uniforms.end(); @@ -1602,9 +1902,9 @@ private: if (!use_break_statement_for_condition && std::count(condition_data.begin(), condition_data.end(), '\n') == 1) { // Convert SSA variable initializer back to a condition expression - auto pos_assign = condition_data.find('='); + const size_t pos_assign = condition_data.find('='); condition_data.erase(0, pos_assign + 2); - auto pos_semicolon = condition_data.rfind(';'); + const size_t pos_semicolon = condition_data.rfind(';'); condition_data.erase(pos_semicolon); condition_name = std::move(condition_data); @@ -1617,8 +1917,8 @@ private: increase_indentation_level(condition_data); // Convert the last SSA variable initializer to an assignment statement - auto pos_assign = condition_data.rfind(condition_name); - auto pos_prev_assign = condition_data.rfind('\t', pos_assign); + const size_t pos_assign = condition_data.rfind(condition_name); + const size_t pos_prev_assign = condition_data.rfind('\t', pos_assign); condition_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); } @@ -1810,7 +2110,7 @@ private: code += "\tdiscard;\n"; - const auto &return_type = _functions.back()->return_type; + const type &return_type = _current_function->return_type; if (!return_type.is_void()) { // HLSL compiler doesn't handle discard like a shader kill @@ -1829,7 +2129,7 @@ private: return 0; // Skip implicit return statement - if (!_functions.back()->return_type.is_void() && value == 0) + if (!_current_function->return_type.is_void() && value == 0) return set_block(0); std::string &code = _blocks.at(_current_block); @@ -1878,9 +2178,12 @@ private: } void leave_function() override { - assert(_last_block != 0); + assert(_current_function != nullptr && _last_block != 0); - _blocks.at(0) += "{\n" + _blocks.at(_last_block) + "}\n"; + _blocks.emplace(_current_function->id, 
_current_function_declaration + "{\n" + _blocks.at(_last_block) + "}\n"); + + _current_function = nullptr; + _current_function_declaration.clear(); } }; diff --git a/dep/reshadefx/src/effect_codegen_spirv.cpp b/dep/reshadefx/src/effect_codegen_spirv.cpp index 2695b970a..e95c78202 100644 --- a/dep/reshadefx/src/effect_codegen_spirv.cpp +++ b/dep/reshadefx/src/effect_codegen_spirv.cpp @@ -6,8 +6,9 @@ #include "effect_parser.hpp" #include "effect_codegen.hpp" #include -#include // memcmp -#include // std::find_if, std::max +#include // std::memcmp +#include // std::from_chars +#include // std::find_if, std::max, std::sort #include // Use the C++ variant of the SPIR-V headers @@ -18,6 +19,12 @@ namespace spv { using namespace reshadefx; +inline uint32_t align_up(uint32_t size, uint32_t alignment) +{ + alignment -= 1; + return ((size + alignment) & ~alignment); +} + /// /// A single instruction in a SPIR-V module /// @@ -70,7 +77,7 @@ struct spirv_instruction /// Write this instruction to a SPIR-V module. /// /// The output stream to append this instruction to. - void write(std::vector &output) const + void write(std::basic_string &output) const { // See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html // 0 | Opcode: The 16 high-order bits are the WordCount of the instruction. The 16 low-order bits are the opcode enumerant. @@ -81,19 +88,32 @@ struct spirv_instruction // ... | ... // WordCount - 1 | Operand N (N is determined by WordCount minus the 1 to 3 words used for the opcode, instruction type , and instruction Result ). 
- const uint32_t num_words = 1 + (type != 0) + (result != 0) + static_cast(operands.size()); - output.push_back((num_words << spv::WordCountShift) | op); + const uint32_t word_count = 1 + (type != 0) + (result != 0) + static_cast(operands.size()); + write_word(output, (word_count << spv::WordCountShift) | op); // Optional instruction type ID if (type != 0) - output.push_back(type); + write_word(output, type); // Optional instruction result ID if (result != 0) - output.push_back(result); + write_word(output, result); // Write out the operands - output.insert(output.end(), operands.begin(), operands.end()); + for (const uint32_t operand : operands) + write_word(output, operand); + } + + static void write_word(std::basic_string &output, uint32_t word) + { + output.insert(output.end(), reinterpret_cast(&word), reinterpret_cast(&word + 1)); + } + + operator uint32_t() const + { + assert(result != 0); + + return result; } }; @@ -115,9 +135,15 @@ struct spirv_basic_block class codegen_spirv final : public codegen { + static_assert(sizeof(id) == sizeof(spv::Id), "unexpected SPIR-V id type size"); + public: - codegen_spirv(bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) - : _debug_info(debug_info), _vulkan_semantics(vulkan_semantics), _uniforms_to_spec_constants(uniforms_to_spec_constants), _enable_16bit_types(enable_16bit_types), _flip_vert_y(flip_vert_y) + codegen_spirv(bool vulkan_semantics, bool debug_info, bool uniforms_to_spec_constants, bool enable_16bit_types, bool flip_vert_y) : + _debug_info(debug_info), + _vulkan_semantics(vulkan_semantics), + _uniforms_to_spec_constants(uniforms_to_spec_constants), + _enable_16bit_types(enable_16bit_types), + _flip_vert_y(flip_vert_y) { _glsl_ext = make_id(); } @@ -140,9 +166,8 @@ private: spirv_basic_block declaration; spirv_basic_block variables; spirv_basic_block definition; - type return_type; - std::vector param_types; - bool is_entry_point = false; + 
reshadefx::type return_type; + std::vector param_types; friend bool operator==(const function_blocks &lhs, const function_blocks &rhs) { @@ -155,6 +180,12 @@ private: } }; + bool _debug_info = false; + bool _vulkan_semantics = false; + bool _uniforms_to_spec_constants = false; + bool _enable_16bit_types = false; + bool _flip_vert_y = false; + spirv_basic_block _entries; spirv_basic_block _execution_modes; spirv_basic_block _debug_a; @@ -163,8 +194,16 @@ private: spirv_basic_block _types_and_constants; spirv_basic_block _variables; - std::unordered_set _spec_constants; - std::unordered_set _capabilities; + std::vector _functions_blocks; + std::unordered_map _block_data; + spirv_basic_block *_current_block_data = nullptr; + + spv::Id _glsl_ext = 0; + spv::Id _global_ubo_type = 0; + spv::Id _global_ubo_variable = 0; + std::vector _global_ubo_types; + function_blocks *_current_function_blocks = nullptr; + std::vector> _type_lookup; std::vector> _constant_lookup; std::vector> _function_type_lookup; @@ -172,22 +211,10 @@ private: std::unordered_map> _storage_lookup; std::unordered_map _semantic_to_location; - std::vector _functions_blocks; - std::unordered_map _block_data; - spirv_basic_block *_current_block_data = nullptr; + std::unordered_set _spec_constants; + std::unordered_set _capabilities; - bool _debug_info = false; - bool _vulkan_semantics = false; - bool _uniforms_to_spec_constants = false; - bool _enable_16bit_types = false; - bool _flip_vert_y = false; - id _glsl_ext = 0; - id _global_ubo_type = 0; - id _global_ubo_variable = 0; - std::vector _global_ubo_types; - function_blocks *_current_function = nullptr; - - inline void add_location(const location &loc, spirv_basic_block &block) + void add_location(const location &loc, spirv_basic_block &block) { if (loc.source.empty() || !_debug_info) return; @@ -196,10 +223,14 @@ private: if (const auto it = _string_lookup.find(loc.source); it != _string_lookup.end()) + { file = it->second; - else { - 
add_instruction(spv::OpString, 0, _debug_a, file) - .add_string(loc.source.c_str()); + } + else + { + file = + add_instruction(spv::OpString, 0, _debug_a) + .add_string(loc.source.c_str()); _string_lookup.emplace(loc.source, file); } @@ -209,69 +240,45 @@ private: .add(loc.line) .add(loc.column); } - inline spirv_instruction &add_instruction(spv::Op op, spv::Id type = 0) + spirv_instruction &add_instruction(spv::Op op, spv::Id type = 0) { assert(is_in_function() && is_in_block()); + return add_instruction(op, type, *_current_block_data); } - inline spirv_instruction &add_instruction(spv::Op op, spv::Id type, spirv_basic_block &block) + spirv_instruction &add_instruction(spv::Op op, spv::Id type, spirv_basic_block &block) { spirv_instruction &instruction = add_instruction_without_result(op, block); instruction.type = type; instruction.result = make_id(); return instruction; } - inline spirv_instruction &add_instruction(spv::Op op, spv::Id type, spirv_basic_block &block, spv::Id &result) - { - spirv_instruction &instruction = add_instruction_without_result(op, block); - instruction.type = type; - instruction.result = result = make_id(); - return instruction; - } - inline spirv_instruction &add_instruction_without_result(spv::Op op) + spirv_instruction &add_instruction_without_result(spv::Op op) { assert(is_in_function() && is_in_block()); + return add_instruction_without_result(op, *_current_block_data); } - inline spirv_instruction &add_instruction_without_result(spv::Op op, spirv_basic_block &block) + spirv_instruction &add_instruction_without_result(spv::Op op, spirv_basic_block &block) { return block.instructions.emplace_back(op); } - void write_result(module &module) override + void finalize_header_section(std::basic_string &spirv) const { - // First initialize the UBO type now that all member types are known - if (_global_ubo_type != 0) - { - spirv_instruction &type_inst = add_instruction_without_result(spv::OpTypeStruct, _types_and_constants); - 
type_inst.add(_global_ubo_types.begin(), _global_ubo_types.end()); - type_inst.result = _global_ubo_type; - - spirv_instruction &variable_inst = add_instruction_without_result(spv::OpVariable, _variables); - variable_inst.add(spv::StorageClassUniform); - variable_inst.type = convert_type({ type::t_struct, 0, 0, type::q_uniform, 0, _global_ubo_type }, true, spv::StorageClassUniform); - variable_inst.result = _global_ubo_variable; - - add_name(variable_inst.result, "$Globals"); - } - - module = std::move(_module); - - std::vector spirv; - // Write SPIRV header info - spirv.push_back(spv::MagicNumber); - spirv.push_back(0x10300); // Force SPIR-V 1.3 - spirv.push_back(0u); // Generator magic number, see https://www.khronos.org/registry/spir-v/api/spir-v.xml - spirv.push_back(_next_id); // Maximum ID - spirv.push_back(0u); // Reserved for instruction schema + spirv_instruction::write_word(spirv, spv::MagicNumber); + spirv_instruction::write_word(spirv, 0x10300); // Force SPIR-V 1.3 + spirv_instruction::write_word(spirv, 0u); // Generator magic number, see https://www.khronos.org/registry/spir-v/api/spir-v.xml + spirv_instruction::write_word(spirv, _next_id); // Maximum ID + spirv_instruction::write_word(spirv, 0u); // Reserved for instruction schema // All capabilities spirv_instruction(spv::OpCapability) .add(spv::CapabilityShader) // Implicitly declares the Matrix capability too .write(spirv); - for (spv::Capability capability : _capabilities) + for (const spv::Capability capability : _capabilities) spirv_instruction(spv::OpCapability) .add(capability) .write(spirv); @@ -286,15 +293,9 @@ private: .add(spv::AddressingModelLogical) .add(spv::MemoryModelGLSL450) .write(spirv); - - // All entry point declarations - for (const auto &node : _entries.instructions) - node.write(spirv); - - // All execution mode declarations - for (const auto &node : _execution_modes.instructions) - node.write(spirv); - + } + void finalize_debug_info_section(std::basic_string &spirv) const + { 
spirv_instruction(spv::OpSource) .add(spv::SourceLanguageUnknown) // ReShade FX is not a reserved token at the moment .add(0) // Language version, TODO: Maybe fill in ReShade version here? @@ -303,42 +304,209 @@ private: if (_debug_info) { // All debug instructions - for (const auto &node : _debug_a.instructions) - node.write(spirv); - for (const auto &node : _debug_b.instructions) - node.write(spirv); + for (const spirv_instruction &inst : _debug_a.instructions) + inst.write(spirv); + for (const spirv_instruction &inst : _debug_b.instructions) + inst.write(spirv); } + } + void finalize_type_and_constants_section(std::basic_string &spirv) const + { + // All type declarations + for (const spirv_instruction &inst : _types_and_constants.instructions) + inst.write(spirv); + + // Initialize the UBO type now that all member types are known + if (_global_ubo_type == 0 || _global_ubo_variable == 0) + return; + + const id global_ubo_type_ptr = _global_ubo_type + 1; + + spirv_instruction(spv::OpTypeStruct, _global_ubo_type) + .add(_global_ubo_types.begin(), _global_ubo_types.end()) + .write(spirv); + spirv_instruction(spv::OpTypePointer, global_ubo_type_ptr) + .add(spv::StorageClassUniform) + .add(_global_ubo_type) + .write(spirv); + + spirv_instruction(spv::OpVariable, global_ubo_type_ptr, _global_ubo_variable) + .add(spv::StorageClassUniform) + .write(spirv); + } + + std::basic_string finalize_code() const override + { + std::basic_string spirv; + finalize_header_section(spirv); + + // All entry point declarations + for (const spirv_instruction &inst : _entries.instructions) + inst.write(spirv); + + // All execution mode declarations + for (const spirv_instruction &inst : _execution_modes.instructions) + inst.write(spirv); + + finalize_debug_info_section(spirv); // All annotation instructions - for (const auto &node : _annotations.instructions) - node.write(spirv); + for (const spirv_instruction &inst : _annotations.instructions) + inst.write(spirv); - // All type 
declarations - for (const auto &node : _types_and_constants.instructions) - node.write(spirv); - for (const auto &node : _variables.instructions) - node.write(spirv); + finalize_type_and_constants_section(spirv); + + for (const spirv_instruction &inst : _variables.instructions) + inst.write(spirv); // All function definitions - for (const auto &function : _functions_blocks) + for (const function_blocks &func : _functions_blocks) { - if (function.definition.instructions.empty()) + if (func.definition.instructions.empty()) continue; - for (const auto &node : function.declaration.instructions) - node.write(spirv); + for (const spirv_instruction &inst : func.declaration.instructions) + inst.write(spirv); // Grab first label and move it in front of variable declarations - function.definition.instructions.front().write(spirv); - assert(function.definition.instructions.front().op == spv::OpLabel); + func.definition.instructions.front().write(spirv); + assert(func.definition.instructions.front().op == spv::OpLabel); - for (const auto &node : function.variables.instructions) - node.write(spirv); - for (auto it = function.definition.instructions.begin() + 1; it != function.definition.instructions.end(); ++it) - it->write(spirv); + for (const spirv_instruction &inst : func.variables.instructions) + inst.write(spirv); + for (auto inst_it = func.definition.instructions.begin() + 1; inst_it != func.definition.instructions.end(); ++inst_it) + inst_it->write(spirv); } - module.code.assign(reinterpret_cast(spirv.data()), reinterpret_cast(spirv.data() + spirv.size())); + return spirv; + } + std::basic_string finalize_code_for_entry_point(const std::string &entry_point_name) const override + { + const auto entry_point_it = std::find_if(_functions.begin(), _functions.end(), + [&entry_point_name](const std::unique_ptr &func) { + return func->unique_name == entry_point_name; + }); + if (entry_point_it == _functions.end()) + return {}; + const function &entry_point = 
*entry_point_it->get(); + + // Build list of IDs to remove + std::vector variables_to_remove; +#if 1 + std::vector functions_to_remove; +#else + for (const sampler &info : _module.samplers) + if (std::find(entry_point.referenced_samplers.begin(), entry_point.referenced_samplers.end(), info.id) == entry_point.referenced_samplers.end()) + variables_to_remove.push_back(info.id); + for (const storage &info : _module.storages) + if (std::find(entry_point.referenced_storages.begin(), entry_point.referenced_storages.end(), info.id) == entry_point.referenced_storages.end()) + variables_to_remove.push_back(info.id); +#endif + + std::basic_string spirv; + finalize_header_section(spirv); + + // The entry point and execution mode declaration + for (const spirv_instruction &inst : _entries.instructions) + { + assert(inst.op == spv::OpEntryPoint); + + // Only add the matching entry point + if (inst.operands[1] == entry_point.id) + { + inst.write(spirv); + } + else + { +#if 1 + functions_to_remove.push_back(inst.operands[1]); +#endif + // Add interface variables to list of variables to remove + for (uint32_t k = 2 + static_cast((std::strlen(reinterpret_cast(&inst.operands[2])) + 4) / 4); k < inst.operands.size(); ++k) + variables_to_remove.push_back(inst.operands[k]); + } + } + + for (const spirv_instruction &inst : _execution_modes.instructions) + { + assert(inst.op == spv::OpExecutionMode); + + // Only add execution mode for the matching entry point + if (inst.operands[0] == entry_point.id) + { + inst.write(spirv); + } + } + + finalize_debug_info_section(spirv); + + // All annotation instructions + for (spirv_instruction inst : _annotations.instructions) + { + if (inst.op == spv::OpDecorate) + { + // Remove all decorations targeting any of the interface variables for non-matching entry points + if (std::find(variables_to_remove.begin(), variables_to_remove.end(), inst.operands[0]) != variables_to_remove.end()) + continue; + + // Replace bindings + if (inst.operands[1] == 
spv::DecorationBinding) + { + if (const auto referenced_sampler_it = std::find(entry_point.referenced_samplers.begin(), entry_point.referenced_samplers.end(), inst.operands[0]); + referenced_sampler_it != entry_point.referenced_samplers.end()) + inst.operands[2] = static_cast(std::distance(entry_point.referenced_samplers.begin(), referenced_sampler_it)); + else + if (const auto referenced_storage_it = std::find(entry_point.referenced_storages.begin(), entry_point.referenced_storages.end(), inst.operands[0]); + referenced_storage_it != entry_point.referenced_storages.end()) + inst.operands[2] = static_cast(std::distance(entry_point.referenced_storages.begin(), referenced_storage_it)); + } + } + + inst.write(spirv); + } + + finalize_type_and_constants_section(spirv); + + for (const spirv_instruction &inst : _variables.instructions) + { + // Remove all declarations of the interface variables for non-matching entry points + if (inst.op == spv::OpVariable && std::find(variables_to_remove.begin(), variables_to_remove.end(), inst.result) != variables_to_remove.end()) + continue; + + inst.write(spirv); + } + + // All referenced function definitions + for (const function_blocks &func : _functions_blocks) + { + if (func.definition.instructions.empty()) + continue; + + assert(func.declaration.instructions[_debug_info ? 1 : 0].op == spv::OpFunction); + const spv::Id definition = func.declaration.instructions[_debug_info ? 
1 : 0].result; + +#if 1 + if (std::find(functions_to_remove.begin(), functions_to_remove.end(), definition) != functions_to_remove.end()) +#else + if (struct_definition != entry_point.struct_definition && + entry_point.referenced_functions.find(struct_definition) == entry_point.referenced_functions.end()) +#endif + continue; + + for (const spirv_instruction &inst : func.declaration.instructions) + inst.write(spirv); + + // Grab first label and move it in front of variable declarations + func.definition.instructions.front().write(spirv); + assert(func.definition.instructions.front().op == spv::OpLabel); + + for (const spirv_instruction &inst : func.variables.instructions) + inst.write(spirv); + for (auto inst_it = func.definition.instructions.begin() + 1; inst_it != func.definition.instructions.end(); ++inst_it) + inst_it->write(spirv); + } + + return spirv; } spv::Id convert_type(type info, bool is_ptr = false, spv::StorageClass storage = spv::StorageClassFunction, spv::ImageFormat format = spv::ImageFormatUnknown, uint32_t array_stride = 0) @@ -363,66 +531,69 @@ private: const type_lookup lookup { info, is_ptr, array_stride, { storage, format } }; - if (const auto it = std::find_if(_type_lookup.begin(), _type_lookup.end(), - [&lookup](const auto &lookup_it) { return lookup_it.first == lookup; }); - it != _type_lookup.end()) - return it->second; + if (const auto lookup_it = std::find_if(_type_lookup.begin(), _type_lookup.end(), + [&lookup](const std::pair &lookup_entry) { return lookup_entry.first == lookup; }); + lookup_it != _type_lookup.end()) + return lookup_it->second; - spv::Id type, elem_type; + spv::Id type_id, elem_type_id; if (is_ptr) { - elem_type = convert_type(info, false, storage, format, array_stride); - - add_instruction(spv::OpTypePointer, 0, _types_and_constants, type) - .add(storage) - .add(elem_type); + elem_type_id = convert_type(info, false, storage, format, array_stride); + type_id = + add_instruction(spv::OpTypePointer, 0, 
_types_and_constants) + .add(storage) + .add(elem_type_id); } else if (info.is_array()) { - auto elem_info = info; + type elem_info = info; elem_info.array_length = 0; + elem_type_id = convert_type(elem_info, false, storage, format); + // Make sure we don't get any dynamic arrays here - assert(info.array_length > 0); + assert(info.is_bounded_array()); - elem_type = convert_type(elem_info, false, storage, format); - const spv::Id array_length = emit_constant(info.array_length); + const spv::Id array_length_id = emit_constant(info.array_length); - add_instruction(spv::OpTypeArray, 0, _types_and_constants, type) - .add(elem_type) - .add(array_length); + type_id = + add_instruction(spv::OpTypeArray, 0, _types_and_constants) + .add(elem_type_id) + .add(array_length_id); if (array_stride != 0) - add_decoration(type, spv::DecorationArrayStride, { array_stride }); + add_decoration(type_id, spv::DecorationArrayStride, { array_stride }); } else if (info.is_matrix()) { // Convert MxN matrix to a SPIR-V matrix with M vectors with N elements - auto elem_info = info; + type elem_info = info; elem_info.rows = info.cols; elem_info.cols = 1; - elem_type = convert_type(elem_info, false, storage, format); + elem_type_id = convert_type(elem_info, false, storage, format); // Matrix types with just one row are interpreted as if they were a vector type if (info.rows == 1) - return elem_type; + return elem_type_id; - add_instruction(spv::OpTypeMatrix, 0, _types_and_constants, type) - .add(elem_type) - .add(info.rows); + type_id = + add_instruction(spv::OpTypeMatrix, 0, _types_and_constants) + .add(elem_type_id) + .add(info.rows); } else if (info.is_vector()) { - auto elem_info = info; + type elem_info = info; elem_info.rows = 1; elem_info.cols = 1; - elem_type = convert_type(elem_info, false, storage, format); - - add_instruction(spv::OpTypeVector, 0, _types_and_constants, type) - .add(elem_type) - .add(info.rows); + elem_type_id = convert_type(elem_info, false, storage, format); + 
type_id = + add_instruction(spv::OpTypeVector, 0, _types_and_constants) + .add(elem_type_id) + .add(info.rows); } else { @@ -430,58 +601,64 @@ private: { case type::t_void: assert(info.rows == 0 && info.cols == 0); - add_instruction(spv::OpTypeVoid, 0, _types_and_constants, type); + type_id = add_instruction(spv::OpTypeVoid, 0, _types_and_constants); break; case type::t_bool: assert(info.rows == 1 && info.cols == 1); - add_instruction(spv::OpTypeBool, 0, _types_and_constants, type); + type_id = add_instruction(spv::OpTypeBool, 0, _types_and_constants); break; case type::t_min16int: assert(_enable_16bit_types && info.rows == 1 && info.cols == 1); add_capability(spv::CapabilityInt16); if (storage == spv::StorageClassInput || storage == spv::StorageClassOutput) add_capability(spv::CapabilityStorageInputOutput16); - add_instruction(spv::OpTypeInt, 0, _types_and_constants, type) - .add(16) // Width - .add(1); // Signedness + type_id = + add_instruction(spv::OpTypeInt, 0, _types_and_constants) + .add(16) // Width + .add(1); // Signedness break; case type::t_int: assert(info.rows == 1 && info.cols == 1); - add_instruction(spv::OpTypeInt, 0, _types_and_constants, type) - .add(32) // Width - .add(1); // Signedness + type_id = + add_instruction(spv::OpTypeInt, 0, _types_and_constants) + .add(32) // Width + .add(1); // Signedness break; case type::t_min16uint: assert(_enable_16bit_types && info.rows == 1 && info.cols == 1); add_capability(spv::CapabilityInt16); if (storage == spv::StorageClassInput || storage == spv::StorageClassOutput) add_capability(spv::CapabilityStorageInputOutput16); - add_instruction(spv::OpTypeInt, 0, _types_and_constants, type) - .add(16) // Width - .add(0); // Signedness + type_id = + add_instruction(spv::OpTypeInt, 0, _types_and_constants) + .add(16) // Width + .add(0); // Signedness break; case type::t_uint: assert(info.rows == 1 && info.cols == 1); - add_instruction(spv::OpTypeInt, 0, _types_and_constants, type) - .add(32) // Width - .add(0); // 
Signedness + type_id = + add_instruction(spv::OpTypeInt, 0, _types_and_constants) + .add(32) // Width + .add(0); // Signedness break; case type::t_min16float: assert(_enable_16bit_types && info.rows == 1 && info.cols == 1); add_capability(spv::CapabilityFloat16); if (storage == spv::StorageClassInput || storage == spv::StorageClassOutput) add_capability(spv::CapabilityStorageInputOutput16); - add_instruction(spv::OpTypeFloat, 0, _types_and_constants, type) - .add(16); // Width + type_id = + add_instruction(spv::OpTypeFloat, 0, _types_and_constants) + .add(16); // Width break; case type::t_float: assert(info.rows == 1 && info.cols == 1); - add_instruction(spv::OpTypeFloat, 0, _types_and_constants, type) - .add(32); // Width + type_id = + add_instruction(spv::OpTypeFloat, 0, _types_and_constants) + .add(32); // Width break; case type::t_struct: - assert(info.rows == 0 && info.cols == 0 && info.definition != 0); - type = info.definition; + assert(info.rows == 0 && info.cols == 0 && info.struct_definition != 0); + type_id = info.struct_definition; break; case type::t_sampler1d_int: case type::t_sampler1d_uint: @@ -494,9 +671,10 @@ private: case type::t_sampler3d_int: case type::t_sampler3d_uint: case type::t_sampler3d_float: - elem_type = convert_image_type(info, format); - add_instruction(spv::OpTypeSampledImage, 0, _types_and_constants, type) - .add(elem_type); + elem_type_id = convert_image_type(info, format); + type_id = + add_instruction(spv::OpTypeSampledImage, 0, _types_and_constants) + .add(elem_type_id); break; case type::t_storage1d_int: case type::t_storage1d_uint: @@ -514,42 +692,41 @@ private: add_capability(spv::CapabilityStorageImageWriteWithoutFormat); return convert_image_type(info, format); default: - return assert(false), 0; + assert(false); + return 0; } } - _type_lookup.push_back({ lookup, type }); + _type_lookup.push_back({ lookup, type_id }); - return type; + return type_id; } spv::Id convert_type(const function_blocks &info) { - if (const auto 
it = std::find_if(_function_type_lookup.begin(), _function_type_lookup.end(), - [&lookup = info](const auto &lookup_it) { return lookup_it.first == lookup; }); - it != _function_type_lookup.end()) - return it->second; + if (const auto lookup_it = std::find_if(_function_type_lookup.begin(), _function_type_lookup.end(), + [&lookup = info](const std::pair &lookup_entry) { return lookup_entry.first == lookup; }); + lookup_it != _function_type_lookup.end()) + return lookup_it->second; - auto return_type = convert_type(info.return_type); - assert(return_type != 0); + const spv::Id return_type_id = convert_type(info.return_type); + assert(return_type_id != 0); std::vector param_type_ids; param_type_ids.reserve(info.param_types.size()); for (const type ¶m_type : info.param_types) param_type_ids.push_back(convert_type(param_type, true)); - spirv_instruction &inst = add_instruction(spv::OpTypeFunction, 0, _types_and_constants); - inst.add(return_type); - inst.add(param_type_ids.begin(), param_type_ids.end()); + spirv_instruction &inst = add_instruction(spv::OpTypeFunction, 0, _types_and_constants) + .add(return_type_id) + .add(param_type_ids.begin(), param_type_ids.end()); - _function_type_lookup.push_back({ info, inst.result });; + _function_type_lookup.push_back({ info, inst }); - return inst.result; + return inst; } spv::Id convert_image_type(type info, spv::ImageFormat format = spv::ImageFormatUnknown) { - type_lookup lookup { info, false, 0u, { spv::StorageClassUniformConstant, format } }; - - auto elem_info = info; + type elem_info = info; elem_info.rows = 1; elem_info.cols = 1; @@ -563,41 +740,37 @@ private: elem_info.base = type::t_float; } + type_lookup lookup { info, false, 0u, { spv::StorageClassUniformConstant, format } }; if (!info.is_storage()) { lookup.type = elem_info; lookup.type.base = static_cast(type::t_texture1d + info.texture_dimension() - 1); - lookup.type.definition = static_cast(elem_info.base); + lookup.type.struct_definition = 
static_cast(elem_info.base); } - if (const auto it = std::find_if(_type_lookup.begin(), _type_lookup.end(), - [&lookup](const auto &lookup_it) { return lookup_it.first == lookup; }); - it != _type_lookup.end()) - return it->second; + if (const auto lookup_it = std::find_if(_type_lookup.begin(), _type_lookup.end(), + [&lookup](const std::pair &lookup_entry) { return lookup_entry.first == lookup; }); + lookup_it != _type_lookup.end()) + return lookup_it->second; - spv::Id type, elem_type = convert_type(elem_info, false, spv::StorageClassUniformConstant); + spv::Id type_id, elem_type_id = convert_type(elem_info, false, spv::StorageClassUniformConstant); + type_id = + add_instruction(spv::OpTypeImage, 0, _types_and_constants) + .add(elem_type_id) // Sampled Type (always a scalar type) + .add(spv::Dim1D + info.texture_dimension() - 1) + .add(0) // Not a depth image + .add(0) // Not an array + .add(0) // Not multi-sampled + .add(info.is_storage() ? 2 : 1) // Used with a sampler or as storage + .add(format); - add_instruction(spv::OpTypeImage, 0, _types_and_constants, type) - .add(elem_type) // Sampled Type (always a scalar type) - .add(spv::Dim1D + info.texture_dimension() - 1) - .add(0) // Not a depth image - .add(0) // Not an array - .add(0) // Not multi-sampled - .add(info.is_storage() ? 
2 : 1) // Used with a sampler or as storage - .add(format); + _type_lookup.push_back({ lookup, type_id }); - _type_lookup.push_back({ lookup, type }); - - return type; + return type_id; } - uint32_t semantic_to_location(const std::string &semantic, uint32_t max_array_length = 1) + uint32_t semantic_to_location(const std::string &semantic, uint32_t max_attributes = 1) { - if (semantic.compare(0, 5, "COLOR") == 0) - return std::strtoul(semantic.c_str() + 5, nullptr, 10); - if (semantic.compare(0, 9, "SV_TARGET") == 0) - return std::strtoul(semantic.c_str() + 9, nullptr, 10); - if (const auto it = _semantic_to_location.find(semantic); it != _semantic_to_location.end()) return it->second; @@ -608,13 +781,18 @@ private: digit_index--; digit_index++; - const uint32_t semantic_digit = std::strtoul(semantic.c_str() + digit_index, nullptr, 10); const std::string semantic_base = semantic.substr(0, digit_index); + uint32_t semantic_digit = 0; + std::from_chars(semantic.c_str() + digit_index, semantic.c_str() + semantic.size(), semantic_digit); + + if (semantic_base == "COLOR" || semantic_base == "SV_TARGET") + return semantic_digit; + uint32_t location = static_cast(_semantic_to_location.size()); // Now create adjoining location indices for all possible semantic indices belonging to this semantic name - for (uint32_t a = 0; a < semantic_digit + max_array_length; ++a) + for (uint32_t a = 0; a < semantic_digit + max_attributes; ++a) { const auto insert = _semantic_to_location.emplace(semantic_base + std::to_string(a), location + a); if (!insert.second) @@ -629,10 +807,10 @@ private: return location + semantic_digit; } - const spv::BuiltIn semantic_to_builtin(const std::string &semantic, shader_type stype) const + spv::BuiltIn semantic_to_builtin(const std::string &semantic, shader_type stype) const { if (semantic == "SV_POSITION") - return stype == shader_type::ps ? spv::BuiltInFragCoord : spv::BuiltInPosition; + return stype == shader_type::pixel ? 
spv::BuiltInFragCoord : spv::BuiltInPosition; if (semantic == "SV_POINTSIZE") return spv::BuiltInPointSize; if (semantic == "SV_DEPTH") @@ -651,7 +829,7 @@ private: return spv::BuiltInGlobalInvocationId; return spv::BuiltInMax; } - const spv::ImageFormat format_to_image_format(texture_format format) + spv::ImageFormat format_to_image_format(texture_format format) { switch (format) { @@ -702,7 +880,7 @@ private: } } - inline void add_name(id id, const char *name) + void add_name(id id, const char *name) { if (!_debug_info) return; @@ -713,14 +891,14 @@ private: .add(id) .add_string(name); } - inline void add_builtin(id id, spv::BuiltIn builtin) + void add_builtin(id id, spv::BuiltIn builtin) { add_instruction_without_result(spv::OpDecorate, _annotations) .add(id) .add(spv::DecorationBuiltIn) .add(builtin); } - inline void add_decoration(id id, spv::Decoration decoration, std::initializer_list values = {}) + void add_decoration(id id, spv::Decoration decoration, std::initializer_list values = {}) { // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpDecorate add_instruction_without_result(spv::OpDecorate, _annotations) @@ -728,7 +906,7 @@ private: .add(decoration) .add(values.begin(), values.end()); } - inline void add_member_name(id id, uint32_t member_index, const char *name) + void add_member_name(id id, uint32_t member_index, const char *name) { if (!_debug_info) return; @@ -740,7 +918,7 @@ private: .add(member_index) .add_string(name); } - inline void add_member_builtin(id id, uint32_t member_index, spv::BuiltIn builtin) + void add_member_builtin(id id, uint32_t member_index, spv::BuiltIn builtin) { add_instruction_without_result(spv::OpMemberDecorate, _annotations) .add(id) @@ -748,7 +926,7 @@ private: .add(spv::DecorationBuiltIn) .add(builtin); } - inline void add_member_decoration(id id, uint32_t member_index, spv::Decoration decoration, std::initializer_list values = {}) + void add_member_decoration(id id, uint32_t member_index, 
spv::Decoration decoration, std::initializer_list values = {}) { // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpMemberDecorate add_instruction_without_result(spv::OpMemberDecorate, _annotations) @@ -757,77 +935,78 @@ private: .add(decoration) .add(values.begin(), values.end()); } - inline void add_capability(spv::Capability capability) + void add_capability(spv::Capability capability) { _capabilities.insert(capability); } - id define_struct(const location &loc, struct_info &info) override + id define_struct(const location &loc, struct_type &info) override { // First define all member types to make sure they are declared before the struct type references them std::vector member_types; member_types.reserve(info.member_list.size()); - for (const struct_member_info &member : info.member_list) + for (const member_type &member : info.member_list) member_types.push_back(convert_type(member.type)); // Afterwards define the actual struct type add_location(loc, _types_and_constants); - add_instruction(spv::OpTypeStruct, 0, _types_and_constants, info.definition) - .add(member_types.begin(), member_types.end()); + const id res = info.id = + add_instruction(spv::OpTypeStruct, 0, _types_and_constants) + .add(member_types.begin(), member_types.end()); if (!info.unique_name.empty()) - add_name(info.definition, info.unique_name.c_str()); + add_name(res, info.unique_name.c_str()); for (uint32_t index = 0; index < info.member_list.size(); ++index) { - const struct_member_info &member = info.member_list[index]; + const member_type &member = info.member_list[index]; - add_member_name(info.definition, index, member.name.c_str()); + add_member_name(res, index, member.name.c_str()); if (!_enable_16bit_types && member.type.is_numeric() && member.type.precision() < 32) - add_member_decoration(info.definition, index, spv::DecorationRelaxedPrecision); + add_member_decoration(res, index, spv::DecorationRelaxedPrecision); } _structs.push_back(info); - return 
info.definition; + return res; } - id define_texture(const location &, texture_info &info) override + id define_texture(const location &, texture &info) override { - info.id = make_id(); // Need to create an unique ID here too, so that the symbol lookup for textures works - info.binding = ~0u; + const id res = info.id = make_id(); // Need to create an unique ID here too, so that the symbol lookup for textures works _module.textures.push_back(info); - return info.id; + return res; } - id define_sampler(const location &loc, const texture_info &, sampler_info &info) override + id define_sampler(const location &loc, const texture &, sampler &info) override { - info.id = define_variable(loc, info.type, info.unique_name.c_str(), spv::StorageClassUniformConstant); - info.binding = _module.num_sampler_bindings++; - info.texture_binding = ~0u; + const id res = info.id = define_variable(loc, info.type, info.unique_name.c_str(), spv::StorageClassUniformConstant); - add_decoration(info.id, spv::DecorationBinding, { info.binding }); - add_decoration(info.id, spv::DecorationDescriptorSet, { 1 }); + // Default to a binding index equivalent to the entry in the sampler list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.samplers.size()); + add_decoration(res, spv::DecorationBinding, { default_binding }); + add_decoration(res, spv::DecorationDescriptorSet, { 1 }); _module.samplers.push_back(info); - return info.id; + return res; } - id define_storage(const location &loc, const texture_info &tex_info, storage_info &info) override + id define_storage(const location &loc, const texture &tex_info, storage &info) override { - info.id = define_variable(loc, info.type, info.unique_name.c_str(), spv::StorageClassUniformConstant, format_to_image_format(tex_info.format)); - info.binding = _module.num_storage_bindings++; + const id res = info.id = define_variable(loc, info.type, 
info.unique_name.c_str(), spv::StorageClassUniformConstant, format_to_image_format(tex_info.format)); - add_decoration(info.id, spv::DecorationBinding, { info.binding }); - add_decoration(info.id, spv::DecorationDescriptorSet, { 2 }); + // Default to a binding index equivalent to the entry in the storage list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement) + const uint32_t default_binding = static_cast(_module.storages.size()); + add_decoration(res, spv::DecorationBinding, { default_binding }); + add_decoration(res, spv::DecorationDescriptorSet, { 2 }); _module.storages.push_back(info); - return info.id; + return res; } - id define_uniform(const location &, uniform_info &info) override + id define_uniform(const location &, uniform &info) override { if (_uniforms_to_spec_constants && info.has_initializer_value) { @@ -835,13 +1014,13 @@ private: add_name(res, info.name.c_str()); - const auto add_spec_constant = [this](const spirv_instruction &inst, const uniform_info &info, const constant &initializer_value, size_t initializer_offset) { + const auto add_spec_constant = [this](const spirv_instruction &inst, const uniform &info, const constant &initializer_value, size_t initializer_offset) { assert(inst.op == spv::OpSpecConstant || inst.op == spv::OpSpecConstantTrue || inst.op == spv::OpSpecConstantFalse); const uint32_t spec_id = static_cast(_module.spec_constants.size()); - add_decoration(inst.result, spv::DecorationSpecId, { spec_id }); + add_decoration(inst, spv::DecorationSpecId, { spec_id }); - uniform_info scalar_info = info; + uniform scalar_info = info; scalar_info.type.rows = 1; scalar_info.type.cols = 1; scalar_info.size = 4; @@ -849,11 +1028,11 @@ private: scalar_info.initializer_value = {}; scalar_info.initializer_value.as_uint[0] = initializer_value.as_uint[initializer_offset]; - _module.spec_constants.push_back(scalar_info); + _module.spec_constants.push_back(std::move(scalar_info)); }; const spirv_instruction 
&base_inst = _types_and_constants.instructions.back(); - assert(base_inst.result == res); + assert(base_inst == res); // External specialization constants need to be scalars if (info.type.is_scalar()) @@ -873,7 +1052,7 @@ private: if (info.type.is_array()) { elem_inst = *std::find_if(_types_and_constants.instructions.rbegin(), _types_and_constants.instructions.rend(), - [elem = base_inst.operands[i]](const auto &it) { return it.result == elem; }); + [operand_id = base_inst.operands[i]](const spirv_instruction &inst) { return inst == operand_id; }); assert(initializer_value.array_data.size() == base_inst.operands.size()); initializer_value = initializer_value.array_data[i]; @@ -882,7 +1061,7 @@ private: for (size_t row = 0; row < elem_inst.operands.size(); ++row) { const spirv_instruction &row_inst = *std::find_if(_types_and_constants.instructions.rbegin(), _types_and_constants.instructions.rend(), - [elem = elem_inst.operands[row]](const auto &it) { return it.result == elem; }); + [operand_id = elem_inst.operands[row]](const spirv_instruction &inst) { return inst == operand_id; }); if (row_inst.op != spv::OpSpecConstantComposite) { @@ -893,7 +1072,7 @@ private: for (size_t col = 0; col < row_inst.operands.size(); ++col) { const spirv_instruction &col_inst = *std::find_if(_types_and_constants.instructions.rbegin(), _types_and_constants.instructions.rend(), - [elem = row_inst.operands[col]](const auto &it) { return it.result == elem; }); + [operand_id = row_inst.operands[col]](const spirv_instruction &inst) { return inst == operand_id; }); add_spec_constant(col_inst, info, initializer_value, row * info.type.cols + col); } @@ -909,6 +1088,7 @@ private: if (_global_ubo_type == 0) { _global_ubo_type = make_id(); + make_id(); // Pointer type for '_global_ubo_type' add_decoration(_global_ubo_type, spv::DecorationBlock); } @@ -981,23 +1161,24 @@ private: return define_variable(loc, type, name.c_str(), storage, spv::ImageFormatUnknown, initializer_value); } - id 
define_variable(const location &loc, const type &type, const char *name, spv::StorageClass storage, spv::ImageFormat format = spv::ImageFormatUnknown, spv::Id initializer_value = 0) + id define_variable(const location &loc, const type &type, const char *name, spv::StorageClass storage, spv::ImageFormat format = spv::ImageFormatUnknown, id initializer_value = 0) { - assert(storage != spv::StorageClassFunction || _current_function != nullptr); + assert(storage != spv::StorageClassFunction || (_current_function_blocks != nullptr && _current_function != nullptr && !_current_function->unique_name.empty() && (_current_function->unique_name[0] == 'F' || _current_function->unique_name[0] == 'E'))); spirv_basic_block &block = (storage != spv::StorageClassFunction) ? - _variables : _current_function->variables; + _variables : _current_function_blocks->variables; add_location(loc, block); - spv::Id res; // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpVariable - spirv_instruction &inst = add_instruction(spv::OpVariable, convert_type(type, true, storage, format), block, res) - .add(storage); + spirv_instruction &inst = add_instruction(spv::OpVariable, convert_type(type, true, storage, format), block); + inst.add(storage); + + const id res = inst.result; if (initializer_value != 0) { - if (storage != spv::StorageClassFunction || _current_function->is_entry_point) + if (storage != spv::StorageClassFunction || /* is_entry_point = */ _current_function->unique_name[0] == 'E') { // The initializer for variables must be a constant inst.add(initializer_value); @@ -1021,72 +1202,81 @@ private: return res; } - id define_function(const location &loc, function_info &info) override + id define_function(const location &loc, function &info) override { assert(!is_in_function()); - auto &function = _functions_blocks.emplace_back(); - function.return_type = info.return_type; + function_blocks &func = _functions_blocks.emplace_back(); + func.return_type = info.return_type; 
- _current_function = &function; + for (const member_type ¶m : info.parameter_list) + func.param_types.push_back(param.type); - for (auto ¶m : info.parameter_list) - function.param_types.push_back(param.type); - - add_location(loc, function.declaration); + add_location(loc, func.declaration); // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpFunction - add_instruction(spv::OpFunction, convert_type(info.return_type), function.declaration, info.definition) - .add(spv::FunctionControlMaskNone) - .add(convert_type(function)); + const id res = info.id = + add_instruction(spv::OpFunction, convert_type(info.return_type), func.declaration) + .add(spv::FunctionControlMaskNone) + .add(convert_type(func)); if (!info.name.empty()) - add_name(info.definition, info.name.c_str()); + add_name(res, info.name.c_str()); - for (auto ¶m : info.parameter_list) + for (member_type ¶m : info.parameter_list) { - add_location(param.location, function.declaration); + add_location(param.location, func.declaration); - param.definition = add_instruction(spv::OpFunctionParameter, convert_type(param.type, true), function.declaration).result; + param.id = add_instruction(spv::OpFunctionParameter, convert_type(param.type, true), func.declaration); - add_name(param.definition, param.name.c_str()); + add_name(param.id, param.name.c_str()); } - _functions.push_back(std::make_unique(info)); + _functions.push_back(std::make_unique(info)); + _current_function = _functions.back().get(); + _current_function_blocks = &func; - return info.definition; + return res; } - void define_entry_point(function_info &func, shader_type stype, int num_threads[3]) override + void define_entry_point(function &func) override { - // Modify entry point name so each thread configuration is made separate - if (stype == shader_type::cs) - func.unique_name = 'E' + func.unique_name + - '_' + std::to_string(num_threads[0]) + - '_' + std::to_string(num_threads[1]) + - '_' + std::to_string(num_threads[2]); + 
assert(!func.unique_name.empty() && func.unique_name[0] == 'F'); + func.unique_name[0] = 'E'; - if (const auto it = std::find_if(_module.entry_points.begin(), _module.entry_points.end(), - [&func](const auto &ep) { return ep.name == func.unique_name; }); - it != _module.entry_points.end()) + // Modify entry point name so each thread configuration is made separate + if (func.type == shader_type::compute) + func.unique_name += + '_' + std::to_string(func.num_threads[0]) + + '_' + std::to_string(func.num_threads[1]) + + '_' + std::to_string(func.num_threads[2]); + + if (std::find_if(_module.entry_points.begin(), _module.entry_points.end(), + [&func](const std::pair &entry_point) { + return entry_point.first == func.unique_name; + }) != _module.entry_points.end()) return; - _module.entry_points.push_back({ func.unique_name, stype }); + _module.entry_points.emplace_back(func.unique_name, func.type); - spv::Id position_variable = 0, point_size_variable = 0; + spv::Id position_variable = 0; + spv::Id point_size_variable = 0; std::vector inputs_and_outputs; std::vector call_params; // Generate the glue entry point function - function_info entry_point; - entry_point.return_type = { type::t_void }; + function entry_point = func; + entry_point.referenced_functions.push_back(func.id); - define_function({}, entry_point); + // Change function signature to 'void main()' + entry_point.return_type = { type::t_void }; + entry_point.return_semantic.clear(); + entry_point.parameter_list.clear(); + + const id entry_point_definition = define_function({}, entry_point); enter_block(create_block()); - _current_function->is_entry_point = true; - - const auto create_varying_param = [this, &call_params](const struct_member_info ¶m) { + const auto create_varying_param = [this, &call_params](const member_type ¶m) { // Initialize all output variables with zero const spv::Id variable = define_variable({}, param.type, nullptr, spv::StorageClassFunction, spv::ImageFormatUnknown, 
emit_constant(param.type, 0u)); @@ -1096,7 +1286,7 @@ private: return variable; }; - const auto create_varying_variable = [this, &inputs_and_outputs, &position_variable, &point_size_variable, stype](const type ¶m_type, const std::string &semantic, spv::StorageClass storage, int a = 0) { + const auto create_varying_variable = [this, &inputs_and_outputs, &position_variable, &point_size_variable, stype = func.type](const type ¶m_type, const std::string &semantic, spv::StorageClass storage, int a = 0) { const spv::Id variable = define_variable({}, param_type, nullptr, storage); if (const spv::BuiltIn builtin = semantic_to_builtin(semantic, stype); @@ -1113,9 +1303,9 @@ private: } else { - assert(stype != shader_type::cs); // Compute shaders cannot have custom inputs or outputs + assert(stype != shader_type::compute); // Compute shaders cannot have custom inputs or outputs - const uint32_t location = semantic_to_location(semantic, std::max(1, param_type.array_length)); + const uint32_t location = semantic_to_location(semantic, std::max(1u, param_type.array_length)); add_decoration(variable, spv::DecorationLocation, { location + a }); } @@ -1131,7 +1321,7 @@ private: }; // Translate function parameters to input/output variables - for (const struct_member_info ¶m : func.parameter_list) + for (const member_type ¶m : func.parameter_list) { spv::Id param_var = create_varying_param(param); @@ -1143,46 +1333,50 @@ private: // Flatten structure parameters if (param.type.is_struct()) { - const struct_info &definition = get_struct(param.type.definition); + const struct_type &struct_definition = get_struct(param.type.struct_definition); type struct_type = param.type; - const int array_length = std::max(1, param.type.array_length); + const auto array_length = std::max(1u, param.type.array_length); struct_type.array_length = 0; // Struct arrays need to be flattened into individual elements as well - std::vector array_elements; - array_elements.reserve(array_length); - for (int a = 
0; a < array_length; a++) + std::vector array_element_ids; + array_element_ids.reserve(array_length); + for (unsigned int a = 0; a < array_length; a++) { - std::vector struct_elements; - struct_elements.reserve(definition.member_list.size()); - for (const struct_member_info &member : definition.member_list) + std::vector struct_element_ids; + struct_element_ids.reserve(struct_definition.member_list.size()); + for (const member_type &member : struct_definition.member_list) { - spv::Id input_var = create_varying_variable(member.type, member.semantic, spv::StorageClassInput, a); + const spv::Id input_var = create_varying_variable(member.type, member.semantic, spv::StorageClassInput, a); - param_value = add_instruction(spv::OpLoad, convert_type(member.type)) - .add(input_var).result; - struct_elements.push_back(param_value); + param_value = + add_instruction(spv::OpLoad, convert_type(member.type)) + .add(input_var); + struct_element_ids.push_back(param_value); } - param_value = add_instruction(spv::OpCompositeConstruct, convert_type(struct_type)) - .add(struct_elements.begin(), struct_elements.end()).result; - array_elements.push_back(param_value); + param_value = + add_instruction(spv::OpCompositeConstruct, convert_type(struct_type)) + .add(struct_element_ids.begin(), struct_element_ids.end()); + array_element_ids.push_back(param_value); } if (param.type.is_array()) { // Build the array from all constructed struct elements - param_value = add_instruction(spv::OpCompositeConstruct, convert_type(param.type)) - .add(array_elements.begin(), array_elements.end()).result; + param_value = + add_instruction(spv::OpCompositeConstruct, convert_type(param.type)) + .add(array_element_ids.begin(), array_element_ids.end()); } } else { - spv::Id input_var = create_varying_variable(param.type, param.semantic, spv::StorageClassInput); + const spv::Id input_var = create_varying_variable(param.type, param.semantic, spv::StorageClassInput); - param_value = add_instruction(spv::OpLoad, 
convert_type(param.type)) - .add(input_var).result; + param_value = + add_instruction(spv::OpLoad, convert_type(param.type)) + .add(input_var); } add_instruction_without_result(spv::OpStore) @@ -1194,11 +1388,11 @@ private: { if (param.type.is_struct()) { - const struct_info &definition = get_struct(param.type.definition); + const struct_type &struct_definition = get_struct(param.type.struct_definition); - for (int a = 0, array_length = std::max(1, param.type.array_length); a < array_length; a++) + for (unsigned int a = 0, array_length = std::max(1u, param.type.array_length); a < array_length; a++) { - for (const struct_member_info &member : definition.member_list) + for (const member_type &member : struct_definition.member_list) { create_varying_variable(member.type, member.semantic, spv::StorageClassOutput, a); } @@ -1211,48 +1405,49 @@ private: } } - const auto call_result = emit_call({}, func.definition, func.return_type, call_params); + const id call_result = emit_call({}, func.id, func.return_type, call_params); for (size_t i = 0, inputs_and_outputs_index = 0; i < func.parameter_list.size(); ++i) { - const struct_member_info ¶m = func.parameter_list[i]; + const member_type ¶m = func.parameter_list[i]; if (param.type.has(type::q_out)) { - const spv::Id value = add_instruction(spv::OpLoad, convert_type(param.type)) - .add(call_params[i].base).result; + const spv::Id value = + add_instruction(spv::OpLoad, convert_type(param.type)) + .add(call_params[i].base); if (param.type.is_struct()) { - const struct_info &definition = get_struct(param.type.definition); + const struct_type &struct_definition = get_struct(param.type.struct_definition); type struct_type = param.type; - const int array_length = std::max(1, param.type.array_length); + const auto array_length = std::max(1u, param.type.array_length); struct_type.array_length = 0; // Skip input variables if this is an "inout" parameter if (param.type.has(type::q_in)) - inputs_and_outputs_index += 
definition.member_list.size() * array_length; + inputs_and_outputs_index += struct_definition.member_list.size() * array_length; // Split up struct array into individual struct elements again - for (int a = 0; a < array_length; a++) + for (unsigned int a = 0; a < array_length; a++) { spv::Id element_value = value; if (param.type.is_array()) { - element_value = add_instruction(spv::OpCompositeExtract, convert_type(struct_type)) - .add(value) - .add(a).result; + element_value = + add_instruction(spv::OpCompositeExtract, convert_type(struct_type)) + .add(value) + .add(a); } // Split out struct fields into separate output variables again - for (uint32_t member_index = 0; member_index < definition.member_list.size(); ++member_index) + for (uint32_t member_index = 0; member_index < struct_definition.member_list.size(); ++member_index) { - const struct_member_info &member = definition.member_list[member_index]; - - const spv::Id member_value = add_instruction(spv::OpCompositeExtract, convert_type(member.type)) - .add(element_value) - .add(member_index).result; + const spv::Id member_value = + add_instruction(spv::OpCompositeExtract, convert_type(struct_definition.member_list[member_index].type)) + .add(element_value) + .add(member_index); add_instruction_without_result(spv::OpStore) .add(inputs_and_outputs[inputs_and_outputs_index++]) @@ -1276,8 +1471,8 @@ private: // Input parameters do not need to store anything, but increase the input/output variable index if (param.type.is_struct()) { - const struct_info &definition = get_struct(param.type.definition); - inputs_and_outputs_index += definition.member_list.size() * std::max(1, param.type.array_length); + const struct_type &struct_definition = get_struct(param.type.struct_definition); + inputs_and_outputs_index += struct_definition.member_list.size() * std::max(1u, param.type.array_length); } else { @@ -1288,33 +1483,35 @@ private: if (func.return_type.is_struct()) { - const struct_info &definition = 
get_struct(func.return_type.definition); + const struct_type &struct_definition = get_struct(func.return_type.struct_definition); - for (uint32_t member_index = 0; member_index < definition.member_list.size(); ++member_index) + for (uint32_t member_index = 0; member_index < struct_definition.member_list.size(); ++member_index) { - const struct_member_info &member = definition.member_list[member_index]; + const member_type &member = struct_definition.member_list[member_index]; - const spv::Id result = create_varying_variable(member.type, member.semantic, spv::StorageClassOutput); - const spv::Id member_result = add_instruction(spv::OpCompositeExtract, convert_type(member.type)) - .add(call_result) - .add(member_index).result; + const spv::Id result_var = create_varying_variable(member.type, member.semantic, spv::StorageClassOutput); + + const spv::Id member_result = + add_instruction(spv::OpCompositeExtract, convert_type(member.type)) + .add(call_result) + .add(member_index); add_instruction_without_result(spv::OpStore) - .add(result) + .add(result_var) .add(member_result); } } else if (!func.return_type.is_void()) { - const spv::Id result = create_varying_variable(func.return_type, func.return_semantic, spv::StorageClassOutput); + const spv::Id result_var = create_varying_variable(func.return_type, func.return_semantic, spv::StorageClassOutput); add_instruction_without_result(spv::OpStore) - .add(result) + .add(result_var) .add(call_result); } // Add code to flip the output vertically - if (_flip_vert_y && position_variable != 0 && stype == shader_type::vs) + if (_flip_vert_y && position_variable != 0 && func.type == shader_type::vertex) { expression position; position.reset_to_lvalue({}, position_variable, { type::t_float, 4, 1 }); @@ -1327,7 +1524,7 @@ private: } // Add code that sets the point size to a default value (in case this vertex shader is used with point primitives) - if (point_size_variable == 0 && stype == shader_type::vs) + if (point_size_variable == 
0 && func.type == shader_type::vertex) { create_varying_variable({ type::t_float, 1, 1 }, "SV_POINTSIZE", spv::StorageClassOutput); @@ -1342,35 +1539,34 @@ private: leave_function(); spv::ExecutionModel model; - switch (stype) + switch (func.type) { - case shader_type::vs: + case shader_type::vertex: model = spv::ExecutionModelVertex; break; - case shader_type::ps: + case shader_type::pixel: model = spv::ExecutionModelFragment; add_instruction_without_result(spv::OpExecutionMode, _execution_modes) - .add(entry_point.definition) + .add(entry_point_definition) .add(_vulkan_semantics ? spv::ExecutionModeOriginUpperLeft : spv::ExecutionModeOriginLowerLeft); break; - case shader_type::cs: + case shader_type::compute: model = spv::ExecutionModelGLCompute; add_instruction_without_result(spv::OpExecutionMode, _execution_modes) - .add(entry_point.definition) + .add(entry_point_definition) .add(spv::ExecutionModeLocalSize) - .add(num_threads[0]) - .add(num_threads[1]) - .add(num_threads[2]); + .add(func.num_threads[0]) + .add(func.num_threads[1]) + .add(func.num_threads[2]); break; default: assert(false); return; } - assert(!func.unique_name.empty()); add_instruction_without_result(spv::OpEntryPoint, _entries) .add(model) - .add(entry_point.definition) + .add(entry_point_definition) .add_string(func.unique_name.c_str()) .add(inputs_and_outputs.begin(), inputs_and_outputs.end()); } @@ -1382,7 +1578,7 @@ private: size_t i = 0; spv::Id result = exp.base; - auto base_type = exp.type; + type base_type = exp.type; bool is_uniform_bool = false; if (exp.is_lvalue || !exp.chain.empty()) @@ -1452,9 +1648,9 @@ private: result = access_chain->result; } - result = add_instruction(spv::OpLoad, convert_type(base_type, false, spv::StorageClassFunction, storage.second)) - .add(result) // Pointer - .result; + result = + add_instruction(spv::OpLoad, convert_type(base_type, false, spv::StorageClassFunction, storage.second)) + .add(result); // Pointer } // Need to convert boolean uniforms which 
are actually integers in SPIR-V @@ -1462,17 +1658,17 @@ private: { base_type.base = type::t_bool; - result = add_instruction(spv::OpINotEqual, convert_type(base_type)) - .add(result) - .add(emit_constant(0)) - .result; + result = + add_instruction(spv::OpINotEqual, convert_type(base_type)) + .add(result) + .add(emit_constant(0)); } // Work through all remaining operations in the access chain and apply them to the value for (; i < exp.chain.size(); ++i) { assert(result != 0); - const auto &op = exp.chain[i]; + const expression::operation &op = exp.chain[i]; switch (op.op) { @@ -1483,6 +1679,7 @@ private: cast_type.base = op.from.base; std::vector args; + args.reserve(op.to.components()); for (unsigned int c = 0; c < op.to.components(); ++c) args.emplace_back().reset_to_rvalue(exp.location, result, op.from); @@ -1494,11 +1691,11 @@ private: const spv::Id true_constant = emit_constant(op.to, 1); const spv::Id false_constant = emit_constant(op.to, 0); - result = add_instruction(spv::OpSelect, convert_type(op.to)) - .add(result) // Condition - .add(true_constant) - .add(false_constant) - .result; + result = + add_instruction(spv::OpSelect, convert_type(op.to)) + .add(result) // Condition + .add(true_constant) + .add(false_constant); } else { @@ -1511,10 +1708,10 @@ private: else spv_op = spv::OpINotEqual; // Add instruction to compare value against zero instead of casting - result = add_instruction(spv_op, convert_type(op.to)) - .add(result) - .add(emit_constant(op.from, 0)) - .result; + result = + add_instruction(spv_op, convert_type(op.to)) + .add(result) + .add(emit_constant(op.from, 0)); continue; case type::t_min16int: case type::t_int: @@ -1553,25 +1750,25 @@ private: assert(false); } - result = add_instruction(spv_op, convert_type(op.to)) - .add(result) - .result; + result = + add_instruction(spv_op, convert_type(op.to)) + .add(result); } break; case expression::operation::op_dynamic_index: assert(op.from.is_vector() && op.to.is_scalar()); - result = 
add_instruction(spv::OpVectorExtractDynamic, convert_type(op.to)) - .add(result) // Vector - .add(op.index) // Index - .result; + result = + add_instruction(spv::OpVectorExtractDynamic, convert_type(op.to)) + .add(result) // Vector + .add(op.index); // Index break; case expression::operation::op_member: // In case of struct return values, which are r-values case expression::operation::op_constant_index: assert(op.from.is_vector() || op.from.is_matrix() || op.from.is_struct()); - result = add_instruction(spv::OpCompositeExtract, convert_type(op.to)) - .add(result) - .add(op.index) // Literal Index - .result; + result = + add_instruction(spv::OpCompositeExtract, convert_type(op.to)) + .add(result) + .add(op.index); // Literal Index break; case expression::operation::op_swizzle: if (op.to.is_vector()) @@ -1579,7 +1776,7 @@ private: if (op.from.is_matrix()) { spv::Id components[4]; - for (unsigned int c = 0; c < 4 && op.swizzle[c] >= 0; ++c) + for (int c = 0; c < 4 && op.swizzle[c] >= 0; ++c) { const unsigned int row = op.swizzle[c] / 4; const unsigned int column = op.swizzle[c] - row * 4; @@ -1588,64 +1785,63 @@ private: scalar_type.rows = 1; scalar_type.cols = 1; - spirv_instruction &node = add_instruction(spv::OpCompositeExtract, convert_type(scalar_type)) - .add(result); - + spirv_instruction &inst = add_instruction(spv::OpCompositeExtract, convert_type(scalar_type)); + inst.add(result); if (op.from.rows > 1) // Matrix types with a single row are actually vectors, so they don't need the extra index - node.add(row); + inst.add(row); + inst.add(column); - node.add(column); - - components[c] = node.result; + components[c] = inst; } - spirv_instruction &node = add_instruction(spv::OpCompositeConstruct, convert_type(op.to)); - for (unsigned int c = 0; c < 4 && op.swizzle[c] >= 0; ++c) - node.add(components[c]); - result = node.result; - break; + spirv_instruction &inst = add_instruction(spv::OpCompositeConstruct, convert_type(op.to)); + for (int c = 0; c < 4 && 
op.swizzle[c] >= 0; ++c) + inst.add(components[c]); + result = inst; } else if (op.from.is_vector()) { - spirv_instruction &node = add_instruction(spv::OpVectorShuffle, convert_type(op.to)) - .add(result) // Vector 1 - .add(result); // Vector 2 - for (unsigned int c = 0; c < 4 && op.swizzle[c] >= 0; ++c) - node.add(op.swizzle[c]); - result = node.result; - break; + spirv_instruction &inst = add_instruction(spv::OpVectorShuffle, convert_type(op.to)); + inst.add(result); // Vector 1 + inst.add(result); // Vector 2 + for (int c = 0; c < 4 && op.swizzle[c] >= 0; ++c) + inst.add(op.swizzle[c]); + result = inst; } else { - spirv_instruction &node = add_instruction(spv::OpCompositeConstruct, convert_type(op.to)); + spirv_instruction &inst = add_instruction(spv::OpCompositeConstruct, convert_type(op.to)); for (unsigned int c = 0; c < op.to.rows; ++c) - node.add(result); - result = node.result; - break; + inst.add(result); + result = inst; } + break; } else if (op.from.is_matrix() && op.to.is_scalar()) { assert(op.swizzle[1] < 0); - spirv_instruction &node = add_instruction(spv::OpCompositeExtract, convert_type(op.to)) - .add(result); // Composite + spirv_instruction &inst = add_instruction(spv::OpCompositeExtract, convert_type(op.to)); + inst.add(result); // Composite if (op.from.rows > 1) { const unsigned int row = op.swizzle[0] / 4; const unsigned int column = op.swizzle[0] - row * 4; - node.add(row); - node.add(column); + inst.add(row); + inst.add(column); } else { - node.add(op.swizzle[0]); + inst.add(op.swizzle[0]); } - result = node.result; // Result ID + result = inst; + break; + } + else + { + assert(false); break; } - assert(false); - break; } } @@ -1660,13 +1856,13 @@ private: size_t i = 0; // Any indexing expressions can be resolved with an 'OpAccessChain' already spv::Id target = emit_access_chain(exp, i); - auto base_type = exp.chain.empty() ? exp.type : i == 0 ? exp.chain[0].from : exp.chain[i - 1].to; + type base_type = exp.chain.empty() ? 
exp.type : i == 0 ? exp.chain[0].from : exp.chain[i - 1].to; // TODO: Complex access chains like float4x4[0].m00m10[0] = 0; // Work through all remaining operations in the access chain and apply them to the value for (; i < exp.chain.size(); ++i) { - const auto &op = exp.chain[i]; + const expression::operation &op = exp.chain[i]; switch (op.op) { case expression::operation::op_cast: @@ -1679,46 +1875,46 @@ private: break; case expression::operation::op_swizzle: { - spv::Id result = add_instruction(spv::OpLoad, convert_type(base_type)) - .add(target) // Pointer - .result; // Result ID + spv::Id result = + add_instruction(spv::OpLoad, convert_type(base_type)) + .add(target); // Pointer if (base_type.is_vector()) { - spirv_instruction &node = add_instruction(spv::OpVectorShuffle, convert_type(base_type)) - .add(result) // Vector 1 - .add(value); // Vector 2 + spirv_instruction &inst = add_instruction(spv::OpVectorShuffle, convert_type(base_type)); + inst.add(result); // Vector 1 + inst.add(value); // Vector 2 unsigned int shuffle[4] = { 0, 1, 2, 3 }; for (unsigned int c = 0; c < base_type.rows; ++c) if (op.swizzle[c] >= 0) shuffle[op.swizzle[c]] = base_type.rows + c; for (unsigned int c = 0; c < base_type.rows; ++c) - node.add(shuffle[c]); + inst.add(shuffle[c]); - value = node.result; + value = inst; } else if (op.to.is_scalar()) { assert(op.swizzle[1] < 0); - spirv_instruction &node = add_instruction(spv::OpCompositeInsert, convert_type(base_type)) - .add(value) // Object - .add(result); // Composite + spirv_instruction &inst = add_instruction(spv::OpCompositeInsert, convert_type(base_type)); + inst.add(value); // Object + inst.add(result); // Composite if (op.from.is_matrix() && op.from.rows > 1) { const unsigned int row = op.swizzle[0] / 4; const unsigned int column = op.swizzle[0] - row * 4; - node.add(row); - node.add(column); + inst.add(row); + inst.add(column); } else { - node.add(op.swizzle[0]); + inst.add(op.swizzle[0]); } - value = node.result; // Result ID 
+ value = inst; } else { @@ -1773,136 +1969,122 @@ private: return access_chain->result; } + using codegen::emit_constant; id emit_constant(uint32_t value) { return emit_constant({ type::t_uint, 1, 1 }, value); } - id emit_constant(const type &type, uint32_t value) + id emit_constant(const type &data_type, const constant &data) override { - // Create a constant value of the specified type - constant data = {}; // Initialize to zero, so that components not set below still have a defined value for the lookup via std::memcmp - for (unsigned int i = 0; i < type.components(); ++i) - if (type.is_integral()) - data.as_uint[i] = value; - else - data.as_float[i] = static_cast(value); - - return emit_constant(type, data, false); + return emit_constant(data_type, data, false); } - id emit_constant(const type &type, const constant &data) override - { - return emit_constant(type, data, false); - } - id emit_constant(const type &type, const constant &data, bool spec_constant) + id emit_constant(const type &data_type, const constant &data, bool spec_constant) { if (!spec_constant) // Specialization constants cannot reuse other constants { if (const auto it = std::find_if(_constant_lookup.begin(), _constant_lookup.end(), - [&type, &data](auto &x) { - if (!(std::get<0>(x) == type && std::memcmp(&std::get<1>(x).as_uint[0], &data.as_uint[0], sizeof(uint32_t) * 16) == 0 && std::get<1>(x).array_data.size() == data.array_data.size())) - return false; - for (size_t i = 0; i < data.array_data.size(); ++i) - if (std::memcmp(&std::get<1>(x).array_data[i].as_uint[0], &data.array_data[i].as_uint[0], sizeof(uint32_t) * 16) != 0) + [&data_type, &data](std::tuple &x) { + if (!(std::get<0>(x) == data_type && std::memcmp(&std::get<1>(x).as_uint[0], &data.as_uint[0], sizeof(uint32_t) * 16) == 0 && std::get<1>(x).array_data.size() == data.array_data.size())) return false; - return true; - }); + for (size_t i = 0; i < data.array_data.size(); ++i) + if 
(std::memcmp(&std::get<1>(x).array_data[i].as_uint[0], &data.array_data[i].as_uint[0], sizeof(uint32_t) * 16) != 0) + return false; + return true; + }); it != _constant_lookup.end()) - return std::get<2>(*it); // Re-use existing constant instead of duplicating the definition + return std::get<2>(*it); // Reuse existing constant instead of duplicating the definition } spv::Id result; - if (type.is_array()) + if (data_type.is_array()) { - assert(type.array_length > 0); // Unsized arrays cannot be constants + assert(data_type.is_bounded_array()); // Unbounded arrays cannot be constants - auto elem_type = type; + type elem_type = data_type; elem_type.array_length = 0; std::vector elements; - elements.reserve(type.array_length); + elements.reserve(data_type.array_length); // Fill up elements with constant array data for (const constant &elem : data.array_data) elements.push_back(emit_constant(elem_type, elem, spec_constant)); // Fill up any remaining elements with a default value (when the array data did not specify them) - for (size_t i = elements.size(); i < static_cast(type.array_length); ++i) + for (size_t i = elements.size(); i < static_cast(data_type.array_length); ++i) elements.push_back(emit_constant(elem_type, {}, spec_constant)); - result = add_instruction(spec_constant ? spv::OpSpecConstantComposite : spv::OpConstantComposite, convert_type(type), _types_and_constants) - .add(elements.begin(), elements.end()) - .result; + result = + add_instruction(spec_constant ? 
spv::OpSpecConstantComposite : spv::OpConstantComposite, convert_type(data_type), _types_and_constants) + .add(elements.begin(), elements.end()); } - else if (type.is_struct()) + else if (data_type.is_struct()) { assert(!spec_constant); // Structures cannot be specialization constants - result = add_instruction(spv::OpConstantNull, convert_type(type), _types_and_constants) - .result; + result = add_instruction(spv::OpConstantNull, convert_type(data_type), _types_and_constants); } - else if (type.is_vector() || type.is_matrix()) + else if (data_type.is_vector() || data_type.is_matrix()) { - auto elem_type = type; - elem_type.rows = type.cols; + type elem_type = data_type; + elem_type.rows = data_type.cols; elem_type.cols = 1; spv::Id rows[4] = {}; // Construct matrix constant out of row vector constants // Construct vector constant out of scalar constants for each element - for (unsigned int i = 0; i < type.rows; ++i) + for (unsigned int i = 0; i < data_type.rows; ++i) { constant row_data = {}; - for (unsigned int k = 0; k < type.cols; ++k) - row_data.as_uint[k] = data.as_uint[i * type.cols + k]; + for (unsigned int k = 0; k < data_type.cols; ++k) + row_data.as_uint[k] = data.as_uint[i * data_type.cols + k]; rows[i] = emit_constant(elem_type, row_data, spec_constant); } - if (type.rows == 1) + if (data_type.rows == 1) { result = rows[0]; } else { - spirv_instruction &node = add_instruction(spec_constant ? spv::OpSpecConstantComposite : spv::OpConstantComposite, convert_type(type), _types_and_constants); - for (unsigned int i = 0; i < type.rows; ++i) - node.add(rows[i]); - - result = node.result; + spirv_instruction &inst = add_instruction(spec_constant ? spv::OpSpecConstantComposite : spv::OpConstantComposite, convert_type(data_type), _types_and_constants); + for (unsigned int i = 0; i < data_type.rows; ++i) + inst.add(rows[i]); + result = inst; } } - else if (type.is_boolean()) + else if (data_type.is_boolean()) { result = add_instruction(data.as_uint[0] ? 
(spec_constant ? spv::OpSpecConstantTrue : spv::OpConstantTrue) : - (spec_constant ? spv::OpSpecConstantFalse : spv::OpConstantFalse), convert_type(type), _types_and_constants) - .result; + (spec_constant ? spv::OpSpecConstantFalse : spv::OpConstantFalse), convert_type(data_type), _types_and_constants); } else { - assert(type.is_scalar()); + assert(data_type.is_scalar()); - result = add_instruction(spec_constant ? spv::OpSpecConstant : spv::OpConstant, convert_type(type), _types_and_constants) - .add(data.as_uint[0]) - .result; + result = + add_instruction(spec_constant ? spv::OpSpecConstant : spv::OpConstant, convert_type(data_type), _types_and_constants) + .add(data.as_uint[0]); } if (spec_constant) // Keep track of all specialization constants _spec_constants.insert(result); else - _constant_lookup.push_back({ type, data, result }); + _constant_lookup.push_back({ data_type, data, result }); return result; } - id emit_unary_op(const location &loc, tokenid op, const type &type, id val) override + id emit_unary_op(const location &loc, tokenid op, const type &res_type, id val) override { spv::Op spv_op = spv::OpNop; switch (op) { case tokenid::minus: - spv_op = type.is_floating_point() ? spv::OpFNegate : spv::OpSNegate; + spv_op = res_type.is_floating_point() ? 
spv::OpFNegate : spv::OpSNegate; break; case tokenid::tilde: spv_op = spv::OpNot; @@ -1916,12 +2098,12 @@ private: add_location(loc, *_current_block_data); - spirv_instruction &inst = add_instruction(spv_op, convert_type(type)); + spirv_instruction &inst = add_instruction(spv_op, convert_type(res_type)); inst.add(val); // Operand - return inst.result; + return inst; } - id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &type, id lhs, id rhs) override + id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &exp_type, id lhs, id rhs) override { spv::Op spv_op = spv::OpNop; @@ -1930,24 +2112,24 @@ private: case tokenid::plus: case tokenid::plus_plus: case tokenid::plus_equal: - spv_op = type.is_floating_point() ? spv::OpFAdd : spv::OpIAdd; + spv_op = exp_type.is_floating_point() ? spv::OpFAdd : spv::OpIAdd; break; case tokenid::minus: case tokenid::minus_minus: case tokenid::minus_equal: - spv_op = type.is_floating_point() ? spv::OpFSub : spv::OpISub; + spv_op = exp_type.is_floating_point() ? spv::OpFSub : spv::OpISub; break; case tokenid::star: case tokenid::star_equal: - spv_op = type.is_floating_point() ? spv::OpFMul : spv::OpIMul; + spv_op = exp_type.is_floating_point() ? spv::OpFMul : spv::OpIMul; break; case tokenid::slash: case tokenid::slash_equal: - spv_op = type.is_floating_point() ? spv::OpFDiv : type.is_signed() ? spv::OpSDiv : spv::OpUDiv; + spv_op = exp_type.is_floating_point() ? spv::OpFDiv : exp_type.is_signed() ? spv::OpSDiv : spv::OpUDiv; break; case tokenid::percent: case tokenid::percent_equal: - spv_op = type.is_floating_point() ? spv::OpFRem : type.is_signed() ? spv::OpSRem : spv::OpUMod; + spv_op = exp_type.is_floating_point() ? spv::OpFRem : exp_type.is_signed() ? spv::OpSRem : spv::OpUMod; break; case tokenid::caret: case tokenid::caret_equal: @@ -1967,7 +2149,7 @@ private: break; case tokenid::greater_greater: case tokenid::greater_greater_equal: - spv_op = type.is_signed() ? 
spv::OpShiftRightArithmetic : spv::OpShiftRightLogical; + spv_op = exp_type.is_signed() ? spv::OpShiftRightArithmetic : spv::OpShiftRightLogical; break; case tokenid::pipe_pipe: spv_op = spv::OpLogicalOr; @@ -1976,28 +2158,28 @@ private: spv_op = spv::OpLogicalAnd; break; case tokenid::less: - spv_op = type.is_floating_point() ? spv::OpFOrdLessThan : - type.is_signed() ? spv::OpSLessThan : spv::OpULessThan; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdLessThan : + exp_type.is_signed() ? spv::OpSLessThan : spv::OpULessThan; break; case tokenid::less_equal: - spv_op = type.is_floating_point() ? spv::OpFOrdLessThanEqual : - type.is_signed() ? spv::OpSLessThanEqual : spv::OpULessThanEqual; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdLessThanEqual : + exp_type.is_signed() ? spv::OpSLessThanEqual : spv::OpULessThanEqual; break; case tokenid::greater: - spv_op = type.is_floating_point() ? spv::OpFOrdGreaterThan : - type.is_signed() ? spv::OpSGreaterThan : spv::OpUGreaterThan; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdGreaterThan : + exp_type.is_signed() ? spv::OpSGreaterThan : spv::OpUGreaterThan; break; case tokenid::greater_equal: - spv_op = type.is_floating_point() ? spv::OpFOrdGreaterThanEqual : - type.is_signed() ? spv::OpSGreaterThanEqual : spv::OpUGreaterThanEqual; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdGreaterThanEqual : + exp_type.is_signed() ? spv::OpSGreaterThanEqual : spv::OpUGreaterThanEqual; break; case tokenid::equal_equal: - spv_op = type.is_floating_point() ? spv::OpFOrdEqual : - type.is_boolean() ? spv::OpLogicalEqual : spv::OpIEqual; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdEqual : + exp_type.is_boolean() ? spv::OpLogicalEqual : spv::OpIEqual; break; case tokenid::exclaim_equal: - spv_op = type.is_floating_point() ? spv::OpFOrdNotEqual : - type.is_boolean() ? spv::OpLogicalNotEqual : spv::OpINotEqual; + spv_op = exp_type.is_floating_point() ? spv::OpFOrdNotEqual : + exp_type.is_boolean() ? 
spv::OpLogicalNotEqual : spv::OpINotEqual; break; default: return assert(false), 0; @@ -2006,42 +2188,40 @@ private: add_location(loc, *_current_block_data); // Binary operators generally only work on scalars and vectors in SPIR-V, so need to apply them to matrices component-wise - if (type.is_matrix() && type.rows != 1) + if (exp_type.is_matrix() && exp_type.rows != 1) { std::vector ids; - ids.reserve(type.cols); + ids.reserve(exp_type.cols); - auto vector_type = type; - vector_type.rows = type.cols; + type vector_type = exp_type; + vector_type.rows = exp_type.cols; vector_type.cols = 1; - for (unsigned int row = 0; row < type.rows; ++row) + for (unsigned int row = 0; row < exp_type.rows; ++row) { const spv::Id lhs_elem = add_instruction(spv::OpCompositeExtract, convert_type(vector_type)) .add(lhs) - .add(row) - .result; + .add(row); const spv::Id rhs_elem = add_instruction(spv::OpCompositeExtract, convert_type(vector_type)) .add(rhs) - .add(row) - .result; + .add(row); spirv_instruction &inst = add_instruction(spv_op, convert_type(vector_type)); inst.add(lhs_elem); // Operand 1 inst.add(rhs_elem); // Operand 2 if (res_type.has(type::q_precise)) - add_decoration(inst.result, spv::DecorationNoContraction); + add_decoration(inst, spv::DecorationNoContraction); if (!_enable_16bit_types && res_type.precision() < 32) - add_decoration(inst.result, spv::DecorationRelaxedPrecision); + add_decoration(inst, spv::DecorationRelaxedPrecision); - ids.push_back(inst.result); + ids.push_back(inst); } spirv_instruction &inst = add_instruction(spv::OpCompositeConstruct, convert_type(res_type)); inst.add(ids.begin(), ids.end()); - return inst.result; + return inst; } else { @@ -2050,26 +2230,26 @@ private: inst.add(rhs); // Operand 2 if (res_type.has(type::q_precise)) - add_decoration(inst.result, spv::DecorationNoContraction); + add_decoration(inst, spv::DecorationNoContraction); if (!_enable_16bit_types && res_type.precision() < 32) - add_decoration(inst.result, 
spv::DecorationRelaxedPrecision); + add_decoration(inst, spv::DecorationRelaxedPrecision); - return inst.result; + return inst; } } - id emit_ternary_op(const location &loc, tokenid op, const type &type, id condition, id true_value, id false_value) override + id emit_ternary_op(const location &loc, tokenid op, const type &res_type, id condition, id true_value, id false_value) override { if (op != tokenid::question) return assert(false), 0; add_location(loc, *_current_block_data); - spirv_instruction &inst = add_instruction(spv::OpSelect, convert_type(type)); + spirv_instruction &inst = add_instruction(spv::OpSelect, convert_type(res_type)); inst.add(condition); // Condition inst.add(true_value); // Object 1 inst.add(false_value); // Object 2 - return inst.result; + return inst; } id emit_call(const location &loc, id function, const type &res_type, const std::vector &args) override { @@ -2085,7 +2265,7 @@ private: for (const expression &arg : args) inst.add(arg.base); // Arguments - return inst.result; + return inst; } id emit_call_intrinsic(const location &loc, id intrinsic, const type &res_type, const std::vector &args) override { @@ -2109,11 +2289,11 @@ private: return assert(false), 0; } } - id emit_construct(const location &loc, const type &type, const std::vector &args) override + id emit_construct(const location &loc, const type &res_type, const std::vector &args) override { #ifndef NDEBUG for (const expression &arg : args) - assert((arg.type.is_scalar() || type.is_array()) && arg.chain.empty() && arg.base != 0); + assert((arg.type.is_scalar() || res_type.is_array()) && arg.chain.empty() && arg.base != 0); #endif add_location(loc, *_current_block_data); @@ -2121,10 +2301,10 @@ private: ids.reserve(args.size()); // There must be exactly one constituent for each top-level component of the result - if (type.is_matrix()) + if (res_type.is_matrix()) { - auto vector_type = type; - vector_type.rows = type.cols; + type vector_type = res_type; + vector_type.rows = 
res_type.cols; vector_type.cols = 1; // Turn the list of scalar arguments into a list of column vectors @@ -2134,22 +2314,22 @@ private: for (unsigned row = 0; row < vector_type.rows; ++row) inst.add(args[arg + row].base); - ids.push_back(inst.result); + ids.push_back(inst); } } else { - assert(type.is_vector() || type.is_array()); + assert(res_type.is_vector() || res_type.is_array()); // The exception is that for constructing a vector, a contiguous subset of the scalars consumed can be represented by a vector operand instead for (const expression &arg : args) ids.push_back(arg.base); } - spirv_instruction &inst = add_instruction(spv::OpCompositeConstruct, convert_type(type)); + spirv_instruction &inst = add_instruction(spv::OpCompositeConstruct, convert_type(res_type)); inst.add(ids.begin(), ids.end()); - return inst.result; + return inst; } void emit_if(const location &loc, id, id condition_block, id true_statement_block, id false_statement_block, unsigned int selection_control) override @@ -2168,7 +2348,7 @@ private: // Add structured control flow instruction add_location(loc, *_current_block_data); add_instruction_without_result(spv::OpSelectionMerge) - .add(merge_label.result) + .add(merge_label) .add(selection_control & 0x3); // 'SelectionControl' happens to match the flags produced by the parser // Append all blocks belonging to the branch @@ -2178,7 +2358,7 @@ private: _current_block_data->instructions.push_back(merge_label); } - id emit_phi(const location &loc, id, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &type) override + id emit_phi(const location &loc, id, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &res_type) override { spirv_instruction merge_label = _current_block_data->instructions.back(); assert(merge_label.op == spv::OpLabel); @@ -2197,13 +2377,13 @@ private: add_location(loc, *_current_block_data); // 
https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpPhi - spirv_instruction &inst = add_instruction(spv::OpPhi, convert_type(type)) + spirv_instruction &inst = add_instruction(spv::OpPhi, convert_type(res_type)) .add(true_value) // Variable 0 .add(true_statement_block) // Parent 0 .add(false_value) // Variable 1 .add(false_statement_block); // Parent 1 - return inst.result; + return inst; } void emit_loop(const location &loc, id, id prev_block, id header_block, id condition_block, id loop_block, id continue_block, unsigned int loop_control) override { @@ -2222,7 +2402,7 @@ private: // Add structured control flow instruction add_location(loc, *_current_block_data); add_instruction_without_result(spv::OpLoopMerge) - .add(merge_label.result) + .add(merge_label) .add(continue_block) .add(loop_control & 0x3); // 'LoopControl' happens to match the flags produced by the parser @@ -2257,7 +2437,7 @@ private: // Add structured control flow instruction add_location(loc, *_current_block_data); add_instruction_without_result(spv::OpSelectionMerge) - .add(merge_label.result) + .add(merge_label) .add(selection_control & 0x3); // 'SelectionControl' happens to match the flags produced by the parser // Update switch instruction to contain all case labels @@ -2268,7 +2448,7 @@ private: _current_block_data->instructions.push_back(switch_inst); std::vector blocks = case_blocks; - if (default_label != merge_label.result) + if (default_label != merge_label) blocks.push_back(default_block); // Eliminate duplicates (because of multiple case labels pointing to the same block) std::sort(blocks.begin(), blocks.end()); @@ -2279,7 +2459,7 @@ private: _current_block_data->instructions.push_back(merge_label); } - bool is_in_function() const override { return _current_function != nullptr; } + bool is_in_function() const { return _current_function_blocks != nullptr; } id set_block(id id) override { @@ -2297,8 +2477,7 @@ private: set_block(id); - 
add_instruction_without_result(spv::OpLabel) - .result = id; + add_instruction_without_result(spv::OpLabel).result = id; } id leave_block_and_kill() override { @@ -2318,14 +2497,14 @@ private: if (!is_in_block()) // Might already have left the last block in which case this has to be ignored return 0; - if (_current_function->return_type.is_void()) + if (_current_function_blocks->return_type.is_void()) { add_instruction_without_result(spv::OpReturn); } else { if (0 == value) // The implicit return statement needs this - value = add_instruction(spv::OpUndef, convert_type(_current_function->return_type), _types_and_constants).result; + value = add_instruction(spv::OpUndef, convert_type(_current_function_blocks->return_type), _types_and_constants); add_instruction_without_result(spv::OpReturnValue) .add(value); @@ -2379,12 +2558,13 @@ private: { assert(is_in_function()); // Can only leave if there was a function to begin with - _current_function->definition = _block_data[_last_block]; + _current_function_blocks->definition = _block_data[_last_block]; // Append function end instruction - add_instruction_without_result(spv::OpFunctionEnd, _current_function->definition); + add_instruction_without_result(spv::OpFunctionEnd, _current_function_blocks->definition); _current_function = nullptr; + _current_function_blocks = nullptr; } }; diff --git a/dep/reshadefx/src/effect_expression.cpp b/dep/reshadefx/src/effect_expression.cpp index 1073c7000..37e44f01e 100644 --- a/dep/reshadefx/src/effect_expression.cpp +++ b/dep/reshadefx/src/effect_expression.cpp @@ -3,16 +3,16 @@ * SPDX-License-Identifier: BSD-3-Clause */ -#include "effect_lexer.hpp" -#include "effect_codegen.hpp" -#include // fmod +#include "effect_expression.hpp" +#include // std::fmod #include -#include // memcpy, memset -#include // std::min, std::max +#include // std::memcpy, std::memset +#include // std::max, std::min reshadefx::type reshadefx::type::merge(const type &lhs, const type &rhs) { - type result = { 
std::max(lhs.base, rhs.base) }; + type result; + result.base = std::max(lhs.base, rhs.base); // Non-numeric types cannot be vectors or matrices if (!result.is_numeric()) @@ -35,11 +35,14 @@ reshadefx::type reshadefx::type::merge(const type &lhs, const type &rhs) // Some qualifiers propagate to the result result.qualifiers = (lhs.qualifiers & type::q_precise) | (rhs.qualifiers & type::q_precise); - // In case this is a structure, assume they are the same - result.definition = rhs.definition; - assert(lhs.definition == rhs.definition || lhs.definition == 0); + // Cannot merge array types, assume no arrays + result.array_length = 0; assert(lhs.array_length == 0 && rhs.array_length == 0); + // In case this is a structure, assume they are the same + result.struct_definition = rhs.struct_definition; + assert(lhs.struct_definition == rhs.struct_definition || lhs.struct_definition == 0); + return result; } @@ -48,101 +51,101 @@ std::string reshadefx::type::description() const std::string result; switch (base) { - case reshadefx::type::t_void: + case t_void: result = "void"; break; - case reshadefx::type::t_bool: + case t_bool: result = "bool"; break; - case reshadefx::type::t_min16int: + case t_min16int: result = "min16int"; break; - case reshadefx::type::t_int: + case t_int: result = "int"; break; - case reshadefx::type::t_min16uint: + case t_min16uint: result = "min16uint"; break; - case reshadefx::type::t_uint: + case t_uint: result = "uint"; break; - case reshadefx::type::t_min16float: + case t_min16float: result = "min16float"; break; - case reshadefx::type::t_float: + case t_float: result = "float"; break; - case reshadefx::type::t_string: + case t_string: result = "string"; break; - case reshadefx::type::t_struct: + case t_struct: result = "struct"; break; - case reshadefx::type::t_texture1d: + case t_texture1d: result = "texture1D"; break; - case reshadefx::type::t_texture2d: + case t_texture2d: result = "texture2D"; break; - case reshadefx::type::t_texture3d: + 
case t_texture3d: result = "texture3D"; break; - case reshadefx::type::t_sampler1d_int: + case t_sampler1d_int: result = "sampler1D'; break; - case reshadefx::type::t_sampler2d_int: + case t_sampler2d_int: result = "sampler2D'; break; - case reshadefx::type::t_sampler3d_int: + case t_sampler3d_int: result = "sampler3D'; break; - case reshadefx::type::t_sampler1d_uint: + case t_sampler1d_uint: result = "sampler1D'; break; - case reshadefx::type::t_sampler2d_uint: + case t_sampler2d_uint: result = "sampler2D'; break; - case reshadefx::type::t_sampler3d_uint: + case t_sampler3d_uint: result = "sampler3D'; break; - case reshadefx::type::t_sampler1d_float: + case t_sampler1d_float: result = "sampler1D'; break; - case reshadefx::type::t_sampler2d_float: + case t_sampler2d_float: result = "sampler2D'; break; - case reshadefx::type::t_sampler3d_float: + case t_sampler3d_float: result = "sampler3D'; break; - case reshadefx::type::t_storage1d_int: + case t_storage1d_int: result = "storage1D'; break; - case reshadefx::type::t_storage2d_int: + case t_storage2d_int: result = "storage2D'; break; - case reshadefx::type::t_storage3d_int: + case t_storage3d_int: result = "storage3D'; break; - case reshadefx::type::t_storage1d_uint: + case t_storage1d_uint: result = "storage1D'; break; - case reshadefx::type::t_storage2d_uint: + case t_storage2d_uint: result = "storage2D'; break; - case reshadefx::type::t_storage3d_uint: + case t_storage3d_uint: result = "storage3D'; break; - case reshadefx::type::t_storage1d_float: + case t_storage1d_float: result = "storage1D'; break; - case reshadefx::type::t_storage2d_float: + case t_storage2d_float: result = "storage2D'; break; - case reshadefx::type::t_storage3d_float: + case t_storage3d_float: result = "storage3D'; break; - case reshadefx::type::t_function: - result = "function"; + case t_function: + assert(false); break; } @@ -157,7 +160,7 @@ std::string reshadefx::type::description() const if (is_array()) { result += '['; - if (array_length 
> 0) + if (is_bounded_array()) result += std::to_string(array_length); result += ']'; } @@ -179,7 +182,7 @@ void reshadefx::expression::reset_to_lvalue(const reshadefx::location &loc, uint type.qualifiers |= type::q_const; // Strip away global variable qualifiers - type.qualifiers &= ~(reshadefx::type::q_extern | reshadefx::type::q_static | reshadefx::type::q_uniform | reshadefx::type::q_groupshared); + type.qualifiers &= ~(type::q_extern | type::q_static | type::q_uniform | type::q_groupshared); } void reshadefx::expression::reset_to_rvalue(const reshadefx::location &loc, uint32_t in_base, const reshadefx::type &in_type) { @@ -192,7 +195,7 @@ void reshadefx::expression::reset_to_rvalue(const reshadefx::location &loc, uint chain.clear(); // Strip away global variable qualifiers - type.qualifiers &= ~(reshadefx::type::q_extern | reshadefx::type::q_static | reshadefx::type::q_uniform | reshadefx::type::q_groupshared); + type.qualifiers &= ~(type::q_extern | type::q_static | type::q_uniform | type::q_groupshared); } void reshadefx::expression::reset_to_rvalue_constant(const reshadefx::location &loc, bool data) @@ -290,7 +293,7 @@ void reshadefx::expression::add_cast_operation(const reshadefx::type &cast_type) constant.as_float[i] = static_cast(constant.as_int[i]); }; - for (auto &element : constant.array_data) + for (struct constant &element : constant.array_data) cast_constant(element, type, cast_type); cast_constant(constant, type, cast_type); @@ -320,7 +323,7 @@ void reshadefx::expression::add_dynamic_index_access(uint32_t index_expression) assert(!is_constant); // Cannot have dynamic indexing into constant in SPIR-V assert(type.is_array() || (type.is_numeric() && !type.is_scalar())); - auto prev_type = type; + struct type prev_type = type; if (type.is_array()) { @@ -342,11 +345,11 @@ void reshadefx::expression::add_constant_index_access(unsigned int index) { assert(type.is_array() || (type.is_numeric() && !type.is_scalar())); - auto prev_type = type; + struct type 
prev_type = type; if (type.is_array()) { - assert(type.array_length < 0 || index < static_cast(type.array_length)); + assert(index < type.array_length); type.array_length = 0; } @@ -389,7 +392,7 @@ void reshadefx::expression::add_swizzle_access(const signed char swizzle[4], uns { assert(type.is_numeric() && !type.is_array()); - const auto prev_type = type; + const struct type prev_type = type; type.rows = length; type.cols = 1; diff --git a/dep/reshadefx/src/effect_lexer.cpp b/dep/reshadefx/src/effect_lexer.cpp index 796cc1bd9..4383de160 100644 --- a/dep/reshadefx/src/effect_lexer.cpp +++ b/dep/reshadefx/src/effect_lexer.cpp @@ -18,7 +18,7 @@ enum token_type }; // Lookup table which translates a given char to a token type -static const unsigned type_lookup[256] = { +static const unsigned int s_type_lookup[256] = { 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, SPACE, '\n', SPACE, SPACE, SPACE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -35,7 +35,7 @@ static const unsigned type_lookup[256] = { }; // Lookup tables which translate a given string literal to a token and backwards -static const std::unordered_map token_lookup = { +static const std::unordered_map s_token_lookup = { { tokenid::end_of_file, "end of file" }, { tokenid::exclaim, "!" 
}, { tokenid::hash, "#" }, @@ -205,7 +205,7 @@ static const std::unordered_map token_lookup = { { tokenid::storage2d, "storage2D" }, { tokenid::storage3d, "storage3D" }, }; -static const std::unordered_map keyword_lookup = { +static const std::unordered_map s_keyword_lookup = { { "asm", tokenid::reserved }, { "asm_fragment", tokenid::reserved }, { "auto", tokenid::reserved }, @@ -439,7 +439,7 @@ static const std::unordered_map keyword_lookup = { { "volatile", tokenid::volatile_ }, { "while", tokenid::while_ } }; -static const std::unordered_map pp_directive_lookup = { +static const std::unordered_map s_pp_directive_lookup = { { "define", tokenid::hash_def }, { "undef", tokenid::hash_undef }, { "if", tokenid::hash_if }, @@ -454,15 +454,15 @@ static const std::unordered_map pp_directive_lookup = { "include", tokenid::hash_include }, }; -static inline bool is_octal_digit(char c) +static bool is_octal_digit(char c) { return static_cast(c - '0') < 8; } -static inline bool is_decimal_digit(char c) +static bool is_decimal_digit(char c) { return static_cast(c - '0') < 10; } -static inline bool is_hexadecimal_digit(char c) +static bool is_hexadecimal_digit(char c) { return is_decimal_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } @@ -504,8 +504,8 @@ static long long octal_to_decimal(long long n) std::string reshadefx::token::id_to_name(tokenid id) { - const auto it = token_lookup.find(id); - if (it != token_lookup.end()) + const auto it = s_token_lookup.find(id); + if (it != s_token_lookup.end()) return std::string(it->second); return "unknown"; } @@ -526,7 +526,7 @@ next_token: assert(_cur <= _end); // Do a character type lookup for the current character - switch (type_lookup[uint8_t(*_cur)]) + switch (s_type_lookup[uint8_t(*_cur)]) { case 0xFF: // EOF tok.id = tokenid::end_of_file; @@ -635,7 +635,7 @@ next_token: tok.id = tokenid::minus; break; case '.': - if (type_lookup[uint8_t(_cur[1])] == DIGIT) + if (s_type_lookup[uint8_t(_cur[1])] == DIGIT) 
parse_numeric_literal(tok); else if (_cur[1] == '.' && _cur[2] == '.') tok.id = tokenid::ellipsis, @@ -805,7 +805,7 @@ void reshadefx::lexer::skip_space() continue; } - if (type_lookup[uint8_t(*_cur)] == SPACE) + if (s_type_lookup[uint8_t(*_cur)] == SPACE) skip(1); else break; @@ -841,7 +841,7 @@ void reshadefx::lexer::parse_identifier(token &tok) const auto *const begin = _cur, *end = begin; // Skip to the end of the identifier sequence - while (type_lookup[uint8_t(*end)] == IDENT || type_lookup[uint8_t(*end)] == DIGIT) + while (s_type_lookup[uint8_t(*end)] == IDENT || s_type_lookup[uint8_t(*end)] == DIGIT) end++; tok.id = tokenid::identifier; @@ -852,8 +852,8 @@ void reshadefx::lexer::parse_identifier(token &tok) const if (_ignore_keywords) return; - if (const auto it = keyword_lookup.find(tok.literal_as_string); - it != keyword_lookup.end()) + if (const auto it = s_keyword_lookup.find(tok.literal_as_string); + it != s_keyword_lookup.end()) tok.id = it->second; } bool reshadefx::lexer::parse_pp_directive(token &tok) @@ -862,8 +862,8 @@ bool reshadefx::lexer::parse_pp_directive(token &tok) skip_space(); // Skip any space between the '#' and directive parse_identifier(tok); - if (const auto it = pp_directive_lookup.find(tok.literal_as_string); - it != pp_directive_lookup.end()) + if (const auto it = s_pp_directive_lookup.find(tok.literal_as_string); + it != s_pp_directive_lookup.end()) { tok.id = it->second; return true; @@ -999,6 +999,9 @@ void reshadefx::lexer::parse_string_literal(token &tok, bool escape) tok.id = tokenid::string_literal; tok.length = end - begin + 1; + + // Free up unused memory + tok.literal_as_string.shrink_to_fit(); } void reshadefx::lexer::parse_numeric_literal(token &tok) const { diff --git a/dep/reshadefx/src/effect_parser_exp.cpp b/dep/reshadefx/src/effect_parser_exp.cpp index 7fe436467..914a2e264 100644 --- a/dep/reshadefx/src/effect_parser_exp.cpp +++ b/dep/reshadefx/src/effect_parser_exp.cpp @@ -7,6 +7,8 @@ #include 
"effect_parser.hpp" #include "effect_codegen.hpp" #include +#include // std::back_inserter +#include // std::lower_bound, std::set_union #define RESHADEFX_SHORT_CIRCUIT 0 @@ -20,16 +22,22 @@ reshadefx::parser::~parser() void reshadefx::parser::error(const location &location, unsigned int code, const std::string &message) { _errors += location.source; - _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": error"; - _errors += (code == 0) ? ": " : " X" + std::to_string(code) + ": "; + _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')'; + _errors += ": error"; + if (code != 0) + _errors += " X" + std::to_string(code); + _errors += ": "; _errors += message; _errors += '\n'; } void reshadefx::parser::warning(const location &location, unsigned int code, const std::string &message) { _errors += location.source; - _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": warning"; - _errors += (code == 0) ? 
": " : " X" + std::to_string(code) + ": "; + _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')'; + _errors += ": warning"; + if (code != 0) + _errors += " X" + std::to_string(code); + _errors += ": "; _errors += message; _errors += '\n'; } @@ -37,11 +45,10 @@ void reshadefx::parser::warning(const location &location, unsigned int code, con void reshadefx::parser::backup() { _token_backup = _token_next; - _lexer_backup_offset = _lexer->input_offset(); } void reshadefx::parser::restore() { - _lexer->reset_to_offset(_lexer_backup_offset); + _lexer->reset_to_offset(_token_backup.offset + _token_backup.length); _token_next = _token_backup; // Copy instead of move here, since restore may be called twice (from 'accept_type_class' and then again from 'parse_expression_unary') } @@ -103,8 +110,9 @@ bool reshadefx::parser::accept_symbol(std::string &identifier, scoped_symbol &sy } // Figure out which scope to start searching in - struct scope scope = { "::", 0, 0 }; - if (!exclusive) scope = current_scope(); + scope scope = { "::", 0, 0 }; + if (!exclusive) + scope = current_scope(); // Lookup name in the symbol table symbol = find_symbol(identifier, scope, exclusive); @@ -127,7 +135,7 @@ bool reshadefx::parser::accept_type_class(type &type) { if (symbol.id && symbol.op == symbol_type::structure) { - type.definition = symbol.id; + type.struct_definition = symbol.id; return true; } } @@ -145,14 +153,25 @@ bool reshadefx::parser::accept_type_class(type &type) if (accept('<')) { if (!accept_type_class(type)) // This overwrites the base type again - return error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected vector element type"), false; + { + error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected vector element type"); + return false; + } else if (!type.is_scalar()) - return error(_token.location, 3122, "vector element 
type must be a scalar type"), false; + { + error(_token.location, 3122, "vector element type must be a scalar type"); + return false; + } if (!expect(',') || !expect(tokenid::int_literal)) + { return false; + } else if (_token.literal_as_int < 1 || _token.literal_as_int > 4) - return error(_token.location, 3052, "vector dimension must be between 1 and 4"), false; + { + error(_token.location, 3052, "vector dimension must be between 1 and 4"); + return false; + } type.rows = static_cast(_token.literal_as_int); @@ -170,21 +189,37 @@ bool reshadefx::parser::accept_type_class(type &type) if (accept('<')) { if (!accept_type_class(type)) // This overwrites the base type again - return error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected matrix element type"), false; + { + error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected matrix element type"); + return false; + } else if (!type.is_scalar()) - return error(_token.location, 3123, "matrix element type must be a scalar type"), false; + { + error(_token.location, 3123, "matrix element type must be a scalar type"); + return false; + } if (!expect(',') || !expect(tokenid::int_literal)) + { return false; + } else if (_token.literal_as_int < 1 || _token.literal_as_int > 4) - return error(_token.location, 3053, "matrix dimensions must be between 1 and 4"), false; + { + error(_token.location, 3053, "matrix dimensions must be between 1 and 4"); + return false; + } type.rows = static_cast(_token.literal_as_int); if (!expect(',') || !expect(tokenid::int_literal)) + { return false; + } else if (_token.literal_as_int < 1 || _token.literal_as_int > 4) - return error(_token.location, 3053, "matrix dimensions must be between 1 and 4"), false; + { + error(_token.location, 3053, "matrix dimensions must be between 1 and 4"); + return false; + } type.cols = static_cast(_token.literal_as_int); @@ -202,11 +237,20 @@ bool 
reshadefx::parser::accept_type_class(type &type) if (accept('<')) { if (!accept_type_class(type)) - return error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected sampler element type"), false; + { + error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected sampler element type"); + return false; + } if (type.is_object()) - return error(_token.location, 3124, "object element type cannot be an object type"), false; + { + error(_token.location, 3124, "object element type cannot be an object type"); + return false; + } if (!type.is_numeric() || type.is_matrix()) - return error(_token.location, 3521, "sampler element type must fit in four 32-bit quantities"), false; + { + error(_token.location, 3521, "sampler element type must fit in four 32-bit quantities"); + return false; + } if (type.is_integral() && type.is_signed()) type.base = static_cast(type::t_sampler1d_int + texture_dimension); @@ -234,11 +278,20 @@ bool reshadefx::parser::accept_type_class(type &type) if (accept('<')) { if (!accept_type_class(type)) - return error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected storage element type"), false; + { + error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected storage element type"); + return false; + } if (type.is_object()) - return error(_token.location, 3124, "object element type cannot be an object type"), false; + { + error(_token.location, 3124, "object element type cannot be an object type"); + return false; + } if (!type.is_numeric() || type.is_matrix()) - return error(_token.location, 3521, "storage element type must fit in four 32-bit quantities"), false; + { + error(_token.location, 3521, "storage element type must fit in four 32-bit quantities"); + return false; + } if (type.is_integral() && type.is_signed()) type.base = 
static_cast(type::t_storage1d_int + texture_dimension); @@ -543,16 +596,18 @@ bool reshadefx::parser::parse_expression(expression &exp) // Continue parsing if an expression sequence is next (in the form "a, b, c, ...") while (accept(',')) + { // Overwrite 'exp' since conveniently the last expression in the sequence is the result if (!parse_expression_assignment(exp)) return false; + } return true; } bool reshadefx::parser::parse_expression_unary(expression &exp) { - auto location = _token_next.location; + location location = _token_next.location; // Check if a prefix operator exists if (accept_unary_op()) @@ -566,22 +621,25 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Unary operators are only valid on basic types if (!exp.type.is_scalar() && !exp.type.is_vector() && !exp.type.is_matrix()) - return error(exp.location, 3022, "scalar, vector, or matrix expected"), false; + { + error(exp.location, 3022, "scalar, vector, or matrix expected"); + return false; + } // Special handling for the "++" and "--" operators if (op == tokenid::plus_plus || op == tokenid::minus_minus) { if (exp.type.has(type::q_const) || !exp.is_lvalue) - return error(location, 3025, "l-value specifies const object"), false; + { + error(location, 3025, "l-value specifies const object"); + return false; + } // Create a constant one in the type of the expression - constant one = {}; - for (unsigned int i = 0; i < exp.type.components(); ++i) - if (exp.type.is_floating_point()) one.as_float[i] = 1.0f; else one.as_uint[i] = 1u; + const codegen::id constant_one = _codegen->emit_constant(exp.type, 1); - const auto value = _codegen->emit_load(exp); - const auto result = _codegen->emit_binary_op(location, op, exp.type, value, - _codegen->emit_constant(exp.type, one)); + const codegen::id value = _codegen->emit_load(exp); + const codegen::id result = _codegen->emit_binary_op(location, op, exp.type, value, constant_one); // The "++" and "--" operands modify the source variable, so 
store result back into it _codegen->emit_store(exp, result); @@ -590,16 +648,20 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) { // The "~" bitwise operator is only valid on integral types if (op == tokenid::tilde && !exp.type.is_integral()) - return error(exp.location, 3082, "int or unsigned int type required"), false; + { + error(exp.location, 3082, "int or unsigned int type required"); + return false; + } + // The logical not operator expects a boolean type as input, so perform cast if necessary if (op == tokenid::exclaim && !exp.type.is_boolean()) - exp.add_cast_operation({ type::t_bool, exp.type.rows, exp.type.cols }); // Note: The result will be boolean as well + exp.add_cast_operation({ type::t_bool, exp.type.rows, exp.type.cols }); // The result will be boolean as well // Constant expressions can be evaluated at compile time if (!exp.evaluate_constant_expression(op)) { - const auto value = _codegen->emit_load(exp); - const auto result = _codegen->emit_unary_op(location, op, exp.type, value); + const codegen::id value = _codegen->emit_load(exp); + const codegen::id result = _codegen->emit_unary_op(location, op, exp.type, value); exp.reset_to_rvalue(location, result, exp.type); } @@ -607,11 +669,11 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) } else if (accept('(')) { - // Note: This backup may get overridden in 'accept_type_class', but should point to the same token still + // This backup may get overridden in 'accept_type_class', but should point to the same token still backup(); // Check if this is a C-style cast expression - if (type cast_type; accept_type_class(cast_type)) + if (type cast_type = {}; accept_type_class(cast_type)) { if (peek('(')) { @@ -630,7 +692,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Check if a cast between these types is valid if (!type::rank(exp.type, cast_type)) - return error(location, 3017, "cannot convert these types (from " + exp.type.description() + " to 
" + cast_type.description() + ')'), false; + { + error(location, 3017, "cannot convert these types (from " + exp.type.description() + " to " + cast_type.description() + ')'); + return false; + } exp.add_cast_operation(cast_type); return true; @@ -656,54 +721,68 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) { // There should be a comma between arguments if (!elements.empty() && !expect(',')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } // Initializer lists might contain a comma at the end, so break out of the loop if nothing follows afterwards if (peek('}')) break; - expression &element = elements.emplace_back(); + expression &element_exp = elements.emplace_back(); // Parse the argument expression - if (!parse_expression_assignment(element)) - return consume_until('}'), false; + if (!parse_expression_assignment(element_exp)) + { + consume_until('}'); + return false; + } - if (element.type.is_array()) - return error(element.location, 3119, "arrays cannot be multi-dimensional"), consume_until('}'), false; - if (composite_type.base != type::t_void && element.type.definition != composite_type.definition) - return error(element.location, 3017, "cannot convert these types (from " + element.type.description() + " to " + composite_type.description() + ')'), false; + if (element_exp.type.is_array()) + { + error(element_exp.location, 3119, "arrays cannot be multi-dimensional"); + consume_until('}'); + return false; + } + if (composite_type.base != type::t_void && element_exp.type.struct_definition != composite_type.struct_definition) + { + error(element_exp.location, 3017, "cannot convert these types (from " + element_exp.type.description() + " to " + composite_type.description() + ')'); + consume_until('}'); + return false; + } - is_constant &= element.is_constant; // Result is only constant if all arguments are constant - composite_type = type::merge(composite_type, element.type); + is_constant &= 
element_exp.is_constant; // Result is only constant if all arguments are constant + composite_type = type::merge(composite_type, element_exp.type); } // Constant arrays can be constructed at compile time if (is_constant) { - constant res = {}; - for (expression &element : elements) + constant result = {}; + for (expression &element_exp : elements) { - element.add_cast_operation(composite_type); - res.array_data.push_back(element.constant); + element_exp.add_cast_operation(composite_type); + result.array_data.push_back(element_exp.constant); } - composite_type.array_length = static_cast(elements.size()); + composite_type.array_length = static_cast(elements.size()); - exp.reset_to_rvalue_constant(location, std::move(res), composite_type); + exp.reset_to_rvalue_constant(location, std::move(result), composite_type); } else { // Resolve all access chains - for (expression &element : elements) + for (expression &element_exp : elements) { - element.add_cast_operation(composite_type); - element.reset_to_rvalue(element.location, _codegen->emit_load(element), composite_type); + element_exp.add_cast_operation(composite_type); + const codegen::id element_value = _codegen->emit_load(element_exp); + element_exp.reset_to_rvalue(element_exp.location, element_value, composite_type); } - composite_type.array_length = static_cast(elements.size()); - - const auto result = _codegen->emit_construct(location, composite_type, elements); + composite_type.array_length = static_cast(elements.size()); + const codegen::id result = _codegen->emit_construct(location, composite_type, elements); exp.reset_to_rvalue(location, result, composite_type); } @@ -746,16 +825,23 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) exp.reset_to_rvalue_constant(location, std::move(value)); } - else if (type type; accept_type_class(type)) // Check if this is a constructor call expression + else if (type type = {}; accept_type_class(type)) // Check if this is a constructor call expression { if 
(!expect('(')) return false; + if (!type.is_numeric()) - return error(location, 3037, "constructors only defined for numeric base types"), false; + { + error(location, 3037, "constructors only defined for numeric base types"); + return false; + } // Empty constructors do not exist if (accept(')')) - return error(location, 3014, "incorrect number of arguments to numeric-type constructor"), false; + { + error(location, 3014, "incorrect number of arguments to numeric-type constructor"); + return false; + } // Parse entire argument expression list bool is_constant = true; @@ -768,18 +854,21 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) if (!arguments.empty() && !expect(',')) return false; - expression &argument = arguments.emplace_back(); + expression &argument_exp = arguments.emplace_back(); // Parse the argument expression - if (!parse_expression_assignment(argument)) + if (!parse_expression_assignment(argument_exp)) return false; // Constructors are only defined for numeric base types - if (!argument.type.is_numeric()) - return error(argument.location, 3017, "cannot convert non-numeric types"), false; + if (!argument_exp.type.is_numeric()) + { + error(argument_exp.location, 3017, "cannot convert non-numeric types"); + return false; + } - is_constant &= argument.is_constant; // Result is only constant if all arguments are constant - num_components += argument.type.components(); + is_constant &= argument_exp.is_constant; // Result is only constant if all arguments are constant + num_components += argument_exp.type.components(); } // The list should be terminated with a parenthesis @@ -788,22 +877,26 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // The total number of argument elements needs to match the number of elements in the result type if (num_components != type.components()) - return error(location, 3014, "incorrect number of arguments to numeric-type constructor"), false; + { + error(location, 3014, "incorrect number of 
arguments to numeric-type constructor"); + return false; + } assert(num_components > 0 && num_components <= 16 && !type.is_array()); if (is_constant) // Constants can be converted at compile time { - constant res = {}; + constant result = {}; unsigned int i = 0; - for (expression &argument : arguments) + for (expression &argument_exp : arguments) { - argument.add_cast_operation({ type.base, argument.type.rows, argument.type.cols }); - for (unsigned int k = 0; k < argument.type.components(); ++k) - res.as_uint[i++] = argument.constant.as_uint[k]; + argument_exp.add_cast_operation({ type.base, argument_exp.type.rows, argument_exp.type.cols }); + + for (unsigned int k = 0; k < argument_exp.type.components(); ++k) + result.as_uint[i++] = argument_exp.constant.as_uint[k]; } - exp.reset_to_rvalue_constant(location, std::move(res), type); + exp.reset_to_rvalue_constant(location, std::move(result), type); } else if (arguments.size() > 1) { @@ -813,31 +906,31 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Argument is a scalar already, so only need to cast it if (it->type.is_scalar()) { - expression &argument = *it++; + expression &argument_exp = *it++; - auto scalar_type = argument.type; + struct type scalar_type = argument_exp.type; scalar_type.base = type.base; - argument.add_cast_operation(scalar_type); + argument_exp.add_cast_operation(scalar_type); - argument.reset_to_rvalue(argument.location, _codegen->emit_load(argument), scalar_type); + argument_exp.reset_to_rvalue(argument_exp.location, _codegen->emit_load(argument_exp), scalar_type); } else { - const expression argument = *it; + const expression argument_exp = std::move(*it); it = arguments.erase(it); // Convert to a scalar value and re-enter the loop in the next iteration (in case a cast is necessary too) - for (unsigned int i = argument.type.components(); i > 0; --i) + for (unsigned int i = argument_exp.type.components(); i > 0; --i) { - expression scalar = argument; - 
scalar.add_constant_index_access(i - 1); + expression argument_scalar_exp = argument_exp; + argument_scalar_exp.add_constant_index_access(i - 1); - it = arguments.insert(it, scalar); + it = arguments.insert(it, argument_scalar_exp); } } } - const auto result = _codegen->emit_construct(location, type, arguments); + const codegen::id result = _codegen->emit_construct(location, type, arguments); exp.reset_to_rvalue(location, result, type); } @@ -862,7 +955,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) { // Can only call symbols that are functions, but do not abort yet if no symbol was found since the identifier may reference an intrinsic if (symbol.id && symbol.op != symbol_type::function) - return error(location, 3005, "identifier '" + identifier + "' represents a variable, not a function"), false; + { + error(location, 3005, "identifier '" + identifier + "' represents a variable, not a function"); + return false; + } // Parse entire argument expression list std::vector arguments; @@ -873,10 +969,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) if (!arguments.empty() && !expect(',')) return false; - expression &argument = arguments.emplace_back(); + expression &argument_exp = arguments.emplace_back(); // Parse the argument expression - if (!parse_expression_assignment(argument)) + if (!parse_expression_assignment(argument_exp)) return false; } @@ -886,7 +982,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Function calls can only be made from within functions if (!_codegen->is_in_function()) - return error(location, 3005, "invalid function call outside of a function"), false; + { + error(location, 3005, "invalid function call outside of a function"); + return false; + } // Try to resolve the call by searching through both function symbols and intrinsics bool undeclared = !symbol.id, ambiguous = false; @@ -904,7 +1003,7 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) 
assert(symbol.function != nullptr); - std::vector parameters(arguments.size()); + std::vector parameters(symbol.function->parameter_list.size()); // We need to allocate some temporary variables to pass in and load results from pointer parameters for (size_t i = 0; i < arguments.size(); ++i) @@ -912,7 +1011,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) const auto ¶m_type = symbol.function->parameter_list[i].type; if (param_type.has(type::q_out) && (!arguments[i].is_lvalue || (arguments[i].type.has(type::q_const) && !arguments[i].type.is_object()))) - return error(arguments[i].location, 3025, "l-value specifies const object for an 'out' parameter"), false; + { + error(arguments[i].location, 3025, "l-value specifies const object for an 'out' parameter"); + return false; + } if (arguments[i].type.components() > param_type.components()) warning(arguments[i].location, 3206, "implicit truncation of vector type"); @@ -922,13 +1024,16 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) if (param_type.is_object() || param_type.has(type::q_groupshared) /* Special case for atomic intrinsics */) { if (arguments[i].type != param_type) - return error(location, 3004, "no matching intrinsic overload for '" + identifier + '\''), false; + { + error(location, 3004, "no matching intrinsic overload for '" + identifier + '\''); + return false; + } assert(arguments[i].is_lvalue); // Do not shadow object or pointer parameters to function calls size_t chain_index = 0; - const auto access_chain = _codegen->emit_access_chain(arguments[i], chain_index); + const codegen::id access_chain = _codegen->emit_access_chain(arguments[i], chain_index); parameters[i].reset_to_lvalue(arguments[i].location, access_chain, param_type); assert(chain_index == arguments[i].chain.size()); @@ -938,18 +1043,19 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) else { // All user-defined functions actually accept pointers as arguments, same applies to 
intrinsics with 'out' parameters - const auto temp_variable = _codegen->define_variable(arguments[i].location, param_type); + const codegen::id temp_variable = _codegen->define_variable(arguments[i].location, param_type); parameters[i].reset_to_lvalue(arguments[i].location, temp_variable, param_type); } } else { - expression arg = arguments[i]; - arg.add_cast_operation(param_type); - parameters[i].reset_to_rvalue(arg.location, _codegen->emit_load(arg), param_type); + expression argument_exp = arguments[i]; + argument_exp.add_cast_operation(param_type); + const codegen::id argument_value = _codegen->emit_load(argument_exp); + parameters[i].reset_to_rvalue(argument_exp.location, argument_value, param_type); // Keep track of whether the parameter is a constant for code generation (this makes the expression invalid for all other uses) - parameters[i].is_constant = arg.is_constant; + parameters[i].is_constant = argument_exp.is_constant; } } @@ -959,14 +1065,28 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Only do this for pointer parameters as discovered above if (parameters[i].is_lvalue && parameters[i].type.has(type::q_in) && !parameters[i].type.is_object()) { - expression arg = arguments[i]; - arg.add_cast_operation(parameters[i].type); - _codegen->emit_store(parameters[i], _codegen->emit_load(arg)); + expression argument_exp = arguments[i]; + argument_exp.add_cast_operation(parameters[i].type); + const codegen::id argument_value = _codegen->emit_load(argument_exp); + _codegen->emit_store(parameters[i], argument_value); } } + // Add remaining default arguments + for (size_t i = arguments.size(); i < parameters.size(); ++i) + { + const auto ¶m = symbol.function->parameter_list[i]; + assert(param.has_default_value || !_errors.empty()); + + const codegen::id argument_value = _codegen->emit_constant(param.type, param.default_value); + parameters[i].reset_to_rvalue(param.location, argument_value, param.type); + + // Keep track of whether the 
parameter is a constant for code generation (this makes the expression invalid for all other uses) + parameters[i].is_constant = true; + } + // Check if the call resolving found an intrinsic or function and invoke the corresponding code - const auto result = symbol.op == symbol_type::function ? + const codegen::id result = (symbol.op == symbol_type::function) ? _codegen->emit_call(location, symbol.id, symbol.type, parameters) : _codegen->emit_call_intrinsic(location, symbol.id, symbol.type, parameters); @@ -978,23 +1098,47 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Only do this for pointer parameters as discovered above if (parameters[i].is_lvalue && parameters[i].type.has(type::q_out) && !parameters[i].type.is_object()) { - expression arg = parameters[i]; - arg.add_cast_operation(arguments[i].type); - _codegen->emit_store(arguments[i], _codegen->emit_load(arg)); + expression argument_exp = parameters[i]; + argument_exp.add_cast_operation(arguments[i].type); + const codegen::id argument_value = _codegen->emit_load(argument_exp); + _codegen->emit_store(arguments[i], argument_value); } } - if (_current_function != nullptr) + if (_codegen->_current_function != nullptr && symbol.op == symbol_type::function) { // Calling a function makes the caller inherit all sampler and storage object references from the callee - _current_function->referenced_samplers.insert(symbol.function->referenced_samplers.begin(), symbol.function->referenced_samplers.end()); - _current_function->referenced_storages.insert(symbol.function->referenced_storages.begin(), symbol.function->referenced_storages.end()); + if (!symbol.function->referenced_samplers.empty()) + { + std::vector referenced_samplers; + referenced_samplers.reserve(_codegen->_current_function->referenced_samplers.size() + symbol.function->referenced_samplers.size()); + std::set_union(_codegen->_current_function->referenced_samplers.begin(), _codegen->_current_function->referenced_samplers.end(), 
symbol.function->referenced_samplers.begin(), symbol.function->referenced_samplers.end(), std::back_inserter(referenced_samplers)); + _codegen->_current_function->referenced_samplers = std::move(referenced_samplers); + } + if (!symbol.function->referenced_storages.empty()) + { + std::vector referenced_storages; + referenced_storages.reserve(_codegen->_current_function->referenced_storages.size() + symbol.function->referenced_storages.size()); + std::set_union(_codegen->_current_function->referenced_storages.begin(), _codegen->_current_function->referenced_storages.end(), symbol.function->referenced_storages.begin(), symbol.function->referenced_storages.end(), std::back_inserter(referenced_storages)); + _codegen->_current_function->referenced_storages = std::move(referenced_storages); + } + + // Add callee and all its function references to the callers function references + { + std::vector referenced_functions; + std::set_union(_codegen->_current_function->referenced_functions.begin(), _codegen->_current_function->referenced_functions.end(), symbol.function->referenced_functions.begin(), symbol.function->referenced_functions.end(), std::back_inserter(referenced_functions)); + const auto it = std::lower_bound(referenced_functions.begin(), referenced_functions.end(), symbol.id); + if (it == referenced_functions.end() || *it != symbol.id) + referenced_functions.insert(it, symbol.id); + _codegen->_current_function->referenced_functions = std::move(referenced_functions); + } } } else if (symbol.op == symbol_type::invalid) { // Show error if no symbol matching the identifier was found - return error(location, 3004, "undeclared identifier '" + identifier + '\''), false; + error(location, 3004, "undeclared identifier '" + identifier + '\''); + return false; } else if (symbol.op == symbol_type::variable) { @@ -1002,14 +1146,24 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) // Simply return the pointer to the variable, dereferencing is done on site where 
necessary exp.reset_to_lvalue(location, symbol.id, symbol.type); - if (_current_function != nullptr && - symbol.scope.level == symbol.scope.namespace_level && symbol.id != 0xFFFFFFFF) // Ignore invalid symbols that were added during error recovery + if (_codegen->_current_function != nullptr && + symbol.scope.level == symbol.scope.namespace_level && + // Ignore invalid symbols that were added during error recovery + symbol.id != 0xFFFFFFFF) { // Keep track of any global sampler or storage objects referenced in the current function if (symbol.type.is_sampler()) - _current_function->referenced_samplers.insert(symbol.id); + { + const auto it = std::lower_bound(_codegen->_current_function->referenced_samplers.begin(), _codegen->_current_function->referenced_samplers.end(), symbol.id); + if (it == _codegen->_current_function->referenced_samplers.end() || *it != symbol.id) + _codegen->_current_function->referenced_samplers.insert(it, symbol.id); + } if (symbol.type.is_storage()) - _current_function->referenced_storages.insert(symbol.id); + { + const auto it = std::lower_bound(_codegen->_current_function->referenced_storages.begin(), _codegen->_current_function->referenced_storages.end(), symbol.id); + if (it == _codegen->_current_function->referenced_storages.end() || *it != symbol.id) + _codegen->_current_function->referenced_storages.insert(it, symbol.id); + } } } else if (symbol.op == symbol_type::constant) @@ -1020,7 +1174,8 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) else { // Can only reference variables and constants by name, functions need to be called - return error(location, 3005, "identifier '" + identifier + "' represents a function, not a variable"), false; + error(location, 3005, "identifier '" + identifier + "' represents a function, not a variable"); + return false; } } @@ -1033,17 +1188,21 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) { // Unary operators are only valid on basic types if (!exp.type.is_scalar() 
&& !exp.type.is_vector() && !exp.type.is_matrix()) - return error(exp.location, 3022, "scalar, vector, or matrix expected"), false; + { + error(exp.location, 3022, "scalar, vector, or matrix expected"); + return false; + } if (exp.type.has(type::q_const) || !exp.is_lvalue) - return error(exp.location, 3025, "l-value specifies const object"), false; + { + error(exp.location, 3025, "l-value specifies const object"); + return false; + } // Create a constant one in the type of the expression - constant one = {}; - for (unsigned int i = 0; i < exp.type.components(); ++i) - if (exp.type.is_floating_point()) one.as_float[i] = 1.0f; else one.as_uint[i] = 1u; + const codegen::id constant_one = _codegen->emit_constant(exp.type, 1); - const auto value = _codegen->emit_load(exp, true); - const auto result = _codegen->emit_binary_op(location, _token.id, exp.type, value, _codegen->emit_constant(exp.type, one)); + const codegen::id value = _codegen->emit_load(exp, true); + const codegen::id result = _codegen->emit_binary_op(location, _token.id, exp.type, value, constant_one); // The "++" and "--" operands modify the source variable, so store result back into it _codegen->emit_store(exp, result); @@ -1057,7 +1216,7 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) return false; location = std::move(_token.location); - const auto subscript = std::move(_token.literal_as_string); + const std::string subscript = std::move(_token.literal_as_string); if (accept('(')) // Methods (function calls on types) are not supported right now { @@ -1074,15 +1233,18 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) } else if (exp.type.is_vector()) { - const size_t length = subscript.size(); + const int length = static_cast(subscript.size()); if (length > 4) - return error(location, 3018, "invalid subscript '" + subscript + "', swizzle too long"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', swizzle too long"); + return false; + } bool 
is_const = false; signed char offsets[4] = { -1, -1, -1, -1 }; enum { xyzw, rgba, stpq } set[4]; - for (size_t i = 0; i < length; ++i) + for (int i = 0; i < length; ++i) { switch (subscript[i]) { @@ -1099,20 +1261,30 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) case 'p': offsets[i] = 2, set[i] = stpq; break; case 'q': offsets[i] = 3, set[i] = stpq; break; default: - return error(location, 3018, "invalid subscript '" + subscript + '\''), false; + error(location, 3018, "invalid subscript '" + subscript + '\''); + return false; } if (i > 0 && (set[i] != set[i - 1])) - return error(location, 3018, "invalid subscript '" + subscript + "', mixed swizzle sets"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', mixed swizzle sets"); + return false; + } if (static_cast(offsets[i]) >= exp.type.rows) - return error(location, 3018, "invalid subscript '" + subscript + "', swizzle out of range"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', swizzle out of range"); + return false; + } // The result is not modifiable if a swizzle appears multiple times - for (size_t k = 0; k < i; ++k) - if (offsets[k] == offsets[i]) { + for (int k = 0; k < i; ++k) + { + if (offsets[k] == offsets[i]) + { is_const = true; break; } + } } // Add swizzle to current access chain @@ -1123,40 +1295,55 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) } else if (exp.type.is_matrix()) { - const size_t length = subscript.size(); + const int length = static_cast(subscript.size()); if (length < 3) - return error(location, 3018, "invalid subscript '" + subscript + '\''), false; + { + error(location, 3018, "invalid subscript '" + subscript + '\''); + return false; + } bool is_const = false; signed char offsets[4] = { -1, -1, -1, -1 }; - const unsigned int set = subscript[1] == 'm'; + const int set = subscript[1] == 'm'; const int coefficient = !set; - for (size_t i = 0, j = 0; i < length; i += 3 + set, ++j) + for (int i 
= 0, j = 0; i < length; i += 3 + set, ++j) { if (subscript[i] != '_' || subscript[i + set + 1] < '0' + coefficient || subscript[i + set + 1] > '3' + coefficient || subscript[i + set + 2] < '0' + coefficient || subscript[i + set + 2] > '3' + coefficient) - return error(location, 3018, "invalid subscript '" + subscript + '\''), false; + { + error(location, 3018, "invalid subscript '" + subscript + '\''); + return false; + } if (set && subscript[i + 1] != 'm') - return error(location, 3018, "invalid subscript '" + subscript + "', mixed swizzle sets"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', mixed swizzle sets"); + return false; + } - const unsigned int row = static_cast((subscript[i + set + 1] - '0') - coefficient); - const unsigned int col = static_cast((subscript[i + set + 2] - '0') - coefficient); + const auto row = static_cast((subscript[i + set + 1] - '0') - coefficient); + const auto col = static_cast((subscript[i + set + 2] - '0') - coefficient); if ((row >= exp.type.rows || col >= exp.type.cols) || j > 3) - return error(location, 3018, "invalid subscript '" + subscript + "', swizzle out of range"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', swizzle out of range"); + return false; + } offsets[j] = static_cast(row * 4 + col); // The result is not modifiable if a swizzle appears multiple times - for (size_t k = 0; k < j; ++k) - if (offsets[k] == offsets[j]) { + for (int k = 0; k < j; ++k) + { + if (offsets[k] == offsets[j]) + { is_const = true; break; } + } } // Add swizzle to current access chain @@ -1167,31 +1354,43 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) } else if (exp.type.is_struct()) { - const auto &member_list = _codegen->get_struct(exp.type.definition).member_list; + const std::vector &member_list = _codegen->get_struct(exp.type.struct_definition).member_list; // Find member with matching name is structure definition uint32_t member_index = 0; - for (const 
struct_member_info &member : member_list) { + for (const member_type &member : member_list) + { if (member.name == subscript) break; ++member_index; } if (member_index >= member_list.size()) - return error(location, 3018, "invalid subscript '" + subscript + '\''), false; + { + error(location, 3018, "invalid subscript '" + subscript + '\''); + return false; + } // Add field index to current access chain exp.add_member_access(member_index, member_list[member_index].type); } else if (exp.type.is_scalar()) { - const size_t length = subscript.size(); + const int length = static_cast(subscript.size()); if (length > 4) - return error(location, 3018, "invalid subscript '" + subscript + "', swizzle too long"), false; + { + error(location, 3018, "invalid subscript '" + subscript + "', swizzle too long"); + return false; + } - for (size_t i = 0; i < length; ++i) + for (int i = 0; i < length; ++i) + { if ((subscript[i] != 'x' && subscript[i] != 'r' && subscript[i] != 's') || i > 3) - return error(location, 3018, "invalid subscript '" + subscript + '\''), false; + { + error(location, 3018, "invalid subscript '" + subscript + '\''); + return false; + } + } // Promote scalar to vector type using cast auto target_type = exp.type; @@ -1211,34 +1410,44 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) else if (accept('[')) { if (!exp.type.is_array() && !exp.type.is_vector() && !exp.type.is_matrix()) - return error(_token.location, 3121, "array, matrix, vector, or indexable object type expected in index expression"), false; + { + error(_token.location, 3121, "array, matrix, vector, or indexable object type expected in index expression"); + return false; + } // Parse index expression - expression index; - if (!parse_expression(index) || !expect(']')) + expression index_exp; + if (!parse_expression(index_exp) || !expect(']')) return false; - else if (!index.type.is_scalar() || !index.type.is_integral()) - return error(index.location, 3120, "invalid type for index - 
index must be an integer scalar"), false; + + if (!index_exp.type.is_scalar() || !index_exp.type.is_integral()) + { + error(index_exp.location, 3120, "invalid type for index - index must be an integer scalar"); + return false; + } // Add index expression to current access chain - if (index.is_constant) + if (index_exp.is_constant) { // Check array bounds if known - if (exp.type.array_length > 0 && index.constant.as_uint[0] >= static_cast(exp.type.array_length)) - return error(index.location, 3504, "array index out of bounds"), false; + if (exp.type.is_bounded_array() && index_exp.constant.as_uint[0] >= exp.type.array_length) + { + error(index_exp.location, 3504, "array index out of bounds"); + return false; + } - exp.add_constant_index_access(index.constant.as_uint[0]); + exp.add_constant_index_access(index_exp.constant.as_uint[0]); } else { if (exp.is_constant) { // To handle a dynamic index into a constant means we need to create a local variable first or else any of the indexing instructions do not work - const auto temp_variable = _codegen->define_variable(location, exp.type, std::string(), false, _codegen->emit_constant(exp.type, exp.constant)); + const codegen::id temp_variable = _codegen->define_variable(location, exp.type, std::string(), false, _codegen->emit_constant(exp.type, exp.constant)); exp.reset_to_lvalue(exp.location, temp_variable, exp.type); } - exp.add_dynamic_index_access(_codegen->emit_load(index)); + exp.add_dynamic_index_access(_codegen->emit_load(index_exp)); } } else @@ -1250,10 +1459,10 @@ bool reshadefx::parser::parse_expression_unary(expression &exp) return true; } -bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int left_precedence) +bool reshadefx::parser::parse_expression_multary(expression &lhs_exp, unsigned int left_precedence) { // Parse left hand side of the expression - if (!parse_expression_unary(lhs)) + if (!parse_expression_unary(lhs_exp)) return false; // Check if an operator exists so that this is 
a binary or ternary expression @@ -1289,12 +1498,12 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l } #endif // Parse the right hand side of the binary operation - expression rhs; - if (!parse_expression_multary(rhs, right_precedence)) + expression rhs_exp; + if (!parse_expression_multary(rhs_exp, right_precedence)) return false; // Deduce the result base type based on implicit conversion rules - type type = type::merge(lhs.type, rhs.type); + type type = type::merge(lhs_exp.type, rhs_exp.type); bool is_bool_result = false; // Do some error checking depending on the operator @@ -1304,8 +1513,11 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l is_bool_result = true; // Cannot check equality between incompatible types - if (lhs.type.is_array() || rhs.type.is_array() || lhs.type.definition != rhs.type.definition) - return error(rhs.location, 3020, "type mismatch"), false; + if (lhs_exp.type.is_array() || rhs_exp.type.is_array() || lhs_exp.type.struct_definition != rhs_exp.type.struct_definition) + { + error(rhs_exp.location, 3020, "type mismatch"); + return false; + } } else if (op == tokenid::ampersand || op == tokenid::pipe || op == tokenid::caret) { @@ -1313,10 +1525,16 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l type.base = type::t_int; // Cannot perform bitwise operations on non-integral types - if (!lhs.type.is_integral()) - return error(lhs.location, 3082, "int or unsigned int type required"), false; - if (!rhs.type.is_integral()) - return error(rhs.location, 3082, "int or unsigned int type required"), false; + if (!lhs_exp.type.is_integral()) + { + error(lhs_exp.location, 3082, "int or unsigned int type required"); + return false; + } + if (!rhs_exp.type.is_integral()) + { + error(rhs_exp.location, 3082, "int or unsigned int type required"); + return false; + } } else { @@ -1328,20 +1546,26 @@ bool reshadefx::parser::parse_expression_multary(expression 
&lhs, unsigned int l type.base = type::t_int; // Arithmetic with boolean values treats the operands as integers // Cannot perform arithmetic operations on non-basic types - if (!lhs.type.is_scalar() && !lhs.type.is_vector() && !lhs.type.is_matrix()) - return error(lhs.location, 3022, "scalar, vector, or matrix expected"), false; - if (!rhs.type.is_scalar() && !rhs.type.is_vector() && !rhs.type.is_matrix()) - return error(rhs.location, 3022, "scalar, vector, or matrix expected"), false; + if (!lhs_exp.type.is_scalar() && !lhs_exp.type.is_vector() && !lhs_exp.type.is_matrix()) + { + error(lhs_exp.location, 3022, "scalar, vector, or matrix expected"); + return false; + } + if (!rhs_exp.type.is_scalar() && !rhs_exp.type.is_vector() && !rhs_exp.type.is_matrix()) + { + error(rhs_exp.location, 3022, "scalar, vector, or matrix expected"); + return false; + } } // Perform implicit type conversion - if (lhs.type.components() > type.components()) - warning(lhs.location, 3206, "implicit truncation of vector type"); - if (rhs.type.components() > type.components()) - warning(rhs.location, 3206, "implicit truncation of vector type"); + if (lhs_exp.type.components() > type.components()) + warning(lhs_exp.location, 3206, "implicit truncation of vector type"); + if (rhs_exp.type.components() > type.components()) + warning(rhs_exp.location, 3206, "implicit truncation of vector type"); - lhs.add_cast_operation(type); - rhs.add_cast_operation(type); + lhs_exp.add_cast_operation(type); + rhs_exp.add_cast_operation(type); #if RESHADEFX_SHORT_CIRCUIT // Reset block to left-hand side since the load of the left-hand side value has to happen in there @@ -1350,10 +1574,10 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l #endif // Constant expressions can be evaluated at compile time - if (rhs.is_constant && lhs.evaluate_constant_expression(op, rhs.constant)) + if (rhs_exp.is_constant && lhs_exp.evaluate_constant_expression(op, rhs_exp.constant)) continue; - 
const auto lhs_value = _codegen->emit_load(lhs); + const codegen::id lhs_value = _codegen->emit_load(lhs_exp); #if RESHADEFX_SHORT_CIRCUIT // Short circuit for logical && and || operators @@ -1363,38 +1587,41 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l codegen::id condition_value = lhs_value; // Emit "if (!lhs) result = rhs" for || expression if (op == tokenid::pipe_pipe) - condition_value = _codegen->emit_unary_op(lhs.location, tokenid::exclaim, type, lhs_value); + condition_value = _codegen->emit_unary_op(lhs_exp.location, tokenid::exclaim, type, lhs_value); _codegen->leave_block_and_branch_conditional(condition_value, rhs_block, merge_block); _codegen->set_block(rhs_block); // Only load value of right hand side expression after entering the second block - const auto rhs_value = _codegen->emit_load(rhs); + const codegen::id rhs_value = _codegen->emit_load(rhs_exp); _codegen->leave_block_and_branch(merge_block); _codegen->enter_block(merge_block); - const auto result_value = _codegen->emit_phi(lhs.location, condition_value, lhs_block, rhs_value, rhs_block, lhs_value, lhs_block, type); + const codegen::id result_value = _codegen->emit_phi(lhs_exp.location, condition_value, lhs_block, rhs_value, rhs_block, lhs_value, lhs_block, type); - lhs.reset_to_rvalue(lhs.location, result_value, type); + lhs_exp.reset_to_rvalue(lhs_exp.location, result_value, type); continue; } #endif - const auto rhs_value = _codegen->emit_load(rhs); + const codegen::id rhs_value = _codegen->emit_load(rhs_exp); // Certain operations return a boolean type instead of the type of the input expressions if (is_bool_result) type = { type::t_bool, type.rows, type.cols }; - const auto result_value = _codegen->emit_binary_op(lhs.location, op, type, lhs.type, lhs_value, rhs_value); + const codegen::id result_value = _codegen->emit_binary_op(lhs_exp.location, op, type, lhs_exp.type, lhs_value, rhs_value); - lhs.reset_to_rvalue(lhs.location, result_value, type); + 
lhs_exp.reset_to_rvalue(lhs_exp.location, result_value, type); } else { // A conditional expression needs a scalar or vector type condition - if (!lhs.type.is_scalar() && !lhs.type.is_vector()) - return error(lhs.location, 3022, "boolean or vector expression expected"), false; + if (!lhs_exp.type.is_scalar() && !lhs_exp.type.is_vector()) + { + error(lhs_exp.location, 3022, "boolean or vector expression expected"); + return false; + } #if RESHADEFX_SHORT_CIRCUIT // Switch block to a new one before parsing first part in case it needs to be skipped during short-circuiting @@ -1424,12 +1651,18 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l return false; // Check that the condition dimension matches that of at least one side - if (lhs.type.rows != true_exp.type.rows && lhs.type.cols != true_exp.type.cols) - return error(lhs.location, 3020, "dimension of conditional does not match value"), false; + if (lhs_exp.type.rows != true_exp.type.rows && lhs_exp.type.cols != true_exp.type.cols) + { + error(lhs_exp.location, 3020, "dimension of conditional does not match value"); + return false; + } // Check that the two value expressions can be converted between each other - if (true_exp.type.array_length != false_exp.type.array_length || true_exp.type.definition != false_exp.type.definition) - return error(false_exp.location, 3020, "type mismatch between conditional values"), false; + if (true_exp.type.array_length != false_exp.type.array_length || true_exp.type.struct_definition != false_exp.type.struct_definition) + { + error(false_exp.location, 3020, "type mismatch between conditional values"); + return false; + } // Deduce the result base type based on implicit conversion rules const type type = type::merge(true_exp.type, false_exp.type); @@ -1444,47 +1677,47 @@ bool reshadefx::parser::parse_expression_multary(expression &lhs, unsigned int l _codegen->set_block(condition_block); #else // The conditional operator instruction expects the 
condition to be a boolean type - lhs.add_cast_operation({ type::t_bool, type.rows, 1 }); + lhs_exp.add_cast_operation({ type::t_bool, type.rows, 1 }); #endif true_exp.add_cast_operation(type); false_exp.add_cast_operation(type); // Load condition value from expression - const auto condition_value = _codegen->emit_load(lhs); + const codegen::id condition_value = _codegen->emit_load(lhs_exp); #if RESHADEFX_SHORT_CIRCUIT _codegen->leave_block_and_branch_conditional(condition_value, true_block, false_block); _codegen->set_block(true_block); // Only load true expression value after entering the first block - const auto true_value = _codegen->emit_load(true_exp); + const codegen::id true_value = _codegen->emit_load(true_exp); true_block = _codegen->leave_block_and_branch(merge_block); _codegen->set_block(false_block); // Only load false expression value after entering the second block - const auto false_value = _codegen->emit_load(false_exp); + const codegen::id false_value = _codegen->emit_load(false_exp); false_block = _codegen->leave_block_and_branch(merge_block); _codegen->enter_block(merge_block); - const auto result_value = _codegen->emit_phi(lhs.location, condition_value, condition_block, true_value, true_block, false_value, false_block, type); + const codegen::id result_value = _codegen->emit_phi(lhs_exp.location, condition_value, condition_block, true_value, true_block, false_value, false_block, type); #else - const auto true_value = _codegen->emit_load(true_exp); - const auto false_value = _codegen->emit_load(false_exp); + const codegen::id true_value = _codegen->emit_load(true_exp); + const codegen::id false_value = _codegen->emit_load(false_exp); - const auto result_value = _codegen->emit_ternary_op(lhs.location, op, type, condition_value, true_value, false_value); + const codegen::id result_value = _codegen->emit_ternary_op(lhs_exp.location, op, type, condition_value, true_value, false_value); #endif - lhs.reset_to_rvalue(lhs.location, result_value, type); + 
lhs_exp.reset_to_rvalue(lhs_exp.location, result_value, type); } } return true; } -bool reshadefx::parser::parse_expression_assignment(expression &lhs) +bool reshadefx::parser::parse_expression_assignment(expression &lhs_exp) { // Parse left hand side of the expression - if (!parse_expression_multary(lhs)) + if (!parse_expression_multary(lhs_exp)) return false; // Check if an operator exists so that this is an assignment @@ -1495,43 +1728,52 @@ bool reshadefx::parser::parse_expression_assignment(expression &lhs) // Parse right hand side of the assignment expression // This may be another assignment expression to support chains like "a = b = c = 0;" - expression rhs; - if (!parse_expression_assignment(rhs)) + expression rhs_exp; + if (!parse_expression_assignment(rhs_exp)) return false; // Check if the assignment is valid - if (lhs.type.has(type::q_const) || !lhs.is_lvalue) - return error(lhs.location, 3025, "l-value specifies const object"), false; - if (!type::rank(lhs.type, rhs.type)) - return error(rhs.location, 3020, "cannot convert these types (from " + rhs.type.description() + " to " + lhs.type.description() + ')'), false; + if (lhs_exp.type.has(type::q_const) || !lhs_exp.is_lvalue) + { + error(lhs_exp.location, 3025, "l-value specifies const object"); + return false; + } + if (!type::rank(lhs_exp.type, rhs_exp.type)) + { + error(rhs_exp.location, 3020, "cannot convert these types (from " + rhs_exp.type.description() + " to " + lhs_exp.type.description() + ')'); + return false; + } // Cannot perform bitwise operations on non-integral types - if (!lhs.type.is_integral() && (op == tokenid::ampersand_equal || op == tokenid::pipe_equal || op == tokenid::caret_equal)) - return error(lhs.location, 3082, "int or unsigned int type required"), false; + if (!lhs_exp.type.is_integral() && (op == tokenid::ampersand_equal || op == tokenid::pipe_equal || op == tokenid::caret_equal)) + { + error(lhs_exp.location, 3082, "int or unsigned int type required"); + return false; + 
} // Perform implicit type conversion of right hand side value - if (rhs.type.components() > lhs.type.components()) - warning(rhs.location, 3206, "implicit truncation of vector type"); + if (rhs_exp.type.components() > lhs_exp.type.components()) + warning(rhs_exp.location, 3206, "implicit truncation of vector type"); - rhs.add_cast_operation(lhs.type); + rhs_exp.add_cast_operation(lhs_exp.type); - auto result = _codegen->emit_load(rhs); + codegen::id result_value = _codegen->emit_load(rhs_exp); // Check if this is an assignment with an additional arithmetic instruction if (op != tokenid::equal) { // Load value for modification - const auto value = _codegen->emit_load(lhs); + const codegen::id lhs_value = _codegen->emit_load(lhs_exp); // Handle arithmetic assignment operation - result = _codegen->emit_binary_op(lhs.location, op, lhs.type, value, result); + result_value = _codegen->emit_binary_op(lhs_exp.location, op, lhs_exp.type, lhs_value, result_value); } // Write result back to variable - _codegen->emit_store(lhs, result); + _codegen->emit_store(lhs_exp, result_value); // Return the result value since you can write assignments within expressions - lhs.reset_to_rvalue(lhs.location, result, lhs.type); + lhs_exp.reset_to_rvalue(lhs_exp.location, result_value, lhs_exp.type); } return true; diff --git a/dep/reshadefx/src/effect_parser_stmt.cpp b/dep/reshadefx/src/effect_parser_stmt.cpp index f447adb06..ff85d8e09 100644 --- a/dep/reshadefx/src/effect_parser_stmt.cpp +++ b/dep/reshadefx/src/effect_parser_stmt.cpp @@ -8,22 +8,25 @@ #include "effect_codegen.hpp" #include // std::toupper #include +#include +#include // std::max, std::replace, std::transform #include -#include #include -struct on_scope_exit +template +struct scope_guard { - template - explicit on_scope_exit(F lambda) : leave(lambda) { } - ~on_scope_exit() { leave(); } + explicit scope_guard(ENTER_TYPE &&enter_lambda, LEAVE_TYPE &&leave_lambda) : + leave_lambda(std::forward(leave_lambda)) { enter_lambda(); 
} + ~scope_guard() { leave_lambda(); } - std::function leave; +private: + LEAVE_TYPE leave_lambda; }; bool reshadefx::parser::parse(std::string input, codegen *backend) { - _lexer.reset(new lexer(std::move(input))); + _lexer = std::make_unique(std::move(input)); // Set backend for subsequent code-generation _codegen = backend; @@ -36,32 +39,30 @@ bool reshadefx::parser::parse(std::string input, codegen *backend) while (!peek(tokenid::end_of_file)) { - parse_top(current_success); + if (!parse_top(current_success)) + return false; if (!current_success) parse_success = false; } + if (parse_success) + backend->optimize_bindings(); + return parse_success; } -void reshadefx::parser::parse_top(bool &parse_success) +bool reshadefx::parser::parse_top(bool &parse_success) { if (accept(tokenid::namespace_)) { // Anonymous namespaces are not supported right now, so an identifier is a must if (!expect(tokenid::identifier)) - { - parse_success = false; - return; - } + return false; - const auto name = std::move(_token.literal_as_string); + const std::string name = std::move(_token.literal_as_string); if (!expect('{')) - { - parse_success = false; - return; - } + return false; enter_namespace(name); @@ -71,7 +72,8 @@ void reshadefx::parser::parse_top(bool &parse_success) // Recursively parse top level statements until the namespace is closed again while (!peek('}')) // Empty namespaces are valid { - parse_top(current_success); + if (!parse_top(current_success)) + return false; if (!current_success) parse_success_namespace = false; } @@ -91,45 +93,124 @@ void reshadefx::parser::parse_top(bool &parse_success) } else { - if (type type; parse_type(type)) // Type found, this can be either a variable or a function declaration + location attribute_location; + shader_type stype = shader_type::unknown; + int num_threads[3] = { 0, 0, 0 }; + + // Read any function attributes first + while (accept('[')) + { + if (!expect(tokenid::identifier)) + return false; + + const std::string attribute = 
std::move(_token.literal_as_string); + + if (attribute == "shader") + { + attribute_location = _token_next.location; + + if (!expect('(') || !expect(tokenid::string_literal)) + return false; + + if (_token.literal_as_string == "vertex") + stype = shader_type::vertex; + else if (_token.literal_as_string == "pixel") + stype = shader_type::pixel; + else if (_token.literal_as_string == "compute") + stype = shader_type::compute; + + if (!expect(')')) + return false; + } + else if (attribute == "numthreads") + { + attribute_location = _token_next.location; + + expression x, y, z; + if (!expect('(') || !parse_expression_multary(x, 8) || !expect(',') || !parse_expression_multary(y, 8) || !expect(',') || !parse_expression_multary(z, 8) || !expect(')')) + return false; + + if (!x.is_constant) + { + error(x.location, 3011, "value must be a literal expression"); + parse_success = false; + } + if (!y.is_constant) + { + error(y.location, 3011, "value must be a literal expression"); + parse_success = false; + } + if (!z.is_constant) + { + error(z.location, 3011, "value must be a literal expression"); + parse_success = false; + } + x.add_cast_operation({ type::t_int, 1, 1 }); + y.add_cast_operation({ type::t_int, 1, 1 }); + z.add_cast_operation({ type::t_int, 1, 1 }); + num_threads[0] = x.constant.as_int[0]; + num_threads[1] = y.constant.as_int[0]; + num_threads[2] = z.constant.as_int[0]; + } + else + { + warning(_token.location, 0, "unknown attribute '" + attribute + "'"); + } + + if (!expect(']')) + return false; + } + + if (type type = {}; parse_type(type)) // Type found, this can be either a variable or a function declaration { parse_success = expect(tokenid::identifier); if (!parse_success) - return; + return true; if (peek('(')) { - const auto name = std::move(_token.literal_as_string); + const std::string name = std::move(_token.literal_as_string); + // This is definitely a function declaration, so parse it - if (!parse_function(type, name)) + if (!parse_function(type, 
name, stype, num_threads)) { // Insert dummy function into symbol table, so later references can be resolved despite the error - insert_symbol(name, { symbol_type::function, ~0u, { type::t_function } }, true); + insert_symbol(name, { symbol_type::function, UINT32_MAX, { type::t_function } }, true); parse_success = false; - return; + return true; } } else { + if (!attribute_location.source.empty()) + { + error(attribute_location, 0, "attribute is valid only on functions"); + parse_success = false; + } + // There may be multiple variable names after the type, handle them all unsigned int count = 0; - do { + do + { if (count++ > 0 && !(expect(',') && expect(tokenid::identifier))) { parse_success = false; - return; + return false; } - const auto name = std::move(_token.literal_as_string); + + const std::string name = std::move(_token.literal_as_string); + if (!parse_variable(type, name, true)) { // Insert dummy variable into symbol table, so later references can be resolved despite the error - insert_symbol(name, { symbol_type::variable, ~0u, type }, true); + insert_symbol(name, { symbol_type::variable, UINT32_MAX, type }, true); // Skip the rest of the statement consume_until(';'); parse_success = false; - return; + return true; } - } while (!peek(';')); + } + while (!peek(';')); // Variable declarations are terminated with a semicolon parse_success = expect(';'); @@ -150,12 +231,17 @@ void reshadefx::parser::parse_top(bool &parse_success) parse_success = false; } } + + return true; } bool reshadefx::parser::parse_statement(bool scoped) { if (!_codegen->is_in_block()) - return error(_token_next.location, 0, "unreachable code"), false; + { + error(_token_next.location, 0, "unreachable code"); + return false; + } unsigned int loop_control = 0; unsigned int selection_control = 0; @@ -173,7 +259,7 @@ bool reshadefx::parser::parse_statement(bool scoped) switch_call = (0x8 << 4) }; - const auto attribute = std::move(_token_next.literal_as_string); + const std::string 
attribute = std::move(_token_next.literal_as_string); if (!expect(tokenid::identifier) || !expect(']')) return false; @@ -191,12 +277,18 @@ bool reshadefx::parser::parse_statement(bool scoped) else if (attribute == "call") selection_control |= switch_call; else - warning(_token.location, 0, "unknown attribute"); + warning(_token.location, 0, "unknown attribute '" + attribute + "'"); if ((loop_control & (unroll | dont_unroll)) == (unroll | dont_unroll)) - return error(_token.location, 3524, "can't use loop and unroll attributes together"), false; + { + error(_token.location, 3524, "can't use loop and unroll attributes together"); + return false; + } if ((selection_control & (flatten | dont_flatten)) == (flatten | dont_flatten)) - return error(_token.location, 3524, "can't use branch and flatten attributes together"), false; + { + error(_token.location, 3524, "can't use branch and flatten attributes together"); + return false; + } } // Shift by two so that the possible values are 0x01 for 'flatten' and 0x02 for 'dont_flatten', equivalent to 'unroll' and 'dont_unroll' @@ -204,15 +296,14 @@ bool reshadefx::parser::parse_statement(bool scoped) if (peek('{')) // Parse statement block return parse_statement_block(scoped); - else if (accept(';')) // Ignore empty statements + + if (accept(';')) // Ignore empty statements return true; // Most statements with the exception of declarations are only valid inside functions if (_codegen->is_in_function()) { - assert(_current_function != nullptr); - - const auto location = _token_next.location; + const location statement_location = _token_next.location; if (accept(tokenid::if_)) { @@ -220,16 +311,20 @@ bool reshadefx::parser::parse_statement(bool scoped) codegen::id false_block = _codegen->create_block(); // Block which contains the statements executed when the condition is false const codegen::id merge_block = _codegen->create_block(); // Block that is executed after the branch re-merged with the current control flow - expression 
condition; - if (!expect('(') || !parse_expression(condition) || !expect(')')) + expression condition_exp; + if (!expect('(') || !parse_expression(condition_exp) || !expect(')')) return false; - else if (!condition.type.is_scalar()) - return error(condition.location, 3019, "if statement conditional expressions must evaluate to a scalar"), false; - // Load condition and convert to boolean value as required by 'OpBranchConditional' - condition.add_cast_operation({ type::t_bool, 1, 1 }); + if (!condition_exp.type.is_scalar()) + { + error(condition_exp.location, 3019, "if statement conditional expressions must evaluate to a scalar"); + return false; + } - const codegen::id condition_value = _codegen->emit_load(condition); + // Load condition and convert to boolean value as required by 'OpBranchConditional' in SPIR-V + condition_exp.add_cast_operation({ type::t_bool, 1, 1 }); + + const codegen::id condition_value = _codegen->emit_load(condition_exp); const codegen::id condition_block = _codegen->leave_block_and_branch_conditional(condition_value, true_block, false_block); { // Then block of the if statement @@ -252,7 +347,7 @@ bool reshadefx::parser::parse_statement(bool scoped) _codegen->enter_block(merge_block); // Emit structured control flow for an if statement and connect all basic blocks - _codegen->emit_if(location, condition_value, condition_block, true_block, false_block, selection_control); + _codegen->emit_if(statement_location, condition_value, condition_block, true_block, false_block, selection_control); return true; } @@ -264,20 +359,29 @@ bool reshadefx::parser::parse_statement(bool scoped) expression selector_exp; if (!expect('(') || !parse_expression(selector_exp) || !expect(')')) return false; - else if (!selector_exp.type.is_scalar()) - return error(selector_exp.location, 3019, "switch statement expression must evaluate to a scalar"), false; + + if (!selector_exp.type.is_scalar()) + { + error(selector_exp.location, 3019, "switch statement expression 
must evaluate to a scalar"); + return false; + } // Load selector and convert to integral value as required by switch instruction selector_exp.add_cast_operation({ type::t_int, 1, 1 }); - const auto selector_value = _codegen->emit_load(selector_exp); - const auto selector_block = _codegen->leave_block_and_switch(selector_value, merge_block); + const codegen::id selector_value = _codegen->emit_load(selector_exp); + const codegen::id selector_block = _codegen->leave_block_and_switch(selector_value, merge_block); if (!expect('{')) return false; - _loop_break_target_stack.push_back(merge_block); - on_scope_exit _([this]() { _loop_break_target_stack.pop_back(); }); + scope_guard _( + [this, merge_block]() { + _loop_break_target_stack.push_back(merge_block); + }, + [this]() { + _loop_break_target_stack.pop_back(); + }); bool parse_success = true; // The default case jumps to the end of the switch statement if not overwritten @@ -297,9 +401,17 @@ bool reshadefx::parser::parse_statement(bool scoped) { expression case_label; if (!parse_expression(case_label)) - return consume_until('}'), false; - else if (!case_label.type.is_scalar() || !case_label.type.is_integral() || !case_label.is_constant) - return error(case_label.location, 3020, "invalid type for case expression - value must be an integer scalar"), consume_until('}'), false; + { + consume_until('}'); + return false; + } + + if (!case_label.type.is_scalar() || !case_label.type.is_integral() || !case_label.is_constant) + { + error(case_label.location, 3020, "invalid type for case expression - value must be an integer scalar"); + consume_until('}'); + return false; + } // Check for duplicate case values for (size_t i = 0; i < case_literal_and_labels.size(); i += 2) @@ -330,14 +442,20 @@ bool reshadefx::parser::parse_statement(bool scoped) } if (!expect(':')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } } // It is valid for no statement to follow if this is the last label in the 
switch body const bool end_of_switch = peek('}'); if (!end_of_switch && !parse_statement(true)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } // Handle fall-through case and end of switch statement if (peek(tokenid::case_) || peek(tokenid::default_) || end_of_switch) @@ -369,10 +487,10 @@ bool reshadefx::parser::parse_statement(bool scoped) } if (case_literal_and_labels.empty() && default_label == merge_block) - warning(location, 5002, "switch statement contains no 'case' or 'default' labels"); + warning(statement_location, 5002, "switch statement contains no 'case' or 'default' labels"); // Emit structured control flow for a switch statement and connect all basic blocks - _codegen->emit_switch(location, selector_value, selector_block, default_label, default_block, case_literal_and_labels, case_blocks, selection_control); + _codegen->emit_switch(statement_location, selector_value, selector_block, default_label, default_block, case_literal_and_labels, case_blocks, selection_control); return expect('}') && parse_success; } @@ -382,27 +500,32 @@ bool reshadefx::parser::parse_statement(bool scoped) if (!expect('(')) return false; - enter_scope(); - on_scope_exit _([this]() { leave_scope(); }); + scope_guard _( + [this]() { enter_scope(); }, + [this]() { leave_scope(); }); // Parse initializer first - if (type type; parse_type(type)) + if (type type = {}; parse_type(type)) { unsigned int count = 0; - do { // There may be multiple declarations behind a type, so loop through them + do + { + // There may be multiple declarations behind a type, so loop through them if (count++ > 0 && !expect(',')) return false; + if (!expect(tokenid::identifier) || !parse_variable(type, std::move(_token.literal_as_string))) return false; - } while (!peek(';')); + } + while (!peek(';')); } else { // Initializer can also contain an expression if not a variable declaration list and not empty if (!peek(';')) { - expression expression; - if 
(!parse_expression(expression)) + expression initializer_exp; + if (!parse_expression(initializer_exp)) return false; } } @@ -431,18 +554,20 @@ bool reshadefx::parser::parse_statement(bool scoped) if (!peek(';')) { - expression condition; - if (!parse_expression(condition)) + expression condition_exp; + if (!parse_expression(condition_exp)) return false; - if (!condition.type.is_scalar()) - return error(condition.location, 3019, "scalar value expected"), false; + if (!condition_exp.type.is_scalar()) + { + error(condition_exp.location, 3019, "scalar value expected"); + return false; + } // Evaluate condition and branch to the right target - condition.add_cast_operation({ type::t_bool, 1, 1 }); - - condition_value = _codegen->emit_load(condition); + condition_exp.add_cast_operation({ type::t_bool, 1, 1 }); + condition_value = _codegen->emit_load(condition_exp); condition_block = _codegen->leave_block_and_branch_conditional(condition_value, loop_block, merge_block); } else // It is valid for there to be no condition expression @@ -492,15 +617,16 @@ bool reshadefx::parser::parse_statement(bool scoped) _codegen->enter_block(merge_block); // Emit structured control flow for a loop statement and connect all basic blocks - _codegen->emit_loop(location, condition_value, prev_block, header_label, condition_block, loop_block, continue_label, loop_control); + _codegen->emit_loop(statement_location, condition_value, prev_block, header_label, condition_block, loop_block, continue_label, loop_control); return true; } if (accept(tokenid::while_)) { - enter_scope(); - on_scope_exit _([this]() { leave_scope(); }); + scope_guard _( + [this]() { enter_scope(); }, + [this]() { leave_scope(); }); const codegen::id merge_block = _codegen->create_block(); const codegen::id header_label = _codegen->create_block(); @@ -521,17 +647,20 @@ bool reshadefx::parser::parse_statement(bool scoped) { // Parse condition block _codegen->enter_block(condition_block); - expression condition; - if 
(!expect('(') || !parse_expression(condition) || !expect(')')) + expression condition_exp; + if (!expect('(') || !parse_expression(condition_exp) || !expect(')')) return false; - else if (!condition.type.is_scalar()) - return error(condition.location, 3019, "scalar value expected"), false; + + if (!condition_exp.type.is_scalar()) + { + error(condition_exp.location, 3019, "scalar value expected"); + return false; + } // Evaluate condition and branch to the right target - condition.add_cast_operation({ type::t_bool, 1, 1 }); - - condition_value = _codegen->emit_load(condition); + condition_exp.add_cast_operation({ type::t_bool, 1, 1 }); + condition_value = _codegen->emit_load(condition_exp); condition_block = _codegen->leave_block_and_branch_conditional(condition_value, loop_block, merge_block); } @@ -562,7 +691,7 @@ bool reshadefx::parser::parse_statement(bool scoped) _codegen->enter_block(merge_block); // Emit structured control flow for a loop statement and connect all basic blocks - _codegen->emit_loop(location, condition_value, prev_block, header_label, condition_block, loop_block, continue_label, loop_control); + _codegen->emit_loop(statement_location, condition_value, prev_block, header_label, condition_block, loop_block, continue_label, loop_control); return true; } @@ -604,16 +733,20 @@ bool reshadefx::parser::parse_statement(bool scoped) { // Continue block does the condition evaluation _codegen->enter_block(continue_label); - expression condition; - if (!expect(tokenid::while_) || !expect('(') || !parse_expression(condition) || !expect(')') || !expect(';')) + expression condition_exp; + if (!expect(tokenid::while_) || !expect('(') || !parse_expression(condition_exp) || !expect(')') || !expect(';')) return false; - else if (!condition.type.is_scalar()) - return error(condition.location, 3019, "scalar value expected"), false; + + if (!condition_exp.type.is_scalar()) + { + error(condition_exp.location, 3019, "scalar value expected"); + return false; + } // 
Evaluate condition and branch to the right target - condition.add_cast_operation({ type::t_bool, 1, 1 }); + condition_exp.add_cast_operation({ type::t_bool, 1, 1 }); - condition_value = _codegen->emit_load(condition); + condition_value = _codegen->emit_load(condition_exp); _codegen->leave_block_and_branch_conditional(condition_value, header_label, merge_block); } @@ -622,7 +755,7 @@ bool reshadefx::parser::parse_statement(bool scoped) _codegen->enter_block(merge_block); // Emit structured control flow for a loop statement and connect all basic blocks - _codegen->emit_loop(location, condition_value, prev_block, header_label, 0, loop_block, continue_label, loop_control); + _codegen->emit_loop(statement_location, condition_value, prev_block, header_label, 0, loop_block, continue_label, loop_control); return true; } @@ -630,7 +763,10 @@ bool reshadefx::parser::parse_statement(bool scoped) if (accept(tokenid::break_)) { if (_loop_break_target_stack.empty()) - return error(location, 3518, "break must be inside loop"), false; + { + error(statement_location, 3518, "break must be inside loop"); + return false; + } // Branch to the break target of the inner most loop on the stack _codegen->leave_block_and_branch(_loop_break_target_stack.back(), 1); @@ -641,7 +777,10 @@ bool reshadefx::parser::parse_statement(bool scoped) if (accept(tokenid::continue_)) { if (_loop_continue_target_stack.empty()) - return error(location, 3519, "continue must be inside loop"), false; + { + error(statement_location, 3519, "continue must be inside loop"); + return false; + } // Branch to the continue target of the inner most loop on the stack _codegen->leave_block_and_branch(_loop_continue_target_stack.back(), 2); @@ -651,37 +790,48 @@ bool reshadefx::parser::parse_statement(bool scoped) if (accept(tokenid::return_)) { - const type &ret_type = _current_function->return_type; + const type &return_type = _codegen->_current_function->return_type; if (!peek(';')) { - expression expression; - if 
(!parse_expression(expression)) - return consume_until(';'), false; + expression return_exp; + if (!parse_expression(return_exp)) + { + consume_until(';'); + return false; + } // Cannot return to void - if (ret_type.is_void()) + if (return_type.is_void()) + { + error(statement_location, 3079, "void functions cannot return a value"); // Consume the semicolon that follows the return expression so that parsing may continue - return error(location, 3079, "void functions cannot return a value"), accept(';'), false; + accept(';'); + return false; + } // Cannot return arrays from a function - if (expression.type.is_array() || !type::rank(expression.type, ret_type)) - return error(location, 3017, "expression (" + expression.type.description() + ") does not match function return type (" + ret_type.description() + ')'), accept(';'), false; + if (return_exp.type.is_array() || !type::rank(return_exp.type, return_type)) + { + error(statement_location, 3017, "expression (" + return_exp.type.description() + ") does not match function return type (" + return_type.description() + ')'); + accept(';'); + return false; + } // Load return value and perform implicit cast to function return type - if (expression.type.components() > ret_type.components()) - warning(expression.location, 3206, "implicit truncation of vector type"); + if (return_exp.type.components() > return_type.components()) + warning(return_exp.location, 3206, "implicit truncation of vector type"); - expression.add_cast_operation(ret_type); + return_exp.add_cast_operation(return_type); - const auto return_value = _codegen->emit_load(expression); + const codegen::id return_value = _codegen->emit_load(return_exp); _codegen->leave_block_and_return(return_value); } - else if (!ret_type.is_void()) + else if (!return_type.is_void()) { // No return value was found, but the function expects one - error(location, 3080, "function must return a value"); + error(statement_location, 3080, "function must return a value"); // Consume 
the semicolon that follows the return expression so that parsing may continue accept(';'); @@ -706,22 +856,33 @@ bool reshadefx::parser::parse_statement(bool scoped) } // Handle variable declarations - if (type type; parse_type(type)) + if (type type = {}; parse_type(type)) { unsigned int count = 0; - do { // There may be multiple declarations behind a type, so loop through them + do + { + // There may be multiple declarations behind a type, so loop through them if (count++ > 0 && !expect(',')) + { // Try to consume the rest of the declaration so that parsing may continue despite the error - return consume_until(';'), false; + consume_until(';'); + return false; + } + if (!expect(tokenid::identifier) || !parse_variable(type, std::move(_token.literal_as_string))) - return consume_until(';'), false; - } while (!peek(';')); + { + consume_until(';'); + return false; + } + } + while (!peek(';')); return expect(';'); } // Handle expression statements - if (expression expression; parse_expression(expression)) + expression statement_exp; + if (parse_expression(statement_exp)) return expect(';'); // A statement has to be terminated with a semicolon // Gracefully consume any remaining characters until the statement would usually end, so that parsing may continue despite the error @@ -778,20 +939,23 @@ bool reshadefx::parser::parse_statement_block(bool scoped) bool reshadefx::parser::parse_type(type &type) { type.qualifiers = 0; - accept_type_qualifiers(type); if (!accept_type_class(type)) return false; if (type.is_integral() && (type.has(type::q_centroid) || type.has(type::q_noperspective))) - return error(_token.location, 4576, "signature specifies invalid interpolation mode for integer component type"), false; - else if (type.has(type::q_centroid) && !type.has(type::q_noperspective)) + { + error(_token.location, 4576, "signature specifies invalid interpolation mode for integer component type"); + return false; + } + + if (type.has(type::q_centroid) && 
!type.has(type::q_noperspective)) type.qualifiers |= type::q_linear; return true; } -bool reshadefx::parser::parse_array_size(type &type) +bool reshadefx::parser::parse_array_length(type &type) { // Reset array length to zero before checking if one exists type.array_length = 0; @@ -800,18 +964,24 @@ bool reshadefx::parser::parse_array_size(type &type) { if (accept(']')) { - // No length expression, so this is an unsized array - type.array_length = -1; + // No length expression, so this is an unbounded array + type.array_length = 0xFFFFFFFF; } - else if (expression expression; parse_expression(expression) && expect(']')) + else if (expression length_exp; parse_expression(length_exp) && expect(']')) { - if (!expression.is_constant || !(expression.type.is_scalar() && expression.type.is_integral())) - return error(expression.location, 3058, "array dimensions must be literal scalar expressions"), false; + if (!length_exp.is_constant || !(length_exp.type.is_scalar() && length_exp.type.is_integral())) + { + error(length_exp.location, 3058, "array dimensions must be literal scalar expressions"); + return false; + } - type.array_length = expression.constant.as_uint[0]; + type.array_length = length_exp.constant.as_uint[0]; if (type.array_length < 1 || type.array_length > 65536) - return error(expression.location, 3059, "array dimension must be between 1 and 65536"), false; + { + error(length_exp.location, 3059, "array dimension must be between 1 and 65536"); + return false; + } } else { @@ -821,7 +991,10 @@ bool reshadefx::parser::parse_array_size(type &type) // Multi-dimensional arrays are not supported if (peek('[')) - return error(_token_next.location, 3119, "arrays cannot be multi-dimensional"), false; + { + error(_token_next.location, 3119, "arrays cannot be multi-dimensional"); + return false; + } return true; } @@ -836,21 +1009,33 @@ bool reshadefx::parser::parse_annotations(std::vector &annotations) while (!peek('>')) { - if (type type; accept_type_class(type)) + if 
(type type /* = {} */; accept_type_class(type)) warning(_token.location, 4717, "type prefixes for annotations are deprecated and ignored"); if (!expect(tokenid::identifier)) - return consume_until('>'), false; + { + consume_until('>'); + return false; + } - auto name = std::move(_token.literal_as_string); + std::string name = std::move(_token.literal_as_string); - if (expression expression; !expect('=') || !parse_expression_multary(expression) || !expect(';')) - return consume_until('>'), false; - else if (expression.is_constant) - annotations.push_back({ expression.type, std::move(name), std::move(expression.constant) }); + expression annotation_exp; + if (!expect('=') || !parse_expression_multary(annotation_exp) || !expect(';')) + { + consume_until('>'); + return false; + } + + if (annotation_exp.is_constant) + { + annotations.push_back({ annotation_exp.type, std::move(name), std::move(annotation_exp.constant) }); + } else // Continue parsing annotations despite this not being a constant, since the syntax is still correct - parse_success = false, - error(expression.location, 3011, "value must be a literal expression"); + { + parse_success = false; + error(annotation_exp.location, 3011, "value must be a literal expression"); + } } return expect('>') && parse_success; @@ -858,14 +1043,14 @@ bool reshadefx::parser::parse_annotations(std::vector &annotations) bool reshadefx::parser::parse_struct() { - const auto location = std::move(_token.location); + const location struct_location = std::move(_token.location); - struct_info info; + struct_type info; // The structure name is optional if (accept(tokenid::identifier)) info.name = std::move(_token.literal_as_string); else - info.name = "_anonymous_struct_" + std::to_string(location.line) + '_' + std::to_string(location.column); + info.name = "_anonymous_struct_" + std::to_string(struct_location.line) + '_' + std::to_string(struct_location.column); info.unique_name = 'S' + current_scope().name + info.name; 
std::replace(info.unique_name.begin(), info.unique_name.end(), ':', '_'); @@ -877,60 +1062,94 @@ bool reshadefx::parser::parse_struct() while (!peek('}')) // Empty structures are possible { - struct_member_info member; + member_type member; if (!parse_type(member.type)) - return error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected struct member type"), consume_until('}'), accept(';'), false; + { + error(_token_next.location, 3000, "syntax error: unexpected '" + token::id_to_name(_token_next.id) + "', expected struct member type"); + consume_until('}'); + accept(';'); + return false; + } unsigned int count = 0; - do { - if (count++ > 0 && !expect(',')) - return consume_until('}'), accept(';'), false; - - if (!expect(tokenid::identifier)) - return consume_until('}'), accept(';'), false; + do + { + if ((count++ > 0 && !expect(',')) || !expect(tokenid::identifier)) + { + consume_until('}'); + accept(';'); + return false; + } member.name = std::move(_token.literal_as_string); member.location = std::move(_token.location); if (member.type.is_void()) - parse_success = false, + { + parse_success = false; error(member.location, 3038, '\'' + member.name + "': struct members cannot be void"); + } if (member.type.is_struct()) // Nesting structures would make input/output argument flattening more complicated, so prevent it for now - parse_success = false, + { + parse_success = false; error(member.location, 3090, '\'' + member.name + "': nested struct members are not supported"); + } if (member.type.has(type::q_in) || member.type.has(type::q_out)) - parse_success = false, + { + parse_success = false; error(member.location, 3055, '\'' + member.name + "': struct members cannot be declared 'in' or 'out'"); + } if (member.type.has(type::q_const)) - parse_success = false, + { + parse_success = false; error(member.location, 3035, '\'' + member.name + "': struct members cannot be declared 'const'"); + } if 
(member.type.has(type::q_extern)) - parse_success = false, + { + parse_success = false; error(member.location, 3006, '\'' + member.name + "': struct members cannot be declared 'extern'"); + } if (member.type.has(type::q_static)) - parse_success = false, + { + parse_success = false; error(member.location, 3007, '\'' + member.name + "': struct members cannot be declared 'static'"); + } if (member.type.has(type::q_uniform)) - parse_success = false, + { + parse_success = false; error(member.location, 3047, '\'' + member.name + "': struct members cannot be declared 'uniform'"); + } if (member.type.has(type::q_groupshared)) - parse_success = false, + { + parse_success = false; error(member.location, 3010, '\'' + member.name + "': struct members cannot be declared 'groupshared'"); + } // Modify member specific type, so that following members in the declaration list are not affected by this - if (!parse_array_size(member.type)) - return consume_until('}'), accept(';'), false; - else if (member.type.array_length < 0) - parse_success = false, + if (!parse_array_length(member.type)) + { + consume_until('}'); + accept(';'); + return false; + } + + if (member.type.is_unbounded_array()) + { + parse_success = false; error(member.location, 3072, '\'' + member.name + "': array dimensions of struct members must be explicit"); + } // Structure members may have semantics to use them as input/output types if (accept(':')) { if (!expect(tokenid::identifier)) - return consume_until('}'), accept(';'), false; + { + consume_until('}'); + accept(';'); + return false; + } member.semantic = std::move(_token.literal_as_string); // Make semantic upper case to simplify comparison later on @@ -959,57 +1178,76 @@ bool reshadefx::parser::parse_struct() } } - // Save member name and type for book keeping + // Save member name and type for bookkeeping info.member_list.push_back(member); - } while (!peek(';')); + } + while (!peek(';')); if (!expect(';')) - return consume_until('}'), accept(';'), false; 
+ { + consume_until('}'); + accept(';'); + return false; + } } // Empty structures are valid, but not usually intended, so emit a warning if (info.member_list.empty()) - warning(location, 5001, "struct has no members"); + warning(struct_location, 5001, "struct has no members"); // Define the structure now that information about all the member types was gathered - const auto id = _codegen->define_struct(location, info); + const codegen::id id = _codegen->define_struct(struct_location, info); // Insert the symbol into the symbol table - const symbol symbol = { symbol_type::structure, id }; + symbol symbol = { symbol_type::structure, id }; if (!insert_symbol(info.name, symbol, true)) - return error(location, 3003, "redefinition of '" + info.name + '\''), false; + { + error(struct_location, 3003, "redefinition of '" + info.name + '\''); + return false; + } return expect('}') && parse_success; } -bool reshadefx::parser::parse_function(type type, std::string name) +bool reshadefx::parser::parse_function(type type, std::string name, shader_type stype, int num_threads[3]) { - const auto location = std::move(_token.location); + const location function_location = std::move(_token.location); if (!expect('(')) // Functions always have a parameter list return false; - if (type.qualifiers != 0) - return error(location, 3047, '\'' + name + "': function return type cannot have any qualifiers"), false; - function_info info; + if (type.qualifiers != 0) + { + error(function_location, 3047, '\'' + name + "': function return type cannot have any qualifiers"); + return false; + } + + function info; info.name = name; info.unique_name = 'F' + current_scope().name + name; std::replace(info.unique_name.begin(), info.unique_name.end(), ':', '_'); info.return_type = type; - _current_function = &info; + info.type = stype; + info.num_threads[0] = num_threads[0]; + info.num_threads[1] = num_threads[1]; + info.num_threads[2] = num_threads[2]; + + _codegen->_current_function = &info; bool 
parse_success = true; bool expect_parenthesis = true; - // Enter function scope (and leave it again when finished parsing this function) - enter_scope(); - on_scope_exit _([this]() { - leave_scope(); - _codegen->leave_function(); - _current_function = nullptr; - }); + // Enter function scope (and leave it again when parsing this function finished) + scope_guard _( + [this]() { + enter_scope(); + }, + [this]() { + leave_scope(); + _codegen->leave_function(); + }); while (!peek(')')) { @@ -1021,7 +1259,7 @@ bool reshadefx::parser::parse_function(type type, std::string name) break; } - struct_member_info param; + member_type param; if (!parse_type(param.type)) { @@ -1044,39 +1282,56 @@ bool reshadefx::parser::parse_function(type type, std::string name) param.location = std::move(_token.location); if (param.type.is_void()) - parse_success = false, + { + parse_success = false; error(param.location, 3038, '\'' + param.name + "': function parameters cannot be void"); + } if (param.type.has(type::q_extern)) - parse_success = false, + { + parse_success = false; error(param.location, 3006, '\'' + param.name + "': function parameters cannot be declared 'extern'"); + } if (param.type.has(type::q_static)) - parse_success = false, + { + parse_success = false; error(param.location, 3007, '\'' + param.name + "': function parameters cannot be declared 'static'"); + } if (param.type.has(type::q_uniform)) - parse_success = false, + { + parse_success = false; error(param.location, 3047, '\'' + param.name + "': function parameters cannot be declared 'uniform', consider placing in global scope instead"); + } if (param.type.has(type::q_groupshared)) - parse_success = false, + { + parse_success = false; error(param.location, 3010, '\'' + param.name + "': function parameters cannot be declared 'groupshared'"); + } if (param.type.has(type::q_out) && param.type.has(type::q_const)) - parse_success = false, + { + parse_success = false; error(param.location, 3046, '\'' + param.name + "': output 
parameters cannot be declared 'const'"); + } else if (!param.type.has(type::q_out)) - param.type.qualifiers |= type::q_in; // Function parameters are implicitly 'in' if not explicitly defined as 'out' + { + // Function parameters are implicitly 'in' if not explicitly defined as 'out' + param.type.qualifiers |= type::q_in; + } - if (!parse_array_size(param.type)) + if (!parse_array_length(param.type)) { parse_success = false; expect_parenthesis = false; consume_until(')'); break; } - else if (param.type.array_length < 0) + + if (param.type.is_unbounded_array()) { parse_success = false; error(param.location, 3072, '\'' + param.name + "': array dimensions of function parameters must be explicit"); + param.type.array_length = 0; } // Handle parameter type semantic @@ -1117,6 +1372,38 @@ bool reshadefx::parser::parse_function(type type, std::string name) } } + // Handle default argument + if (accept('=')) + { + expression default_value_exp; + if (!parse_expression_multary(default_value_exp)) + { + parse_success = false; + expect_parenthesis = false; + consume_until(')'); + break; + } + + default_value_exp.add_cast_operation(param.type); + + if (!default_value_exp.is_constant) + { + parse_success = false; + error(default_value_exp.location, 3011, '\'' + param.name + "': value must be a literal expression"); + } + + param.default_value = std::move(default_value_exp.constant); + param.has_default_value = true; + } + else + { + if (!info.parameter_list.empty() && info.parameter_list.back().has_default_value) + { + parse_success = false; + error(param.location, 3044, '\'' + name + "': missing default value for parameter '" + param.name + '\''); + } + } + info.parameter_list.push_back(std::move(param)); } @@ -1128,8 +1415,12 @@ bool reshadefx::parser::parse_function(type type, std::string name) { if (!expect(tokenid::identifier)) return false; + if (type.is_void()) - return error(_token.location, 3076, '\'' + name + "': void function cannot have a semantic"), false; + { + 
error(_token.location, 3076, '\'' + name + "': void function cannot have a semantic"); + return false; + } info.return_semantic = std::move(_token.literal_as_string); // Make semantic upper case to simplify comparison later on @@ -1141,21 +1432,32 @@ bool reshadefx::parser::parse_function(type type, std::string name) // Check if this is a function declaration without a body if (accept(';')) - return error(location, 3510, '\'' + name + "': function is missing an implementation"), false; + { + error(function_location, 3510, '\'' + name + "': function is missing an implementation"); + return false; + } // Define the function now that information about the declaration was gathered - const auto id = _codegen->define_function(location, info); + const codegen::id id = _codegen->define_function(function_location, info); // Insert the function and parameter symbols into the symbol table and update current function pointer to the permanent one symbol symbol = { symbol_type::function, id, { type::t_function } }; - symbol.function = _current_function = &_codegen->get_function(id); + symbol.function = &_codegen->get_function(id); if (!insert_symbol(name, symbol, true)) - return error(location, 3003, "redefinition of '" + name + '\''), false; + { + error(function_location, 3003, "redefinition of '" + name + '\''); + return false; + } - for (const struct_member_info ¶m : info.parameter_list) - if (!insert_symbol(param.name, { symbol_type::variable, param.definition, param.type })) - return error(param.location, 3003, "redefinition of '" + param.name + '\''), false; + for (const member_type ¶m : info.parameter_list) + { + if (!insert_symbol(param.name, { symbol_type::variable, param.id, param.type })) + { + error(param.location, 3003, "redefinition of '" + param.name + '\''); + return false; + } + } // A function has to start with a new block _codegen->enter_block(_codegen->create_block()); @@ -1172,12 +1474,18 @@ bool reshadefx::parser::parse_function(type type, std::string name) 
bool reshadefx::parser::parse_variable(type type, std::string name, bool global) { - const auto location = std::move(_token.location); + const location variable_location = std::move(_token.location); if (type.is_void()) - return error(location, 3038, '\'' + name + "': variables cannot be void"), false; + { + error(variable_location, 3038, '\'' + name + "': variables cannot be void"); + return false; + } if (type.has(type::q_in) || type.has(type::q_out)) - return error(location, 3055, '\'' + name + "': variables cannot be declared 'in' or 'out'"), false; + { + error(variable_location, 3055, '\'' + name + "': variables cannot be declared 'in' or 'out'"); + return false; + } // Local and global variables have different requirements if (global) @@ -1187,23 +1495,32 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) { // Global variables that are 'static' cannot be of another storage class if (type.has(type::q_uniform)) - return error(location, 3007, '\'' + name + "': uniform global variables cannot be declared 'static'"), false; + { + error(variable_location, 3007, '\'' + name + "': uniform global variables cannot be declared 'static'"); + return false; + } // The 'volatile' qualifier is only valid memory object declarations that are storage images or uniform blocks if (type.has(type::q_volatile)) - return error(location, 3008, '\'' + name + "': global variables cannot be declared 'volatile'"), false; + { + error(variable_location, 3008, '\'' + name + "': global variables cannot be declared 'volatile'"); + return false; + } } else if (!type.has(type::q_groupshared)) { // Make all global variables 'uniform' by default, since they should be externally visible without the 'static' keyword if (!type.has(type::q_uniform) && !type.is_object()) - warning(location, 5000, '\'' + name + "': global variables are considered 'uniform' by default"); + warning(variable_location, 5000, '\'' + name + "': global variables are considered 'uniform' by 
default"); // Global variables that are not 'static' are always 'extern' and 'uniform' type.qualifiers |= type::q_extern | type::q_uniform; // It is invalid to make 'uniform' variables constant, since they can be modified externally if (type.has(type::q_const)) - return error(location, 3035, '\'' + name + "': variables which are 'uniform' cannot be declared 'const'"), false; + { + error(variable_location, 3035, '\'' + name + "': variables which are 'uniform' cannot be declared 'const'"); + return false; + } } } else @@ -1213,32 +1530,48 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) type.qualifiers &= ~type::q_static; if (type.has(type::q_extern)) - return error(location, 3006, '\'' + name + "': local variables cannot be declared 'extern'"), false; + { + error(variable_location, 3006, '\'' + name + "': local variables cannot be declared 'extern'"); + return false; + } if (type.has(type::q_uniform)) - return error(location, 3047, '\'' + name + "': local variables cannot be declared 'uniform'"), false; + { + error(variable_location, 3047, '\'' + name + "': local variables cannot be declared 'uniform'"); + return false; + } if (type.has(type::q_groupshared)) - return error(location, 3010, '\'' + name + "': local variables cannot be declared 'groupshared'"), false; + { + error(variable_location, 3010, '\'' + name + "': local variables cannot be declared 'groupshared'"); + return false; + } if (type.is_object()) - return error(location, 3038, '\'' + name + "': local variables cannot be texture, sampler or storage objects"), false; + { + error(variable_location, 3038, '\'' + name + "': local variables cannot be texture, sampler or storage objects"); + return false; + } } // The variable name may be followed by an optional array size expression - if (!parse_array_size(type)) + if (!parse_array_length(type)) return false; bool parse_success = true; expression initializer; - texture_info texture_info; - sampler_info sampler_info; - 
storage_info storage_info; + texture texture_info; + sampler sampler_info; + storage storage_info; if (accept(':')) { if (!expect(tokenid::identifier)) return false; - else if (!global) // Only global variables can have a semantic - return error(_token.location, 3043, '\'' + name + "': local variables cannot have semantics"), false; + + if (!global) // Only global variables can have a semantic + { + error(_token.location, 3043, '\'' + name + "': local variables cannot have semantics"); + return false; + } std::string &semantic = texture_info.semantic; semantic = std::move(_token.literal_as_string); @@ -1262,16 +1595,29 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) return false; if (type.has(type::q_groupshared)) - return error(initializer.location, 3009, '\'' + name + "': variables declared 'groupshared' cannot have an initializer"), false; + { + error(initializer.location, 3009, '\'' + name + "': variables declared 'groupshared' cannot have an initializer"); + return false; + } + // TODO: This could be resolved by initializing these at the beginning of the entry point if (global && !initializer.is_constant) - return error(initializer.location, 3011, '\'' + name + "': initial value must be a literal expression"), false; + { + error(initializer.location, 3011, '\'' + name + "': initial value must be a literal expression"); + return false; + } // Check type compatibility - if ((type.array_length >= 0 && initializer.type.array_length != type.array_length) || !type::rank(initializer.type, type)) - return error(initializer.location, 3017, '\'' + name + "': initial value (" + initializer.type.description() + ") does not match variable type (" + type.description() + ')'), false; + if ((!type.is_unbounded_array() && initializer.type.array_length != type.array_length) || !type::rank(initializer.type, type)) + { + error(initializer.location, 3017, '\'' + name + "': initial value (" + initializer.type.description() + ") does not match 
variable type (" + type.description() + ')'); + return false; + } if ((initializer.type.rows < type.rows || initializer.type.cols < type.cols) && !initializer.type.is_scalar()) - return error(initializer.location, 3017, '\'' + name + "': cannot implicitly convert these vector types (from " + initializer.type.description() + " to " + type.description() + ')'), false; + { + error(initializer.location, 3017, '\'' + name + "': cannot implicitly convert these vector types (from " + initializer.type.description() + " to " + type.description() + ')'); + return false; + } // Deduce array size from the initializer expression if (initializer.type.is_array()) @@ -1289,30 +1635,43 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) else if (type.is_numeric() || type.is_struct()) // Numeric variables without an initializer need special handling { if (type.has(type::q_const)) // Constants have to have an initial value - return error(location, 3012, '\'' + name + "': missing initial value"), false; - else if (!type.has(type::q_uniform)) // Zero initialize all global variables - initializer.reset_to_rvalue_constant(location, {}, type); + { + error(variable_location, 3012, '\'' + name + "': missing initial value"); + return false; + } + + if (!type.has(type::q_uniform)) // Zero initialize all global variables + initializer.reset_to_rvalue_constant(variable_location, {}, type); } else if (global && accept('{')) // Textures and samplers can have a property block attached to their declaration { // Non-numeric variables cannot be constants if (type.has(type::q_const)) - return error(location, 3035, '\'' + name + "': this variable type cannot be declared 'const'"), false; + { + error(variable_location, 3035, '\'' + name + "': this variable type cannot be declared 'const'"); + return false; + } while (!peek('}')) { if (!expect(tokenid::identifier)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } - const auto property_name 
= std::move(_token.literal_as_string); - const auto property_location = std::move(_token.location); + location property_location = std::move(_token.location); + const std::string property_name = std::move(_token.literal_as_string); if (!expect('=')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } backup(); - expression expression; + expression property_exp; if (accept(tokenid::identifier)) // Handle special enumeration names for property values { @@ -1325,6 +1684,7 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) static const std::unordered_map s_enum_values = { { "NONE", 0 }, { "POINT", 0 }, { "LINEAR", 1 }, + { "ANISOTROPIC", 0x55 }, { "WRAP", uint32_t(texture_address_mode::wrap) }, { "REPEAT", uint32_t(texture_address_mode::wrap) }, { "MIRROR", uint32_t(texture_address_mode::mirror) }, { "CLAMP", uint32_t(texture_address_mode::clamp) }, @@ -1349,27 +1709,37 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) // Look up identifier in list of possible enumeration names if (const auto it = s_enum_values.find(_token.literal_as_string); it != s_enum_values.end()) - expression.reset_to_rvalue_constant(_token.location, it->second); + property_exp.reset_to_rvalue_constant(_token.location, it->second); else // No match found, so rewind to parser state before the identifier was consumed and try parsing it as a normal expression restore(); } // Parse right hand side as normal expression if no special enumeration name was matched already - if (!expression.is_constant && !parse_expression_multary(expression)) - return consume_until('}'), false; + if (!property_exp.is_constant && !parse_expression_multary(property_exp)) + { + consume_until('}'); + return false; + } if (property_name == "Texture") { // Ignore invalid symbols that were added during error recovery - if (expression.base == 0xFFFFFFFF) - return consume_until('}'), false; + if (property_exp.base == UINT32_MAX) + { + 
consume_until('}'); + return false; + } - if (!expression.type.is_texture()) - return error(expression.location, 3020, "type mismatch, expected texture name"), consume_until('}'), false; + if (!property_exp.type.is_texture()) + { + error(property_exp.location, 3020, "type mismatch, expected texture name"); + consume_until('}'); + return false; + } if (type.is_sampler() || type.is_storage()) { - reshadefx::texture_info &target_info = _codegen->get_texture(expression.base); + texture &target_info = _codegen->get_texture(property_exp.base); if (type.is_storage()) // Texture is used as storage target_info.storage_access = true; @@ -1381,15 +1751,19 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) } else { - if (!expression.is_constant || !expression.type.is_scalar()) - return error(expression.location, 3538, "value must be a literal scalar expression"), consume_until('}'), false; + if (!property_exp.is_constant || !property_exp.type.is_scalar()) + { + error(property_exp.location, 3538, "value must be a literal scalar expression"); + consume_until('}'); + return false; + } // All states below expect the value to be of an integer type - expression.add_cast_operation({ type::t_int, 1, 1 }); - const int value = expression.constant.as_int[0]; + property_exp.add_cast_operation({ type::t_int, 1, 1 }); + const int value = property_exp.constant.as_int[0]; if (value < 0) // There is little use for negative values, so warn in those cases - warning(expression.location, 3571, "negative value specified for property '" + property_name + '\''); + warning(property_exp.location, 3571, "negative value specified for property '" + property_name + '\''); if (type.is_texture()) { @@ -1405,7 +1779,7 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) else if (property_name == "Format") texture_info.format = static_cast(value); else - return error(property_location, 3004, "unrecognized property '" + property_name + '\''), 
consume_until('}'), false; + error(property_location, 3004, "unrecognized property '" + property_name + '\''); } else if (type.is_sampler()) { @@ -1418,11 +1792,12 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) else if (property_name == "AddressW") sampler_info.address_w = static_cast(value); else if (property_name == "MinFilter") - sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x0F) | ((value << 4) & 0x30)); // Combine sampler filter components into a single filter enumeration value + // Combine sampler filter components into a single filter enumeration value + sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x4F) | ((value & 0x03) << 4) | (value & 0x40)); else if (property_name == "MagFilter") - sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x33) | ((value << 2) & 0x0C)); + sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x73) | ((value & 0x03) << 2) | (value & 0x40)); else if (property_name == "MipFilter") - sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x3C) | (value & 0x03)); + sampler_info.filter = static_cast((uint32_t(sampler_info.filter) & 0x7C) | ((value & 0x03) ) | (value & 0x40)); else if (property_name == "MinLOD" || property_name == "MaxMipLevel") sampler_info.min_lod = static_cast(value); else if (property_name == "MaxLOD") @@ -1430,19 +1805,22 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) else if (property_name == "MipLODBias" || property_name == "MipMapLodBias") sampler_info.lod_bias = static_cast(value); else - return error(property_location, 3004, "unrecognized property '" + property_name + '\''), consume_until('}'), false; + error(property_location, 3004, "unrecognized property '" + property_name + '\''); } else if (type.is_storage()) { if (property_name == "MipLOD" || property_name == "MipLevel") storage_info.level = value > 0 && value < std::numeric_limits::max() ? 
static_cast(value) : 0; else - return error(property_location, 3004, "unrecognized property '" + property_name + '\''), consume_until('}'), false; + error(property_location, 3004, "unrecognized property '" + property_name + '\''); } } if (!expect(';')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } } if (!expect('}')) @@ -1451,8 +1829,11 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) } // At this point the array size should be known (either from the declaration or the initializer) - if (type.array_length < 0) - return error(location, 3074, '\'' + name + "': implicit array missing initial value"), false; + if (type.is_unbounded_array()) + { + error(variable_location, 3074, '\'' + name + "': implicit array missing initial value"); + return false; + } symbol symbol; @@ -1476,8 +1857,8 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) texture_info.annotations = std::move(sampler_info.annotations); - symbol = { symbol_type::variable, 0, type }; - symbol.id = _codegen->define_texture(location, texture_info); + const codegen::id id = _codegen->define_texture(variable_location, texture_info); + symbol = { symbol_type::variable, id, type }; } // Samplers are actually combined image samplers else if (type.is_sampler()) @@ -1485,18 +1866,30 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) assert(global); if (sampler_info.texture_name.empty()) - return error(location, 3012, '\'' + name + "': missing 'Texture' property"), false; + { + error(variable_location, 3012, '\'' + name + "': missing 'Texture' property"); + return false; + } if (type.texture_dimension() != static_cast(texture_info.type)) - return error(location, 3521, '\'' + name + "': type mismatch between texture and sampler type"), false; + { + error(variable_location, 3521, '\'' + name + "': type mismatch between texture and sampler type"); + return false; + } if 
(sampler_info.srgb && texture_info.format != texture_format::rgba8) - return error(location, 4582, '\'' + name + "': texture does not support sRGB sampling (only textures with RGBA8 format do)"), false; + { + error(variable_location, 4582, '\'' + name + "': texture does not support sRGB sampling (only textures with RGBA8 format do)"); + return false; + } if (texture_info.format == texture_format::r32i ? !type.is_integral() || !type.is_signed() : texture_info.format == texture_format::r32u ? !type.is_integral() || !type.is_unsigned() : !type.is_floating_point()) - return error(location, 4582, '\'' + name + "': type mismatch between texture format and sampler element type"), false; + { + error(variable_location, 4582, '\'' + name + "': type mismatch between texture format and sampler element type"); + return false; + } sampler_info.name = name; sampler_info.type = type; @@ -1505,24 +1898,33 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) sampler_info.unique_name = 'V' + current_scope().name + name; std::replace(sampler_info.unique_name.begin(), sampler_info.unique_name.end(), ':', '_'); - symbol = { symbol_type::variable, 0, type }; - symbol.id = _codegen->define_sampler(location, texture_info, sampler_info); + const codegen::id id = _codegen->define_sampler(variable_location, texture_info, sampler_info); + symbol = { symbol_type::variable, id, type }; } else if (type.is_storage()) { assert(global); if (storage_info.texture_name.empty()) - return error(location, 3012, '\'' + name + "': missing 'Texture' property"), false; + { + error(variable_location, 3012, '\'' + name + "': missing 'Texture' property"); + return false; + } if (type.texture_dimension() != static_cast(texture_info.type)) - return error(location, 3521, '\'' + name + "': type mismatch between texture and storage type"), false; + { + error(variable_location, 3521, '\'' + name + "': type mismatch between texture and storage type"); + return false; + } if 
(texture_info.format == texture_format::r32i ? !type.is_integral() || !type.is_signed() : texture_info.format == texture_format::r32u ? !type.is_integral() || !type.is_unsigned() : !type.is_floating_point()) - return error(location, 4582, '\'' + name + "': type mismatch between texture format and storage element type"), false; + { + error(variable_location, 4582, '\'' + name + "': type mismatch between texture format and storage element type"); + return false; + } storage_info.name = name; storage_info.type = type; @@ -1534,15 +1936,15 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) if (storage_info.level > texture_info.levels - 1) storage_info.level = texture_info.levels - 1; - symbol = { symbol_type::variable, 0, type }; - symbol.id = _codegen->define_storage(location, texture_info, storage_info); + const codegen::id id = _codegen->define_storage(variable_location, texture_info, storage_info); + symbol = { symbol_type::variable, id, type }; } // Uniform variables are put into a global uniform buffer structure else if (type.has(type::q_uniform)) { assert(global); - uniform_info uniform_info; + uniform uniform_info; uniform_info.name = name; uniform_info.type = type; @@ -1551,8 +1953,8 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) uniform_info.initializer_value = std::move(initializer.constant); uniform_info.has_initializer_value = initializer.is_constant; - symbol = { symbol_type::variable, 0, type }; - symbol.id = _codegen->define_uniform(location, uniform_info); + const codegen::id id = _codegen->define_uniform(variable_location, uniform_info); + symbol = { symbol_type::variable, id, type }; } // All other variables are separate entities else @@ -1562,14 +1964,17 @@ bool reshadefx::parser::parse_variable(type type, std::string name, bool global) std::replace(unique_name.begin(), unique_name.end(), ':', '_'); symbol = { symbol_type::variable, 0, type }; - symbol.id = 
_codegen->define_variable(location, type, std::move(unique_name), global, + symbol.id = _codegen->define_variable(variable_location, type, std::move(unique_name), global, // Shared variables cannot have an initializer type.has(type::q_groupshared) ? 0 : _codegen->emit_load(initializer)); } // Insert the symbol into the symbol table if (!insert_symbol(name, symbol, global)) - return error(location, 3003, "redefinition of '" + name + '\''), false; + { + error(variable_location, 3003, "redefinition of '" + name + '\''); + return false; + } return parse_success; } @@ -1579,7 +1984,7 @@ bool reshadefx::parser::parse_technique() if (!expect(tokenid::identifier)) return false; - technique_info info; + technique info; info.name = std::move(_token.literal_as_string); bool parse_success = parse_annotations(info.annotations); @@ -1589,12 +1994,19 @@ bool reshadefx::parser::parse_technique() while (!peek('}')) { - if (pass_info pass; parse_technique_pass(pass)) + pass pass; + if (parse_technique_pass(pass)) + { info.passes.push_back(std::move(pass)); - else { + } + else + { parse_success = false; if (!peek(tokenid::pass) && !peek('}')) // If there is another pass definition following, try to parse that despite the error - return consume_until('}'), false; + { + consume_until('}'); + return false; + } } } @@ -1602,12 +2014,12 @@ bool reshadefx::parser::parse_technique() return expect('}') && parse_success; } -bool reshadefx::parser::parse_technique_pass(pass_info &info) +bool reshadefx::parser::parse_technique_pass(pass &info) { if (!expect(tokenid::pass)) return false; - const auto pass_location = std::move(_token.location); + const location pass_location = std::move(_token.location); // Passes can have an optional name if (accept(tokenid::identifier)) @@ -1615,7 +2027,7 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) bool parse_success = true; bool targets_support_srgb = true; - function_info vs_info, ps_info, cs_info; + function vs_info = {}, ps_info = {}, 
cs_info = {}; if (!expect('{')) return false; @@ -1624,16 +2036,22 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) { // Parse pass states if (!expect(tokenid::identifier)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } - auto location = std::move(_token.location); - const auto state = std::move(_token.literal_as_string); + location state_location = std::move(_token.location); + const std::string state_name = std::move(_token.literal_as_string); if (!expect('=')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } - const bool is_shader_state = state == "VertexShader" || state == "PixelShader" || state == "ComputeShader"; - const bool is_texture_state = state.compare(0, 12, "RenderTarget") == 0 && (state.size() == 12 || (state[12] >= '0' && state[12] < '8')); + const bool is_shader_state = state_name.size() > 6 && state_name.compare(state_name.size() - 6, 6, "Shader") == 0; // VertexShader, PixelShader, ComputeShader, ... 
+ const bool is_texture_state = state_name.compare(0, 12, "RenderTarget") == 0 && (state_name.size() == 12 || (state_name[12] >= '0' && state_name[12] < '8')); // Shader and render target assignment looks up values in the symbol table, so handle those separately from the other states if (is_shader_state || is_texture_state) @@ -1641,28 +2059,49 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) std::string identifier; scoped_symbol symbol; if (!accept_symbol(identifier, symbol)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } - location = std::move(_token.location); + state_location = std::move(_token.location); - int num_threads[3] = { 1, 1, 1 }; + int num_threads[3] = { 0, 0, 0 }; if (accept('<')) { expression x, y, z; if (!parse_expression_multary(x, 8) || !expect(',') || !parse_expression_multary(y, 8)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } // Parse optional third dimension (defaults to 1) z.reset_to_rvalue_constant({}, 1); if (accept(',') && !parse_expression_multary(z, 8)) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } if (!x.is_constant) - return error(x.location, 3011, "value must be a literal expression"), consume_until('}'), false; + { + error(x.location, 3011, "value must be a literal expression"); + consume_until('}'); + return false; + } if (!y.is_constant) - return error(y.location, 3011, "value must be a literal expression"), consume_until('}'), false; + { + error(y.location, 3011, "value must be a literal expression"); + consume_until('}'); + return false; + } if (!z.is_constant) - return error(z.location, 3011, "value must be a literal expression"), consume_until('}'), false; + { + error(z.location, 3011, "value must be a literal expression"); + consume_until('}'); + return false; + } x.add_cast_operation({ type::t_int, 1, 1 }); y.add_cast_operation({ type::t_int, 1, 1 }); z.add_cast_operation({ type::t_int, 1, 1 }); @@ 
-1671,40 +2110,82 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) num_threads[2] = z.constant.as_int[0]; if (!expect('>')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } } // Ignore invalid symbols that were added during error recovery - if (symbol.id != 0xFFFFFFFF) + if (symbol.id != UINT32_MAX) { if (is_shader_state) { if (!symbol.id) - parse_success = false, - error(location, 3501, "undeclared identifier '" + identifier + "', expected function name"); + { + parse_success = false; + error(state_location, 3501, "undeclared identifier '" + identifier + "', expected function name"); + } else if (!symbol.type.is_function()) - parse_success = false, - error(location, 3020, "type mismatch, expected function name"); - else { + { + parse_success = false; + error(state_location, 3020, "type mismatch, expected function name"); + } + else + { // Look up the matching function info for this function definition - function_info &function_info = _codegen->get_function(symbol.id); + const function &function_info = _codegen->get_function(symbol.id); // We potentially need to generate a special entry point function which translates between function parameters and input/output variables - switch (state[0]) + switch (state_name[0]) { case 'V': vs_info = function_info; - _codegen->define_entry_point(vs_info, shader_type::vs); + if (vs_info.type != shader_type::unknown && vs_info.type != shader_type::vertex) + { + parse_success = false; + error(state_location, 3020, "type mismatch, expected vertex shader function"); + break; + } + vs_info.type = shader_type::vertex; + _codegen->define_entry_point(vs_info); info.vs_entry_point = vs_info.unique_name; break; case 'P': ps_info = function_info; - _codegen->define_entry_point(ps_info, shader_type::ps); + if (ps_info.type != shader_type::unknown && ps_info.type != shader_type::pixel) + { + parse_success = false; + error(state_location, 3020, "type mismatch, expected pixel shader function"); 
+ break; + } + ps_info.type = shader_type::pixel; + _codegen->define_entry_point(ps_info); info.ps_entry_point = ps_info.unique_name; break; case 'C': cs_info = function_info; - _codegen->define_entry_point(cs_info, shader_type::cs, num_threads); + if (cs_info.type != shader_type::unknown && cs_info.type != shader_type::compute) + { + parse_success = false; + error(state_location, 3020, "type mismatch, expected compute shader function"); + break; + } + cs_info.type = shader_type::compute; + // Only use number of threads from pass when specified, otherwise fall back to number specified on the function definition with an attribute + if (num_threads[0] != 0) + { + cs_info.num_threads[0] = num_threads[0]; + cs_info.num_threads[1] = num_threads[1]; + cs_info.num_threads[2] = num_threads[2]; + } + else + { + cs_info.num_threads[0] = std::max(cs_info.num_threads[0], 1); + cs_info.num_threads[1] = std::max(cs_info.num_threads[1], 1); + cs_info.num_threads[2] = std::max(cs_info.num_threads[2], 1); + } + _codegen->define_entry_point(cs_info); info.cs_entry_point = cs_info.unique_name; break; } @@ -1715,33 +2196,51 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) assert(is_texture_state); if (!symbol.id) - parse_success = false, - error(location, 3004, "undeclared identifier '" + identifier + "', expected texture name"); + { + parse_success = false; + error(state_location, 3004, "undeclared identifier '" + identifier + "', expected texture name"); + } else if (!symbol.type.is_texture()) - parse_success = false, - error(location, 3020, "type mismatch, expected texture name"); + { + parse_success = false; + error(state_location, 3020, "type mismatch, expected texture name"); + } else if (symbol.type.texture_dimension() != 2) - parse_success = false, - error(location, 3020, "cannot use texture" + std::to_string(symbol.type.texture_dimension()) + "D as render target"); - else { - reshadefx::texture_info &target_info = _codegen->get_texture(symbol.id); - // Texture 
is used as a render target - target_info.render_target = true; + { + parse_success = false; + error(state_location, 3020, "cannot use texture" + std::to_string(symbol.type.texture_dimension()) + "D as render target"); + } + else + { + texture &target_info = _codegen->get_texture(symbol.id); - // Verify that all render targets in this pass have the same dimensions - if (info.viewport_width != 0 && info.viewport_height != 0 && (target_info.width != info.viewport_width || target_info.height != info.viewport_height)) - parse_success = false, - error(location, 4545, "cannot use multiple render targets with different texture dimensions (is " + std::to_string(target_info.width) + 'x' + std::to_string(target_info.height) + ", but expected " + std::to_string(info.viewport_width) + 'x' + std::to_string(info.viewport_height) + ')'); + if (target_info.semantic.empty()) + { + // Texture is used as a render target + target_info.render_target = true; - info.viewport_width = target_info.width; - info.viewport_height = target_info.height; + // Verify that all render targets in this pass have the same dimensions + if (info.viewport_width != 0 && info.viewport_height != 0 && (target_info.width != info.viewport_width || target_info.height != info.viewport_height)) + { + parse_success = false; + error(state_location, 4545, "cannot use multiple render targets with different texture dimensions (is " + std::to_string(target_info.width) + 'x' + std::to_string(target_info.height) + ", but expected " + std::to_string(info.viewport_width) + 'x' + std::to_string(info.viewport_height) + ')'); + } - const auto target_index = state.size() > 12 ? 
(state[12] - '0') : 0; - info.render_target_names[target_index] = target_info.unique_name; + info.viewport_width = target_info.width; + info.viewport_height = target_info.height; - // Only RGBA8 format supports sRGB writes across all APIs - if (target_info.format != texture_format::rgba8) - targets_support_srgb = false; + const int target_index = state_name.size() > 12 ? (state_name[12] - '0') : 0; + info.render_target_names[target_index] = target_info.unique_name; + + // Only RGBA8 format supports sRGB writes across all APIs + if (target_info.format != texture_format::rgba8) + targets_support_srgb = false; + } + else + { + parse_success = false; + error(state_location, 3020, "cannot use texture with semantic as render target"); + } } } } @@ -1754,7 +2253,7 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) { backup(); - expression expression; + expression state_exp; if (accept(tokenid::identifier)) // Handle special enumeration names for pass states { @@ -1766,34 +2265,34 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) static const std::unordered_map s_enum_values = { { "NONE", 0 }, { "ZERO", 0 }, { "ONE", 1 }, - { "ADD", uint32_t(pass_blend_op::add) }, - { "SUBTRACT", uint32_t(pass_blend_op::subtract) }, - { "REVSUBTRACT", uint32_t(pass_blend_op::reverse_subtract) }, - { "MIN", uint32_t(pass_blend_op::min) }, - { "MAX", uint32_t(pass_blend_op::max) }, - { "SRCCOLOR", uint32_t(pass_blend_factor::source_color) }, - { "INVSRCCOLOR", uint32_t(pass_blend_factor::one_minus_source_color) }, - { "DESTCOLOR", uint32_t(pass_blend_factor::dest_color) }, - { "INVDESTCOLOR", uint32_t(pass_blend_factor::one_minus_dest_color) }, - { "SRCALPHA", uint32_t(pass_blend_factor::source_alpha) }, - { "INVSRCALPHA", uint32_t(pass_blend_factor::one_minus_source_alpha) }, - { "DESTALPHA", uint32_t(pass_blend_factor::dest_alpha) }, - { "INVDESTALPHA", uint32_t(pass_blend_factor::one_minus_dest_alpha) }, - { "KEEP", uint32_t(pass_stencil_op::keep) }, - { 
"REPLACE", uint32_t(pass_stencil_op::replace) }, - { "INVERT", uint32_t(pass_stencil_op::invert) }, - { "INCR", uint32_t(pass_stencil_op::increment) }, - { "INCRSAT", uint32_t(pass_stencil_op::increment_saturate) }, - { "DECR", uint32_t(pass_stencil_op::decrement) }, - { "DECRSAT", uint32_t(pass_stencil_op::decrement_saturate) }, - { "NEVER", uint32_t(pass_stencil_func::never) }, - { "EQUAL", uint32_t(pass_stencil_func::equal) }, - { "NEQUAL", uint32_t(pass_stencil_func::not_equal) }, { "NOTEQUAL", uint32_t(pass_stencil_func::not_equal) }, - { "LESS", uint32_t(pass_stencil_func::less) }, - { "GREATER", uint32_t(pass_stencil_func::greater) }, - { "LEQUAL", uint32_t(pass_stencil_func::less_equal) }, { "LESSEQUAL", uint32_t(pass_stencil_func::less_equal) }, - { "GEQUAL", uint32_t(pass_stencil_func::greater_equal) }, { "GREATEREQUAL", uint32_t(pass_stencil_func::greater_equal) }, - { "ALWAYS", uint32_t(pass_stencil_func::always) }, + { "ADD", uint32_t(blend_op::add) }, + { "SUBTRACT", uint32_t(blend_op::subtract) }, + { "REVSUBTRACT", uint32_t(blend_op::reverse_subtract) }, + { "MIN", uint32_t(blend_op::min) }, + { "MAX", uint32_t(blend_op::max) }, + { "SRCCOLOR", uint32_t(blend_factor::source_color) }, + { "INVSRCCOLOR", uint32_t(blend_factor::one_minus_source_color) }, + { "DESTCOLOR", uint32_t(blend_factor::dest_color) }, + { "INVDESTCOLOR", uint32_t(blend_factor::one_minus_dest_color) }, + { "SRCALPHA", uint32_t(blend_factor::source_alpha) }, + { "INVSRCALPHA", uint32_t(blend_factor::one_minus_source_alpha) }, + { "DESTALPHA", uint32_t(blend_factor::dest_alpha) }, + { "INVDESTALPHA", uint32_t(blend_factor::one_minus_dest_alpha) }, + { "KEEP", uint32_t(stencil_op::keep) }, + { "REPLACE", uint32_t(stencil_op::replace) }, + { "INVERT", uint32_t(stencil_op::invert) }, + { "INCR", uint32_t(stencil_op::increment) }, + { "INCRSAT", uint32_t(stencil_op::increment_saturate) }, + { "DECR", uint32_t(stencil_op::decrement) }, + { "DECRSAT", 
uint32_t(stencil_op::decrement_saturate) }, + { "NEVER", uint32_t(stencil_func::never) }, + { "EQUAL", uint32_t(stencil_func::equal) }, + { "NEQUAL", uint32_t(stencil_func::not_equal) }, { "NOTEQUAL", uint32_t(stencil_func::not_equal) }, + { "LESS", uint32_t(stencil_func::less) }, + { "GREATER", uint32_t(stencil_func::greater) }, + { "LEQUAL", uint32_t(stencil_func::less_equal) }, { "LESSEQUAL", uint32_t(stencil_func::less_equal) }, + { "GEQUAL", uint32_t(stencil_func::greater_equal) }, { "GREATEREQUAL", uint32_t(stencil_func::greater_equal) }, + { "ALWAYS", uint32_t(stencil_func::always) }, { "POINTS", uint32_t(primitive_topology::point_list) }, { "POINTLIST", uint32_t(primitive_topology::point_list) }, { "LINES", uint32_t(primitive_topology::line_list) }, @@ -1807,82 +2306,91 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) // Look up identifier in list of possible enumeration names if (const auto it = s_enum_values.find(_token.literal_as_string); it != s_enum_values.end()) - expression.reset_to_rvalue_constant(_token.location, it->second); + state_exp.reset_to_rvalue_constant(_token.location, it->second); else // No match found, so rewind to parser state before the identifier was consumed and try parsing it as a normal expression restore(); } // Parse right hand side as normal expression if no special enumeration name was matched already - if (!expression.is_constant && !parse_expression_multary(expression)) - return consume_until('}'), false; - else if (!expression.is_constant || !expression.type.is_scalar()) - parse_success = false, - error(expression.location, 3011, "pass state value must be a literal scalar expression"); + if (!state_exp.is_constant && !parse_expression_multary(state_exp)) + { + consume_until('}'); + return false; + } + + if (!state_exp.is_constant || !state_exp.type.is_scalar()) + { + parse_success = false; + error(state_exp.location, 3011, "pass state value must be a literal scalar expression"); + } // All states below 
expect the value to be of an unsigned integer type - expression.add_cast_operation({ type::t_uint, 1, 1 }); - const unsigned int value = expression.constant.as_uint[0]; + state_exp.add_cast_operation({ type::t_uint, 1, 1 }); + const unsigned int value = state_exp.constant.as_uint[0]; #define SET_STATE_VALUE_INDEXED(name, info_name, value) \ - else if (constexpr size_t name##_len = sizeof(#name) - 1; state.compare(0, name##_len, #name) == 0 && (state.size() == name##_len || (state[name##_len] >= '0' && state[name##_len] < ('0' + static_cast(std::size(info.info_name)))))) \ + else if (constexpr size_t name##_len = sizeof(#name) - 1; state_name.compare(0, name##_len, #name) == 0 && \ + (state_name.size() == name##_len || (state_name[name##_len] >= '0' && state_name[name##_len] < ('0' + static_cast(std::size(info.info_name)))))) \ { \ - if (state.size() != name##_len) \ - info.info_name[state[name##_len] - '0'] = (value); \ + if (state_name.size() != name##_len) \ + info.info_name[state_name[name##_len] - '0'] = (value); \ else \ for (int i = 0; i < static_cast(std::size(info.info_name)); ++i) \ info.info_name[i] = (value); \ } - if (state == "SRGBWriteEnable") + if (state_name == "SRGBWriteEnable") info.srgb_write_enable = (value != 0); SET_STATE_VALUE_INDEXED(BlendEnable, blend_enable, value != 0) - else if (state == "StencilEnable") + else if (state_name == "StencilEnable") info.stencil_enable = (value != 0); - else if (state == "ClearRenderTargets") + else if (state_name == "ClearRenderTargets") info.clear_render_targets = (value != 0); - SET_STATE_VALUE_INDEXED(ColorWriteMask, color_write_mask, value & 0xFF) - SET_STATE_VALUE_INDEXED(RenderTargetWriteMask, color_write_mask, value & 0xFF) - else if (state == "StencilReadMask" || state == "StencilMask") + SET_STATE_VALUE_INDEXED(ColorWriteMask, render_target_write_mask, value & 0xFF) + SET_STATE_VALUE_INDEXED(RenderTargetWriteMask, render_target_write_mask, value & 0xFF) + else if (state_name == "StencilReadMask" || 
state_name == "StencilMask") info.stencil_read_mask = value & 0xFF; - else if (state == "StencilWriteMask") + else if (state_name == "StencilWriteMask") info.stencil_write_mask = value & 0xFF; - SET_STATE_VALUE_INDEXED(BlendOp, blend_op, static_cast(value)) - SET_STATE_VALUE_INDEXED(BlendOpAlpha, blend_op_alpha, static_cast(value)) - SET_STATE_VALUE_INDEXED(SrcBlend, src_blend, static_cast(value)) - SET_STATE_VALUE_INDEXED(SrcBlendAlpha, src_blend_alpha, static_cast(value)) - SET_STATE_VALUE_INDEXED(DestBlend, dest_blend, static_cast(value)) - SET_STATE_VALUE_INDEXED(DestBlendAlpha, dest_blend_alpha, static_cast(value)) - else if (state == "StencilFunc") - info.stencil_comparison_func = static_cast(value); - else if (state == "StencilRef") + SET_STATE_VALUE_INDEXED(BlendOp, color_blend_op, static_cast(value)) + SET_STATE_VALUE_INDEXED(BlendOpAlpha, alpha_blend_op, static_cast(value)) + SET_STATE_VALUE_INDEXED(SrcBlend, source_color_blend_factor, static_cast(value)) + SET_STATE_VALUE_INDEXED(SrcBlendAlpha, source_alpha_blend_factor, static_cast(value)) + SET_STATE_VALUE_INDEXED(DestBlend, dest_color_blend_factor, static_cast(value)) + SET_STATE_VALUE_INDEXED(DestBlendAlpha, dest_alpha_blend_factor, static_cast(value)) + else if (state_name == "StencilFunc") + info.stencil_comparison_func = static_cast(value); + else if (state_name == "StencilRef") info.stencil_reference_value = value; - else if (state == "StencilPass" || state == "StencilPassOp") - info.stencil_op_pass = static_cast(value); - else if (state == "StencilFail" || state == "StencilFailOp") - info.stencil_op_fail = static_cast(value); - else if (state == "StencilZFail" || state == "StencilDepthFail" || state == "StencilDepthFailOp") - info.stencil_op_depth_fail = static_cast(value); - else if (state == "VertexCount") + else if (state_name == "StencilPass" || state_name == "StencilPassOp") + info.stencil_pass_op = static_cast(value); + else if (state_name == "StencilFail" || state_name == "StencilFailOp") 
+ info.stencil_fail_op = static_cast(value); + else if (state_name == "StencilZFail" || state_name == "StencilDepthFail" || state_name == "StencilDepthFailOp") + info.stencil_depth_fail_op = static_cast(value); + else if (state_name == "VertexCount") info.num_vertices = value; - else if (state == "PrimitiveType" || state == "PrimitiveTopology") + else if (state_name == "PrimitiveType" || state_name == "PrimitiveTopology") info.topology = static_cast(value); - else if (state == "DispatchSizeX") + else if (state_name == "DispatchSizeX") info.viewport_width = value; - else if (state == "DispatchSizeY") + else if (state_name == "DispatchSizeY") info.viewport_height = value; - else if (state == "DispatchSizeZ") + else if (state_name == "DispatchSizeZ") info.viewport_dispatch_z = value; - else if (state == "GenerateMipmaps" || state == "GenerateMipMaps") + else if (state_name == "GenerateMipmaps" || state_name == "GenerateMipMaps") info.generate_mipmaps = (value != 0); else - parse_success = false, - error(location, 3004, "unrecognized pass state '" + state + '\''); + error(state_location, 3004, "unrecognized pass state '" + state_name + '\''); #undef SET_STATE_VALUE_INDEXED } if (!expect(';')) - return consume_until('}'), false; + { + consume_until('}'); + return false; + } } if (parse_success) @@ -1898,24 +2406,16 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) if (!info.vs_entry_point.empty()) warning(pass_location, 3089, "pass is specifying both 'VertexShader' and 'ComputeShader' which cannot be used together"); if (!info.ps_entry_point.empty()) - warning(pass_location, 3089, "pass is specifying both 'PixelShader' and 'ComputeShader' which cannot be used together"); - - for (codegen::id id : cs_info.referenced_samplers) - info.samplers.push_back(_codegen->get_sampler(id)); - for (codegen::id id : cs_info.referenced_storages) - info.storages.push_back(_codegen->get_storage(id)); - } - else if (info.vs_entry_point.empty() || 
info.ps_entry_point.empty()) - { - parse_success = false; - - if (info.vs_entry_point.empty()) - error(pass_location, 3012, "pass is missing 'VertexShader' property"); - if (info.ps_entry_point.empty()) - error(pass_location, 3012, "pass is missing 'PixelShader' property"); + warning(pass_location, 3089, "pass is specifying both 'PixelShader' and 'ComputeShader' which cannot be used together"); } else { + if (info.vs_entry_point.empty()) + { + parse_success = false; + error(pass_location, 3012, "pass is missing 'VertexShader' property"); + } + // Verify that shader signatures between VS and PS match (both semantics and interpolation qualifiers) std::unordered_map vs_semantic_mapping; if (vs_info.return_semantic.empty()) @@ -1931,7 +2431,7 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) vs_semantic_mapping[vs_info.return_semantic] = vs_info.return_type; } - for (const struct_member_info ¶m : vs_info.parameter_list) + for (const member_type ¶m : vs_info.parameter_list) { if (param.semantic.empty()) { @@ -1959,7 +2459,7 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) } } - for (const struct_member_info ¶m : ps_info.parameter_list) + for (const member_type ¶m : ps_info.parameter_list) { if (param.semantic.empty()) { @@ -1976,29 +2476,292 @@ bool reshadefx::parser::parse_technique_pass(pass_info &info) { if (const auto it = vs_semantic_mapping.find(param.semantic); it == vs_semantic_mapping.end() || it->second != param.type) + { warning(pass_location, 4576, '\'' + ps_info.name + "': input parameter '" + param.name + "' semantic does not match vertex shader one"); + } else if (((it->second.qualifiers ^ param.type.qualifiers) & (type::q_linear | type::q_noperspective | type::q_centroid | type::q_nointerpolation)) != 0) - parse_success = false, + { + parse_success = false; error( pass_location, 4568, '\'' + ps_info.name + "': input parameter '" + param.name + "' interpolation qualifiers do not match vertex shader ones"); + } } } for 
(codegen::id id : vs_info.referenced_samplers) - info.samplers.push_back(_codegen->get_sampler(id)); + { + const sampler &sampler = _codegen->get_sampler(id); + if (std::find(std::begin(info.render_target_names), std::end(info.render_target_names), sampler.texture_name) != std::end(info.render_target_names)) + error(pass_location, 3020, '\'' + sampler.texture_name + "': cannot sample from texture that is also used as render target in the same pass"); + } for (codegen::id id : ps_info.referenced_samplers) - info.samplers.push_back(_codegen->get_sampler(id)); + { + const sampler &sampler = _codegen->get_sampler(id); + if (std::find(std::begin(info.render_target_names), std::end(info.render_target_names), sampler.texture_name) != std::end(info.render_target_names)) + error(pass_location, 3020, '\'' + sampler.texture_name + "': cannot sample from texture that is also used as render target in the same pass"); + } + if (!vs_info.referenced_storages.empty() || !ps_info.referenced_storages.empty()) { parse_success = false; error(pass_location, 3667, "storage writes are only valid in compute shaders"); } - } - // Verify render target format supports sRGB writes if enabled - if (info.srgb_write_enable && !targets_support_srgb) - parse_success = false, - error(pass_location, 4582, "one or more render target(s) do not support sRGB writes (only textures with RGBA8 format do)"); + // Verify render target format supports sRGB writes if enabled + if (info.srgb_write_enable && !targets_support_srgb) + { + parse_success = false; + error(pass_location, 4582, "one or more render target(s) do not support sRGB writes (only textures with RGBA8 format do)"); + } + } } return expect('}') && parse_success; } + +void reshadefx::codegen::optimize_bindings() +{ + struct sampler_group + { + std::vector bindings; + function *grouped_entry_point = nullptr; + }; + struct entry_point_info + { + std::vector sampler_groups; + + static void compare_and_update_bindings(std::unordered_map 
&per_entry_point, sampler_group &a, sampler_group &b, size_t binding) + { + for (; binding < std::min(a.bindings.size(), b.bindings.size()); ++binding) + { + if (a.bindings[binding] != b.bindings[binding]) + { + if (a.bindings[binding] == 0) + { + b.bindings.insert(b.bindings.begin() + binding, 0); + + if (b.grouped_entry_point != nullptr) + for (sampler_group &c : per_entry_point.at(b.grouped_entry_point).sampler_groups) + compare_and_update_bindings(per_entry_point, b, c, binding); + continue; + } + + if (b.bindings[binding] == 0) + { + a.bindings.insert(a.bindings.begin() + binding, 0); + + if (a.grouped_entry_point != nullptr) + for (sampler_group &c : per_entry_point.at(a.grouped_entry_point).sampler_groups) + compare_and_update_bindings(per_entry_point, a, c, binding); + continue; + } + } + } + } + }; + + std::unordered_map per_entry_point; + for (const auto &[name, type] : _module.entry_points) + { + per_entry_point.emplace(&get_function(name), entry_point_info {}); + } + + std::unordered_map usage_count; + for (const auto &[entry_point, entry_point_info] : per_entry_point) + { + for (const id sampler_id : entry_point->referenced_samplers) + usage_count[sampler_id]++; + for (const id storage_id : entry_point->referenced_storages) + usage_count[storage_id]++; + } + + // First sort bindings by usage and for each pass arrange them so that VS and PS use matching bindings for the objects they use (so that the same bindings can be used for both entry points). 
+ // If the entry points VS1 and PS1 use the following objects A, B and C: + // - VS1: A B + // - PS1: B C + // Then this generates the following bindings: + // - VS1: C A + // - PS1: C 0 B + + const auto usage_pred = + [&](const id lhs, const id rhs) { + return usage_count.at(lhs) > usage_count.at(rhs) || (usage_count.at(lhs) == usage_count.at(rhs) && lhs < rhs); + }; + + for (const auto &[entry_point, entry_point_info] : per_entry_point) + { + std::sort(entry_point->referenced_samplers.begin(), entry_point->referenced_samplers.end(), usage_pred); + std::sort(entry_point->referenced_storages.begin(), entry_point->referenced_storages.end(), usage_pred); + } + + for (const technique &tech : _module.techniques) + { + for (const pass &pass : tech.passes) + { + if (!pass.cs_entry_point.empty()) + { + function &cs = get_function(pass.cs_entry_point); + + sampler_group cs_sampler_info; + cs_sampler_info.bindings = cs.referenced_samplers; + per_entry_point.at(&cs).sampler_groups.push_back(std::move(cs_sampler_info)); + } + else + { + function &vs = get_function(pass.vs_entry_point); + + sampler_group vs_sampler_info; + vs_sampler_info.bindings = vs.referenced_samplers; + + if (!pass.ps_entry_point.empty()) + { + function &ps = get_function(pass.ps_entry_point); + + vs_sampler_info.grouped_entry_point = &ps; + + sampler_group ps_sampler_info; + ps_sampler_info.bindings = ps.referenced_samplers; + ps_sampler_info.grouped_entry_point = &vs; + + for (size_t binding = 0; binding < std::min(vs_sampler_info.bindings.size(), ps_sampler_info.bindings.size()); ++binding) + { + if (vs_sampler_info.bindings[binding] != ps_sampler_info.bindings[binding]) + { + if (usage_pred(vs_sampler_info.bindings[binding], ps_sampler_info.bindings[binding])) + ps_sampler_info.bindings.insert(ps_sampler_info.bindings.begin() + binding, 0); + else + vs_sampler_info.bindings.insert(vs_sampler_info.bindings.begin() + binding, 0); + } + } + + 
per_entry_point.at(&ps).sampler_groups.push_back(std::move(ps_sampler_info)); + } + + per_entry_point.at(&vs).sampler_groups.push_back(std::move(vs_sampler_info)); + } + } + } + + // Next walk through all entry point groups and shift bindings as needed so that there are no mismatches across passes. + // If the entry points VS1, PS1 and PS2 use the following bindings (notice the mismatches of VS1 between pass 0 and pass 1, as well as PS2 between pass 1 and pass 2): + // - pass 0 + // - VS1: C A + // - PS1: C 0 B + // - pass 1 + // - VS1: C 0 A + // - PS2: 0 D A + // - pass 2 + // - VS2: D + // - PS2: D A + // Then this generates the following final bindings: + // - pass 0 + // - VS1: C 0 A + // - PS1: C 0 B + // - pass 1 + // - VS1: C 0 A + // - PS2: 0 D A + // - pass 2 + // - VS2: 0 D + // - PS2: 0 D A + + for (auto &[entry_point, entry_point_info] : per_entry_point) + { + while (entry_point_info.sampler_groups.size() > 1) + { + entry_point_info::compare_and_update_bindings(per_entry_point, entry_point_info.sampler_groups[0], entry_point_info.sampler_groups[1], 0); + entry_point_info.sampler_groups.erase(entry_point_info.sampler_groups.begin() + 1); + } + } + + for (auto &[entry_point, entry_point_info] : per_entry_point) + { + if (entry_point_info.sampler_groups.empty()) + continue; + + entry_point->referenced_samplers = std::move(entry_point_info.sampler_groups[0].bindings); + } + + // Finally apply the generated bindings to all passes + + for (technique &tech : _module.techniques) + { + for (pass &pass : tech.passes) + { + std::vector referenced_samplers; + std::vector referenced_storages; + + if (!pass.cs_entry_point.empty()) + { + const function &cs = get_function(pass.cs_entry_point); + + referenced_samplers = cs.referenced_samplers; + referenced_storages = cs.referenced_storages; + } + else + { + const function &vs = get_function(pass.vs_entry_point); + + referenced_samplers = vs.referenced_samplers; + + if (!pass.ps_entry_point.empty()) + { + const function 
&ps = get_function(pass.ps_entry_point); + + if (ps.referenced_samplers.size() > referenced_samplers.size()) + referenced_samplers.resize(ps.referenced_samplers.size()); + + for (uint32_t binding = 0; binding < ps.referenced_samplers.size(); ++binding) + if (ps.referenced_samplers[binding] != 0) + referenced_samplers[binding] = ps.referenced_samplers[binding]; + } + } + + for (uint32_t binding = 0; binding < referenced_samplers.size(); ++binding) + { + if (referenced_samplers[binding] == 0) + continue; + + const sampler &sampler = get_sampler(referenced_samplers[binding]); + + texture_binding t; + t.texture_name = sampler.texture_name; + t.binding = binding; + t.srgb = sampler.srgb; + pass.texture_bindings.push_back(std::move(t)); + + if (binding >= _module.num_texture_bindings) + _module.num_texture_bindings = binding + 1; + + sampler_binding s; + s.binding = binding; + s.filter = sampler.filter; + s.address_u = sampler.address_u; + s.address_v = sampler.address_v; + s.address_w = sampler.address_w; + s.min_lod = sampler.min_lod; + s.max_lod = sampler.max_lod; + s.lod_bias = sampler.lod_bias; + pass.sampler_bindings.push_back(std::move(s)); + + if (binding >= _module.num_sampler_bindings) + _module.num_sampler_bindings = binding + 1; + } + + for (uint32_t binding = 0; binding < referenced_storages.size(); ++binding) + { + if (referenced_storages[binding] == 0) + continue; + + const storage &storage = get_storage(referenced_storages[binding]); + + storage_binding u; + u.texture_name = storage.texture_name; + u.binding = binding; + u.level = storage.level; + pass.storage_bindings.push_back(std::move(u)); + + if (binding >= _module.num_storage_bindings) + _module.num_storage_bindings = binding + 1; + } + } + } +} diff --git a/dep/reshadefx/src/effect_preprocessor.cpp b/dep/reshadefx/src/effect_preprocessor.cpp index 9a03ec77c..b275c5838 100644 --- a/dep/reshadefx/src/effect_preprocessor.cpp +++ b/dep/reshadefx/src/effect_preprocessor.cpp @@ -5,8 +5,8 @@ #include 
"effect_lexer.hpp" #include "effect_preprocessor.hpp" +#include // fclose, fopen, fread, fseek #include -#include #include // std::find_if #ifndef _WIN32 @@ -52,7 +52,7 @@ enum macro_replacement macro_replacement_stringize = '\xFE', }; -static const int precedence_lookup[] = { +static const int s_precedence_lookup[] = { 0, 1, 2, 3, 4, // bitwise operators 5, 6, 7, 7, 7, 7, // logical operators 8, 8, // left shift, right shift @@ -61,9 +61,8 @@ static const int precedence_lookup[] = { 11, 11, 11, 11 // unary operators }; -static bool read_file(const std::string &path, std::string &data, reshadefx::preprocessor::include_read_file_callback &cb) +static bool read_file(const std::string &path, std::string &file_data, reshadefx::preprocessor::include_read_file_callback &cb) { - std::string file_data; if (!cb(path, file_data)) return false; @@ -77,29 +76,33 @@ static bool read_file(const std::string &path, std::string &data, reshadefx::pre static_cast(file_data[2]) == 0xbf) file_data.erase(0, 3); - data = std::move(file_data); return true; } bool reshadefx::preprocessor::stdfs_read_file_callback(const std::string &path, std::string &data) { - std::ifstream file(std::filesystem::path(path), std::ios::binary); - if (!file) - return false; + // Read file contents into memory + const std::filesystem::path fspath(path); +#ifndef _WIN32 + FILE *const file = fopen(fspath.c_str(), "rb"); +#else + FILE *const file = _wfsopen(fspath.generic_wstring().c_str(), L"rb", SH_DENYWR); +#endif + if (file == nullptr) + return false; - // Read file contents into memory - std::error_code ec; - const uintmax_t file_size = std::filesystem::file_size(path, ec); - if (ec) - return false; + fseek(file, 0, SEEK_END); + const size_t file_size = ftell(file); + fseek(file, 0, SEEK_SET); - data.reserve(file_size + 1); - data.resize(static_cast(file_size), '\0'); - if (!file.read(data.data(), file_size)) - return false; + const size_t file_size_read = fread(data.data(), 1, file_size, file); + + // No 
longer need to have a handle open to the file, since all data was read, so can safely close it + fclose(file); + + if (file_size_read != file_size) + return false; - // No longer need to have a handle open to the file, since all data was read, so can safely close it - file.close(); return true; } @@ -156,7 +159,8 @@ bool reshadefx::preprocessor::append_string(std::string source_code, const std:: // Enforce all input strings to end with a line feed assert(!source_code.empty() && source_code.back() == '\n'); - _success = true; // Clear success flag before parsing a new string + // Only consider new errors added below for the success of this call + const size_t errors_offset = _errors.length(); // Give this push a name, so that lexer location starts at a new line // This is necessary in case this string starts with a preprocessor directive, since the lexer only reports those as such if they appear at the beginning of a new line @@ -164,15 +168,15 @@ bool reshadefx::preprocessor::append_string(std::string source_code, const std:: push(std::move(source_code), path.empty() ? 
"unknown" : path); parse(); - return _success; + return _errors.find(": preprocessor error: ", errors_offset) == std::string::npos; } std::vector reshadefx::preprocessor::included_files() const { std::vector files; files.reserve(_file_cache.size()); - for (const auto &it : _file_cache) - files.push_back(std::filesystem::u8path(it.first)); + for (const std::pair &cache_entry : _file_cache) + files.push_back(std::filesystem::u8path(cache_entry.first)); return files; } std::vector> reshadefx::preprocessor::used_macro_definitions() const @@ -189,12 +193,19 @@ std::vector> reshadefx::preprocessor::used_m void reshadefx::preprocessor::error(const location &location, const std::string &message) { - _errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor error: " + message + '\n'; - _success = false; // Unset success flag + _errors += location.source; + _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')'; + _errors += ": preprocessor error: "; + _errors += message; + _errors += '\n'; } void reshadefx::preprocessor::warning(const location &location, const std::string &message) { - _errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor warning: " + message + '\n'; + _errors += location.source; + _errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')'; + _errors += ": preprocessor warning: "; + _errors += message; + _errors += '\n'; } void reshadefx::preprocessor::push(std::string input, const std::string &name) @@ -357,32 +368,32 @@ void reshadefx::preprocessor::parse() { case tokenid::hash_if: parse_if(); - if (!expect(tokenid::end_of_line)) + if (!skip && !expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; case tokenid::hash_ifdef: parse_ifdef(); - if (!expect(tokenid::end_of_line)) + if (!skip && 
!expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; case tokenid::hash_ifndef: parse_ifndef(); - if (!expect(tokenid::end_of_line)) + if (!skip && !expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; case tokenid::hash_else: parse_else(); - if (!expect(tokenid::end_of_line)) + if (!skip && !expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; case tokenid::hash_elif: parse_elif(); - if (!expect(tokenid::end_of_line)) + if (!skip && !expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; case tokenid::hash_endif: parse_endif(); - if (!expect(tokenid::end_of_line)) + if (!skip && !expect(tokenid::end_of_line)) consume_until(tokenid::end_of_line); continue; default: @@ -511,11 +522,18 @@ void reshadefx::preprocessor::parse_if() level.pp_token = _token; level.input_index = _current_input_index; - // Evaluate expression after updating 'pp_token', so that it points at the beginning # token - level.value = evaluate_expression(); - const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; - level.skipping = parent_skipping || !level.value; + if (parent_skipping) + { + level.value = false; + level.skipping = true; + } + else + { + // Evaluate expression after updating 'pp_token', so that it points at the beginning # token + level.value = evaluate_expression(); + level.skipping = !level.value; + } _if_stack.push_back(std::move(level)); } @@ -528,16 +546,23 @@ void reshadefx::preprocessor::parse_ifdef() if (!expect(tokenid::identifier)) return; - level.value = is_defined(_token.literal_as_string); - const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; - level.skipping = parent_skipping || !level.value; + if (parent_skipping) + { + level.value = false; + level.skipping = true; + } + else + { + level.value = is_defined(_token.literal_as_string); + level.skipping = !level.value; - _if_stack.push_back(std::move(level)); - // Only add to 
used macro list if this #ifdef is active and the macro was not defined before - if (!parent_skipping) + // Only add to used macro list if this #ifdef is active and the macro was not defined before if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined) _used_macros.emplace(_token.literal_as_string); + } + + _if_stack.push_back(std::move(level)); } void reshadefx::preprocessor::parse_ifndef() { @@ -548,16 +573,23 @@ void reshadefx::preprocessor::parse_ifndef() if (!expect(tokenid::identifier)) return; - level.value = !is_defined(_token.literal_as_string); - const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; - level.skipping = parent_skipping || !level.value; + if (parent_skipping) + { + level.value = false; + level.skipping = true; + } + else + { + level.value = !is_defined(_token.literal_as_string); + level.skipping = !level.value; - _if_stack.push_back(std::move(level)); - // Only add to used macro list if this #ifndef is active and the macro was not defined before - if (!parent_skipping) + // Only add to used macro list if this #ifndef is active and the macro was not defined before if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined) _used_macros.emplace(_token.literal_as_string); + } + + _if_stack.push_back(std::move(level)); } void reshadefx::preprocessor::parse_elif() { @@ -573,10 +605,19 @@ void reshadefx::preprocessor::parse_elif() level.input_index = _current_input_index; const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping; - const bool condition_result = evaluate_expression(); - level.skipping = parent_skipping || level.value || !condition_result; + if (parent_skipping) + { + level.value = false; + level.skipping = true; + } + else + { + const bool condition_result = evaluate_expression(); + level.skipping = level.value || !condition_result; - if (!level.value) level.value = 
condition_result; + if (!level.value) + level.value = condition_result; + } } void reshadefx::preprocessor::parse_else() { @@ -591,16 +632,25 @@ void reshadefx::preprocessor::parse_else() level.input_index = _current_input_index; const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping; - level.skipping = parent_skipping || level.value; + if (parent_skipping) + { + level.value = false; + level.skipping = true; + } + else + { + level.skipping = parent_skipping || level.value; - if (!level.value) level.value = true; + if (!level.value) + level.value = true; + } } void reshadefx::preprocessor::parse_endif() { if (_if_stack.empty()) - error(_token.location, "missing #if for #endif"); - else - _if_stack.pop_back(); + return error(_token.location, "missing #if for #endif"); + + _if_stack.pop_back(); } void reshadefx::preprocessor::parse_error() @@ -927,8 +977,8 @@ bool reshadefx::preprocessor::evaluate_expression() break; if (left_associative ? - (precedence_lookup[op] > precedence_lookup[prev_op]) : - (precedence_lookup[op] >= precedence_lookup[prev_op])) + (s_precedence_lookup[op] > s_precedence_lookup[prev_op]) : + (s_precedence_lookup[op] >= s_precedence_lookup[prev_op])) break; stack_index--; @@ -1016,9 +1066,13 @@ bool reshadefx::preprocessor::evaluate_expression() BINARY_OPERATION(-); break; case op_modulo: + if (stack[stack_index - 1] == 0) + return error(_token.location, "right operand of '%' is zero"), 0; BINARY_OPERATION(%); break; case op_divide: + if (stack[stack_index - 1] == 0) + return error(_token.location, "division by zero"), 0; BINARY_OPERATION(/); break; case op_multiply: @@ -1068,12 +1122,24 @@ bool reshadefx::preprocessor::evaluate_identifier_as_macro() push(escape_string(file_stem.u8string())); return true; } + if (_token.literal_as_string == "__FILE_STEM_HASH__") + { + const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem(); + 
push(std::to_string(std::hash()(file_stem.u8string()) & 0xFFFFFFFF)); + return true; + } if (_token.literal_as_string == "__FILE_NAME__") { const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename(); push(escape_string(file_name.u8string())); return true; } + if (_token.literal_as_string == "__FILE_NAME_HASH__") + { + const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename(); + push(std::to_string(std::hash()(file_name.u8string()) & 0xFFFFFFFF)); + return true; + } const auto it = _macros.find(_token.literal_as_string); if (it == _macros.end()) diff --git a/dep/reshadefx/src/effect_symbol_table.cpp b/dep/reshadefx/src/effect_symbol_table.cpp index 8ba800f5c..e9e840b53 100644 --- a/dep/reshadefx/src/effect_symbol_table.cpp +++ b/dep/reshadefx/src/effect_symbol_table.cpp @@ -13,25 +13,23 @@ #include // std::upper_bound, std::sort #include // std::greater -enum class intrinsic_id : uint32_t +enum class intrinsic_id { #define IMPLEMENT_INTRINSIC_SPIRV(name, i, code) name##i, #include "effect_symbol_table_intrinsics.inl" }; -struct intrinsic +struct intrinsic : reshadefx::function { - intrinsic(const char *name, intrinsic_id id, const reshadefx::type &ret_type, std::initializer_list arg_types) : id(id) + intrinsic(const char *name, intrinsic_id id, const reshadefx::type &ret_type, std::initializer_list arg_types) { - function.name = name; - function.return_type = ret_type; - function.parameter_list.reserve(arg_types.size()); + function::return_type = ret_type; + function::id = static_cast(id); + function::name = name; + function::parameter_list.reserve(arg_types.size()); for (const reshadefx::type &arg_type : arg_types) - function.parameter_list.push_back({ arg_type }); + function::parameter_list.push_back({ arg_type }); } - - intrinsic_id id; - reshadefx::function_info function; }; #define void { reshadefx::type::t_void } @@ -130,48 +128,17 @@ static const intrinsic s_intrinsics[] = #undef 
uint2 #undef uint3 #undef uint4 -#undef float1 +#undef float #undef float2 #undef float3 #undef float4 -#undef float2x2 -#undef float3x3 -#undef float4x4 -#undef out_float -#undef out_float2 -#undef out_float3 -#undef out_float4 -#undef sampler1d_int -#undef sampler2d_int -#undef sampler3d_int -#undef sampler1d_uint -#undef sampler2d_uint -#undef sampler3d_uint -#undef sampler1d_float4 -#undef sampler2d_float4 -#undef sampler3d_float4 -#undef storage1d_int -#undef storage2d_int -#undef storage3d_int -#undef storage1d_uint -#undef storage2d_uint -#undef storage3d_uint -#undef storage1d_float4 -#undef storage2d_float4 -#undef storage3d_float4 -#undef inout_storage1d_int -#undef inout_storage2d_int -#undef inout_storage3d_int -#undef inout_storage1d_uint -#undef inout_storage2d_uint -#undef inout_storage3d_uint unsigned int reshadefx::type::rank(const type &src, const type &dst) { - if (src.is_array() != dst.is_array() || (src.array_length != dst.array_length && src.array_length > 0 && dst.array_length > 0)) + if (src.is_array() != dst.is_array() || (src.array_length != dst.array_length && src.is_bounded_array() && dst.is_bounded_array())) return 0; // Arrays of different sizes are not compatible if (src.is_struct() || dst.is_struct()) - return src.definition == dst.definition ? 32 : 0; // Structs are only compatible if they are the same type + return src.struct_definition == dst.struct_definition ? 32 : 0; // Structs are only compatible if they are the same type if (!src.is_numeric() || !dst.is_numeric()) return src.base == dst.base && src.rows == dst.rows && src.cols == dst.cols ? 
32 : 0; // Numeric values are not compatible with other types if (src.is_matrix() && (!dst.is_matrix() || src.rows != dst.rows || src.cols != dst.cols)) @@ -181,7 +148,7 @@ unsigned int reshadefx::type::rank(const type &src, const type &dst) // - Floating point has a higher rank than integer types // - Integer to floating point promotion has a higher rank than floating point to integer conversion // - Signed to unsigned integer conversion has a higher rank than unsigned to signed integer conversion - static const int ranks[7][7] = { + static const unsigned int ranks[7][7] = { { 5, 4, 4, 4, 4, 4, 4 }, // bool { 3, 5, 5, 2, 2, 4, 4 }, // min16int { 3, 5, 5, 2, 2, 4, 4 }, // int @@ -194,7 +161,7 @@ unsigned int reshadefx::type::rank(const type &src, const type &dst) assert(src.base > 0 && src.base <= 7); // bool - float assert(dst.base > 0 && dst.base <= 7); - const int rank = ranks[src.base - 1][dst.base - 1] << 2; + const unsigned int rank = ranks[src.base - 1][dst.base - 1] << 2; if ((src.is_scalar() && dst.is_vector())) return rank >> 1; // Scalar to vector promotion has a lower rank @@ -284,7 +251,7 @@ bool reshadefx::symbol_table::insert_symbol(const std::string &name, const symbo { // Extract scope name scope.name = _current_scope.name.substr(0, pos += 2); - const auto previous_scope_name = _current_scope.name.substr(pos); + const std::string previous_scope_name = _current_scope.name.substr(pos); // Insert symbol into this scope insert_sorted(_symbol_stack[previous_scope_name + name], scoped_symbol { symbol, scope }); @@ -335,7 +302,7 @@ reshadefx::scoped_symbol reshadefx::symbol_table::find_symbol(const std::string return result; } -static int compare_functions(const std::vector &arguments, const reshadefx::function_info *function1, const reshadefx::function_info *function2) +static int compare_functions(const std::vector &arguments, const reshadefx::function *function1, const reshadefx::function *function2) { const size_t num_arguments = arguments.size(); @@ 
-388,7 +355,7 @@ bool reshadefx::symbol_table::resolve_function_call(const std::string &name, con { out_data.op = symbol_type::function; - const function_info *result = nullptr; + const function *result = nullptr; unsigned int num_overloads = 0; unsigned int overload_namespace = scope.namespace_level; @@ -405,7 +372,7 @@ bool reshadefx::symbol_table::resolve_function_call(const std::string &name, con it->scope.namespace_level > scope.namespace_level || (it->scope.namespace_level == scope.namespace_level && it->scope.name != scope.name)) continue; - const function_info *const function = it->function; + const function *const function = it->function; if (function == nullptr) continue; @@ -425,7 +392,7 @@ bool reshadefx::symbol_table::resolve_function_call(const std::string &name, con continue; } } - else if (arguments.size() != function->parameter_list.size()) + else if (arguments.size() > function->parameter_list.size() || (arguments.size() < function->parameter_list.size() && !function->parameter_list[arguments.size()].has_default_value)) { continue; } @@ -453,18 +420,18 @@ bool reshadefx::symbol_table::resolve_function_call(const std::string &name, con { for (const intrinsic &intrinsic : s_intrinsics) { - if (intrinsic.function.name != name || intrinsic.function.parameter_list.size() != arguments.size()) + if (intrinsic.name != name || intrinsic.parameter_list.size() != arguments.size()) continue; // A new possibly-matching intrinsic function was found, compare it against the current result - const int comparison = compare_functions(arguments, &intrinsic.function, result); + const int comparison = compare_functions(arguments, &intrinsic, result); if (comparison < 0) // The new function is a better match { out_data.op = symbol_type::intrinsic; - out_data.id = static_cast(intrinsic.id); - out_data.type = intrinsic.function.return_type; - out_data.function = &intrinsic.function; + out_data.id = intrinsic.id; + out_data.type = intrinsic.return_type; + out_data.function 
= &intrinsic; result = out_data.function; num_overloads = 1; } diff --git a/dep/reshadefx/src/effect_symbol_table_intrinsics.inl b/dep/reshadefx/src/effect_symbol_table_intrinsics.inl index c15937e84..4fc4f3661 100644 --- a/dep/reshadefx/src/effect_symbol_table_intrinsics.inl +++ b/dep/reshadefx/src/effect_symbol_table_intrinsics.inl @@ -42,16 +42,14 @@ IMPLEMENT_INTRINSIC_SPIRV(abs, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450SAbs) - .add(args[0].base) - .result; + .add(args[0].base); }) IMPLEMENT_INTRINSIC_SPIRV(abs, 1, { return add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450FAbs) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret all(x) @@ -77,8 +75,7 @@ IMPLEMENT_INTRINSIC_SPIRV(all, 0, { IMPLEMENT_INTRINSIC_SPIRV(all, 1, { return add_instruction(spv::OpAll, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret any(x) @@ -104,8 +101,7 @@ IMPLEMENT_INTRINSIC_SPIRV(any, 0, { IMPLEMENT_INTRINSIC_SPIRV(any, 1, { return add_instruction(spv::OpAny, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret asin(x) @@ -124,8 +120,7 @@ IMPLEMENT_INTRINSIC_SPIRV(asin, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Asin) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret acos(x) @@ -144,8 +139,7 @@ IMPLEMENT_INTRINSIC_SPIRV(acos, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Acos) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret atan(x) @@ -164,8 +158,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atan, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Atan) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret atan2(x, y) @@ -185,8 +178,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atan2, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Atan2) 
.add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret sin(x) @@ -205,8 +197,7 @@ IMPLEMENT_INTRINSIC_SPIRV(sin, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Sin) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret sinh(x) @@ -225,8 +216,7 @@ IMPLEMENT_INTRINSIC_SPIRV(sinh, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Sinh) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret cos(x) @@ -245,8 +235,7 @@ IMPLEMENT_INTRINSIC_SPIRV(cos, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Cos) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret cosh(x) @@ -265,8 +254,7 @@ IMPLEMENT_INTRINSIC_SPIRV(cosh, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Cosh) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret tan(x) @@ -285,8 +273,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tan, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Tan) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret tanh(x) @@ -305,8 +292,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tanh, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Tanh) - .add(args[0].base) - .result; + .add(args[0].base); }) // sincos(x, out s, out c) @@ -324,13 +310,11 @@ IMPLEMENT_INTRINSIC_SPIRV(sincos, 0, { const spv::Id sin_result = add_instruction(spv::OpExtInst, convert_type(args[0].type)) .add(_glsl_ext) .add(spv::GLSLstd450Sin) - .add(args[0].base) - .result; + .add(args[0].base); const spv::Id cos_result = add_instruction(spv::OpExtInst, convert_type(args[0].type)) .add(_glsl_ext) .add(spv::GLSLstd450Cos) - .add(args[0].base) - .result; + .add(args[0].base); add_instruction_without_result(spv::OpStore) .add(args[1].base) @@ -359,8 +343,7 @@ IMPLEMENT_INTRINSIC_HLSL(asint, 0, 
{ IMPLEMENT_INTRINSIC_SPIRV(asint, 0, { return add_instruction(spv::OpBitcast, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret asuint(x) @@ -380,8 +363,7 @@ IMPLEMENT_INTRINSIC_HLSL(asuint, 0, { IMPLEMENT_INTRINSIC_SPIRV(asuint, 0, { return add_instruction(spv::OpBitcast, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret asfloat(x) @@ -414,14 +396,130 @@ IMPLEMENT_INTRINSIC_HLSL(asfloat, 1, { IMPLEMENT_INTRINSIC_SPIRV(asfloat, 0, { return add_instruction(spv::OpBitcast, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) IMPLEMENT_INTRINSIC_SPIRV(asfloat, 1, { return add_instruction(spv::OpBitcast, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); + }) + +// ret f16tof32(x) +DEFINE_INTRINSIC(f16tof32, 0, float, uint) +DEFINE_INTRINSIC(f16tof32, 0, float2, uint2) +DEFINE_INTRINSIC(f16tof32, 0, float3, uint3) +DEFINE_INTRINSIC(f16tof32, 0, float4, uint4) +IMPLEMENT_INTRINSIC_GLSL(f16tof32, 0, { + if (args[0].type.rows > 1) + code += "vec" + std::to_string(args[0].type.rows) + '('; + for (unsigned int i = 0; i < args[0].type.rows; ++i) + { + assert(i < 4); + code += "unpackHalf2x16(" + id_to_name(args[0].base) + '.' 
+ "xyzw"[i] + ").x"; + if (i < args[0].type.rows - 1) + code += ", "; + } + if (args[0].type.rows > 1) + code += ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(f16tof32, 0, { + code += "f16tof32(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(f16tof32, 0, { + type res_scalar_type = res_type; + res_scalar_type.rows = 1; + type res_vector_type = res_type; + res_vector_type.rows = 2; + type arg_scalar_type = args[0].type; + arg_scalar_type.rows = 1; + + spv::Id res[4] = {}; + for (unsigned int i = 0; i < args[0].type.rows; ++i) + { + assert(i < 4); + + spv::Id arg_scalar = args[0].base; + if (args[0].type.rows > 1) + arg_scalar = add_instruction(spv::OpCompositeExtract, convert_type(arg_scalar_type)) + .add(arg_scalar) + .add(i); + + spv::Id arg_vector = add_instruction(spv::OpExtInst, convert_type(res_vector_type)) + .add(_glsl_ext) + .add(spv::GLSLstd450UnpackHalf2x16) + .add(arg_scalar); + + res[i] = add_instruction(spv::OpCompositeExtract, convert_type(res_scalar_type)) + .add(arg_vector) + .add(0u); + } + + if (res_type.rows > 1) + return + add_instruction(spv::OpCompositeConstruct, convert_type(res_type)) + .add(res, res + res_type.rows); + else + return res[0]; + }) + +// ret f32tof16(x) +DEFINE_INTRINSIC(f32tof16, 0, uint, float) +DEFINE_INTRINSIC(f32tof16, 0, uint2, float2) +DEFINE_INTRINSIC(f32tof16, 0, uint3, float3) +DEFINE_INTRINSIC(f32tof16, 0, uint4, float4) +IMPLEMENT_INTRINSIC_GLSL(f32tof16, 0, { + if (args[0].type.rows > 1) + code += "uvec" + std::to_string(args[0].type.rows) + '('; + for (unsigned int i = 0; i < args[0].type.rows; ++i) + { + assert(i < 4); + code += "packHalf2x16(vec2(" + id_to_name(args[0].base) + '.' 
+ "xyzw"[i] + ", 0.0))"; + if (i < args[0].type.rows - 1) + code += ", "; + } + if (args[0].type.rows > 1) + code += ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(f32tof16, 0, { + code += "f32tof16(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(f32tof16, 0, { + type res_scalar_type = res_type; + res_scalar_type.rows = 1; + type arg_scalar_type = args[0].type; + arg_scalar_type.rows = 1; + type arg_vector_type = args[0].type; + arg_vector_type.rows = 2; + + const spv::Id constant_zero = emit_constant(arg_scalar_type, 0u); + + spv::Id res[4] = {}; + for (unsigned int i = 0; i < args[0].type.rows; ++i) + { + assert(i < 4); + + spv::Id arg_scalar = args[0].base; + if (args[0].type.rows > 1) + arg_scalar = add_instruction(spv::OpCompositeExtract, convert_type(arg_scalar_type)) + .add(arg_scalar) + .add(i); + + spv::Id arg_vector = add_instruction(spv::OpCompositeConstruct, convert_type(arg_vector_type)) + .add(arg_scalar) + .add(constant_zero); + + res[i] = add_instruction(spv::OpExtInst, convert_type(res_scalar_type)) + .add(_glsl_ext) + .add(spv::GLSLstd450PackHalf2x16) + .add(arg_vector); + } + + if (res_type.rows > 1) + return + add_instruction(spv::OpCompositeConstruct, convert_type(res_type)) + .add(res, res + res_type.rows); + else + return res[0]; }) // ret firstbitlow @@ -433,6 +531,7 @@ IMPLEMENT_INTRINSIC_GLSL(firstbitlow, 0, { code += "findLSB(" + id_to_name(args[0].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(firstbitlow, 0, { + _uses_bitwise_intrinsics = true; if (_shader_model < 50) code += "__"; code += "firstbitlow(" + id_to_name(args[0].base) + ')'; @@ -442,8 +541,7 @@ IMPLEMENT_INTRINSIC_SPIRV(firstbitlow, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450FindILsb) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret firstbithigh @@ -462,11 +560,13 @@ IMPLEMENT_INTRINSIC_GLSL(firstbithigh, 1, { code += "findMSB(" + id_to_name(args[0].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(firstbithigh, 
0, { + _uses_bitwise_intrinsics = true; if (_shader_model < 50) code += "__"; code += "firstbithigh(" + id_to_name(args[0].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(firstbithigh, 1, { + _uses_bitwise_intrinsics = true; if (_shader_model < 50) code += "__"; code += "firstbithigh(" + id_to_name(args[0].base) + ')'; @@ -476,16 +576,14 @@ IMPLEMENT_INTRINSIC_SPIRV(firstbithigh, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450FindSMsb) - .add(args[0].base) - .result; + .add(args[0].base); }) IMPLEMENT_INTRINSIC_SPIRV(firstbithigh, 1, { return add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450FindUMsb) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret countbits @@ -497,6 +595,7 @@ IMPLEMENT_INTRINSIC_GLSL(countbits, 0, { code += "bitCount(" + id_to_name(args[0].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(countbits, 0, { + _uses_bitwise_intrinsics = true; if (_shader_model < 50) code += "__"; code += "countbits(" + id_to_name(args[0].base) + ')'; @@ -504,8 +603,7 @@ IMPLEMENT_INTRINSIC_HLSL(countbits, 0, { IMPLEMENT_INTRINSIC_SPIRV(countbits, 0, { return add_instruction(spv::OpBitCount, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret reversebits @@ -517,6 +615,7 @@ IMPLEMENT_INTRINSIC_GLSL(reversebits, 0, { code += "bitfieldReverse(" + id_to_name(args[0].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(reversebits, 0, { + _uses_bitwise_intrinsics = true; if (_shader_model < 50) code += "__"; code += "reversebits(" + id_to_name(args[0].base) + ')'; @@ -524,8 +623,7 @@ IMPLEMENT_INTRINSIC_HLSL(reversebits, 0, { IMPLEMENT_INTRINSIC_SPIRV(reversebits, 0, { return add_instruction(spv::OpBitReverse, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret ceil(x) @@ -544,8 +642,7 @@ IMPLEMENT_INTRINSIC_SPIRV(ceil, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Ceil) - 
.add(args[0].base) - .result; + .add(args[0].base); }) // ret floor(x) @@ -567,8 +664,7 @@ IMPLEMENT_INTRINSIC_SPIRV(floor, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Floor) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret clamp(x, min, max) @@ -609,8 +705,7 @@ IMPLEMENT_INTRINSIC_SPIRV(clamp, 0, { .add(spv::GLSLstd450SClamp) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(clamp, 1, { return @@ -619,8 +714,7 @@ IMPLEMENT_INTRINSIC_SPIRV(clamp, 1, { .add(spv::GLSLstd450UClamp) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(clamp, 2, { return @@ -629,8 +723,7 @@ IMPLEMENT_INTRINSIC_SPIRV(clamp, 2, { .add(spv::GLSLstd450FClamp) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret saturate(x) @@ -654,8 +747,7 @@ IMPLEMENT_INTRINSIC_SPIRV(saturate, 0, { .add(spv::GLSLstd450FClamp) .add(args[0].base) .add(constant_zero) - .add(constant_one) - .result; + .add(constant_one); }) // ret mad(mvalue, avalue, bvalue) @@ -679,8 +771,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mad, 0, { .add(spv::GLSLstd450Fma) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret rcp(x) @@ -703,8 +794,7 @@ IMPLEMENT_INTRINSIC_SPIRV(rcp, 0, { return add_instruction(spv::OpFDiv, convert_type(res_type)) .add(constant_one) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret pow(x, y) @@ -724,8 +814,7 @@ IMPLEMENT_INTRINSIC_SPIRV(pow, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Pow) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret exp(x) @@ -744,8 +833,7 @@ IMPLEMENT_INTRINSIC_SPIRV(exp, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Exp) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret exp2(x) @@ 
-764,8 +852,7 @@ IMPLEMENT_INTRINSIC_SPIRV(exp2, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Exp2) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret log(x) @@ -784,8 +871,7 @@ IMPLEMENT_INTRINSIC_SPIRV(log, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Log) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret log2(x) @@ -804,8 +890,7 @@ IMPLEMENT_INTRINSIC_SPIRV(log2, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Log2) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret log10(x) @@ -823,8 +908,7 @@ IMPLEMENT_INTRINSIC_SPIRV(log10, 0, { const spv::Id log2 = add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Log2) - .add(args[0].base) - .result; + .add(args[0].base); const spv::Id log10 = emit_constant(args[0].type, /* log2(10) */ constant { { 3.321928f, 3.321928f, 3.321928f, 3.321928f } }); @@ -832,8 +916,7 @@ IMPLEMENT_INTRINSIC_SPIRV(log10, 0, { return add_instruction(spv::OpFDiv, convert_type(res_type)) .add(log2) - .add(log10) - .result; }) + .add(log10); }) // ret sign(x) DEFINE_INTRINSIC(sign, 0, int, int) @@ -861,16 +944,14 @@ IMPLEMENT_INTRINSIC_SPIRV(sign, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450SSign) - .add(args[0].base) - .result; + .add(args[0].base); }) IMPLEMENT_INTRINSIC_SPIRV(sign, 1, { return add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450FSign) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret sqrt(x) @@ -889,8 +970,7 @@ IMPLEMENT_INTRINSIC_SPIRV(sqrt, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Sqrt) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret rsqrt(x) @@ -909,8 +989,7 @@ IMPLEMENT_INTRINSIC_SPIRV(rsqrt, 0, { 
add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450InverseSqrt) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret lerp(x, y, s) @@ -931,8 +1010,7 @@ IMPLEMENT_INTRINSIC_SPIRV(lerp, 0, { .add(spv::GLSLstd450FMix) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret step(y, x) @@ -952,8 +1030,7 @@ IMPLEMENT_INTRINSIC_SPIRV(step, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Step) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret smoothstep(min, max, x) @@ -974,8 +1051,7 @@ IMPLEMENT_INTRINSIC_SPIRV(smoothstep, 0, { .add(spv::GLSLstd450SmoothStep) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret frac(x) @@ -994,8 +1070,7 @@ IMPLEMENT_INTRINSIC_SPIRV(frac, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Fract) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret ldexp(x, exp) @@ -1015,8 +1090,7 @@ IMPLEMENT_INTRINSIC_SPIRV(ldexp, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Ldexp) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret modf(x, out ip) @@ -1036,8 +1110,7 @@ IMPLEMENT_INTRINSIC_SPIRV(modf, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Modf) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret frexp(x, out exp) @@ -1057,8 +1130,7 @@ IMPLEMENT_INTRINSIC_SPIRV(frexp, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Frexp) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret trunc(x) @@ -1077,8 +1149,7 @@ IMPLEMENT_INTRINSIC_SPIRV(trunc, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Trunc) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret round(x) @@ -1097,8 +1168,7 @@ IMPLEMENT_INTRINSIC_SPIRV(round, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) 
.add(spv::GLSLstd450Round) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret min(x, y) @@ -1128,8 +1198,7 @@ IMPLEMENT_INTRINSIC_SPIRV(min, 0, { .add(_glsl_ext) .add(spv::GLSLstd450SMin) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(min, 1, { return @@ -1137,8 +1206,7 @@ IMPLEMENT_INTRINSIC_SPIRV(min, 1, { .add(_glsl_ext) .add(spv::GLSLstd450FMin) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret max(x, y) @@ -1168,8 +1236,7 @@ IMPLEMENT_INTRINSIC_SPIRV(max, 0, { .add(_glsl_ext) .add(spv::GLSLstd450SMax) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(max, 1, { return @@ -1177,11 +1244,10 @@ IMPLEMENT_INTRINSIC_SPIRV(max, 1, { .add(_glsl_ext) .add(spv::GLSLstd450FMax) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) -// ret degree(x) +// ret degrees(x) DEFINE_INTRINSIC(degrees, 0, float, float) DEFINE_INTRINSIC(degrees, 0, float2, float2) DEFINE_INTRINSIC(degrees, 0, float3, float3) @@ -1197,8 +1263,7 @@ IMPLEMENT_INTRINSIC_SPIRV(degrees, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Degrees) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret radians(x) @@ -1217,8 +1282,7 @@ IMPLEMENT_INTRINSIC_SPIRV(radians, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Radians) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret ddx(x) @@ -1235,8 +1299,39 @@ IMPLEMENT_INTRINSIC_HLSL(ddx, 0, { IMPLEMENT_INTRINSIC_SPIRV(ddx, 0, { return add_instruction(spv::OpDPdx, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); + }) +// ret ddx_coarse(x) +DEFINE_INTRINSIC(ddx_coarse, 0, float, float) +DEFINE_INTRINSIC(ddx_coarse, 0, float2, float2) +DEFINE_INTRINSIC(ddx_coarse, 0, float3, float3) +DEFINE_INTRINSIC(ddx_coarse, 0, float4, float4) 
+IMPLEMENT_INTRINSIC_GLSL(ddx_coarse, 0, { + code += "dFdxCoarse(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(ddx_coarse, 0, { + code += (_shader_model >= 50 ? "ddx_coarse(" : "ddx(") + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(ddx_coarse, 0, { + return + add_instruction(spv::OpDPdxCoarse, convert_type(res_type)) + .add(args[0].base); + }) +// ret ddx_fine(x) +DEFINE_INTRINSIC(ddx_fine, 0, float, float) +DEFINE_INTRINSIC(ddx_fine, 0, float2, float2) +DEFINE_INTRINSIC(ddx_fine, 0, float3, float3) +DEFINE_INTRINSIC(ddx_fine, 0, float4, float4) +IMPLEMENT_INTRINSIC_GLSL(ddx_fine, 0, { + code += "dFdxFine(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(ddx_fine, 0, { + code += (_shader_model >= 50 ? "ddx_fine(" : "ddx(") + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(ddx_fine, 0, { + return + add_instruction(spv::OpDPdxFine, convert_type(res_type)) + .add(args[0].base); }) // ret ddy(x) @@ -1253,8 +1348,39 @@ IMPLEMENT_INTRINSIC_HLSL(ddy, 0, { IMPLEMENT_INTRINSIC_SPIRV(ddy, 0, { return add_instruction(spv::OpDPdy, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); + }) +// ret ddy_coarse(x) +DEFINE_INTRINSIC(ddy_coarse, 0, float, float) +DEFINE_INTRINSIC(ddy_coarse, 0, float2, float2) +DEFINE_INTRINSIC(ddy_coarse, 0, float3, float3) +DEFINE_INTRINSIC(ddy_coarse, 0, float4, float4) +IMPLEMENT_INTRINSIC_GLSL(ddy_coarse, 0, { + code += "dFdyCoarse(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(ddy_coarse, 0, { + code += (_shader_model >= 50 ? 
"ddy_coarse(" : "ddy(") + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(ddy_coarse, 0, { + return + add_instruction(spv::OpDPdyCoarse, convert_type(res_type)) + .add(args[0].base); + }) +// ret ddy_fine(x) +DEFINE_INTRINSIC(ddy_fine, 0, float, float) +DEFINE_INTRINSIC(ddy_fine, 0, float2, float2) +DEFINE_INTRINSIC(ddy_fine, 0, float3, float3) +DEFINE_INTRINSIC(ddy_fine, 0, float4, float4) +IMPLEMENT_INTRINSIC_GLSL(ddy_fine, 0, { + code += "dFdyFine(" + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_HLSL(ddy_fine, 0, { + code += (_shader_model >= 50 ? "ddy_fine(" : "ddy(") + id_to_name(args[0].base) + ')'; + }) +IMPLEMENT_INTRINSIC_SPIRV(ddy_fine, 0, { + return + add_instruction(spv::OpDPdyFine, convert_type(res_type)) + .add(args[0].base); }) // ret fwidth(x) @@ -1271,8 +1397,7 @@ IMPLEMENT_INTRINSIC_HLSL(fwidth, 0, { IMPLEMENT_INTRINSIC_SPIRV(fwidth, 0, { return add_instruction(spv::OpFwidth, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret dot(x, y) @@ -1296,15 +1421,13 @@ IMPLEMENT_INTRINSIC_SPIRV(dot, 0, { return add_instruction(spv::OpFMul, convert_type(res_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(dot, 1, { return add_instruction(spv::OpDot, convert_type(res_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret cross(x, y) @@ -1321,8 +1444,7 @@ IMPLEMENT_INTRINSIC_SPIRV(cross, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Cross) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret length(x) @@ -1341,8 +1463,7 @@ IMPLEMENT_INTRINSIC_SPIRV(length, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Length) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret distance(x, y) @@ -1362,8 +1483,7 @@ IMPLEMENT_INTRINSIC_SPIRV(distance, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Distance) .add(args[0].base) - 
.add(args[1].base) - .result; + .add(args[1].base); }) // ret normalize(x) @@ -1381,8 +1501,7 @@ IMPLEMENT_INTRINSIC_SPIRV(normalize, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Normalize) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret transpose(x) @@ -1404,8 +1523,7 @@ IMPLEMENT_INTRINSIC_HLSL(transpose, 0, { IMPLEMENT_INTRINSIC_SPIRV(transpose, 0, { return add_instruction(spv::OpTranspose, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret determinant(m) @@ -1423,8 +1541,7 @@ IMPLEMENT_INTRINSIC_SPIRV(determinant, 0, { add_instruction(spv::OpExtInst, convert_type(res_type)) .add(_glsl_ext) .add(spv::GLSLstd450Determinant) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret reflect(i, n) @@ -1443,8 +1560,7 @@ IMPLEMENT_INTRINSIC_SPIRV(reflect, 0, { .add(_glsl_ext) .add(spv::GLSLstd450Reflect) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret refract(i, n, eta) @@ -1464,8 +1580,7 @@ IMPLEMENT_INTRINSIC_SPIRV(refract, 0, { .add(spv::GLSLstd450Refract) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret faceforward(n, i, ng) @@ -1486,8 +1601,7 @@ IMPLEMENT_INTRINSIC_SPIRV(faceforward, 0, { .add(spv::GLSLstd450FaceForward) .add(args[0].base) .add(args[1].base) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret mul(x, y) @@ -1507,8 +1621,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 0, { return add_instruction(spv::OpVectorTimesScalar, convert_type(res_type)) .add(args[1].base) - .add(args[0].base) - .result; + .add(args[0].base); }) DEFINE_INTRINSIC(mul, 1, int2, int2, int) DEFINE_INTRINSIC(mul, 1, int3, int3, int) @@ -1526,8 +1639,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 1, { return add_instruction(spv::OpVectorTimesScalar, convert_type(res_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) DEFINE_INTRINSIC(mul, 2, int2x2, int, int2x2) @@ 
-1558,8 +1670,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 2, { return add_instruction(spv::OpMatrixTimesScalar, convert_type(res_type)) .add(args[1].base) - .add(args[0].base) - .result; + .add(args[0].base); }) DEFINE_INTRINSIC(mul, 3, int2x2, int2x2, int) DEFINE_INTRINSIC(mul, 3, int2x3, int2x3, int) @@ -1589,8 +1700,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 3, { return add_instruction(spv::OpMatrixTimesScalar, convert_type(res_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); }) DEFINE_INTRINSIC(mul, 4, int2, int2, int2x2) @@ -1622,8 +1732,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 4, { return add_instruction(spv::OpMatrixTimesVector, convert_type(res_type)) .add(args[1].base) // Flip inputs because matrices are column-wise - .add(args[0].base) - .result; + .add(args[0].base); }) DEFINE_INTRINSIC(mul, 5, int2, int2x2, int2) DEFINE_INTRINSIC(mul, 5, int2, int2x3, int3) @@ -1654,8 +1763,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 5, { return add_instruction(spv::OpVectorTimesMatrix, convert_type(res_type)) .add(args[1].base) // Flip inputs because matrices are column-wise - .add(args[0].base) - .result; + .add(args[0].base); }) DEFINE_INTRINSIC(mul, 6, int2x2, int2x2, int2x2) @@ -1723,8 +1831,7 @@ IMPLEMENT_INTRINSIC_SPIRV(mul, 6, { return add_instruction(spv::OpMatrixTimesMatrix, convert_type(res_type)) .add(args[1].base) // Flip inputs because matrices are column-wise - .add(args[0].base) - .result; + .add(args[0].base); }) // ret isinf(x) @@ -1741,8 +1848,7 @@ IMPLEMENT_INTRINSIC_HLSL(isinf, 0, { IMPLEMENT_INTRINSIC_SPIRV(isinf, 0, { return add_instruction(spv::OpIsInf, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret isnan(x) @@ -1759,8 +1865,7 @@ IMPLEMENT_INTRINSIC_HLSL(isnan, 0, { IMPLEMENT_INTRINSIC_SPIRV(isnan, 0, { return add_instruction(spv::OpIsNan, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret tex1D(s, coords) @@ -1787,9 +1892,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex1D, 0, { 
if (_shader_model >= 40) { // SM4 and higher use a more object-oriented programming model for textures if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + "); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + " * _dimensions, 0)); }"; } else { code += "tex1D(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -1802,8 +1910,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex1D, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else - code += "uint temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + "); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0), " + id_to_name(args[2].base) + ')'; + code += "0; { " + "float _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + " * _dimensions, 0), " + id_to_name(args[2].base) + "); }"; } else { code += "tex1D(" + id_to_name(args[0].base) + ".s, " + 
id_to_name(args[1].base) + " + " + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize)"; @@ -1818,15 +1928,13 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1D, 0, { const spv::Id res = add_instruction(spv::OpImageSampleImplicitLod, convert_type(res_vector_type)) .add(args[0].base) .add(args[1].base) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -1842,14 +1950,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1D, 1, { .add(args[0].base) .add(args[1].base) .add(args[2].is_constant ? spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -1878,9 +1984,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex2D, 0, { if (_shader_model >= 40) { // SM4 and higher use a more object-oriented programming model for textures if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint2 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + + id_to_name(res) + " = " 
+ id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + " * _dimensions, 0)); }"; } else { code += "tex2D(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -1893,8 +2002,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex2D, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else - code += "uint2 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0), " + id_to_name(args[2].base) + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + " * _dimensions, 0), " + id_to_name(args[2].base) + "); }"; } else { code += "tex2D(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + " + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize)"; @@ -1909,15 +2020,13 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2D, 0, { const spv::Id res = add_instruction(spv::OpImageSampleImplicitLod, convert_type(res_vector_type)) .add(args[0].base) .add(args[1].base) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -1933,14 +2042,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2D, 1, { .add(args[0].base) .add(args[1].base) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -1969,9 +2076,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex3D, 0, { if (_shader_model >= 40) { // SM4 and higher use a more object-oriented programming model for textures if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint3 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float3 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y, _dimensions.z); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + " * _dimensions, 0)); }"; } else { code += "tex3D(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -1984,8 +2094,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex3D, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.Sample(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else - code += "uint3 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions(temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z); " + - 
id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + " * temp" + std::to_string(res) + ", 0), " + id_to_name(args[2].base) + ')'; + code += "0; { " + "float3 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y, _dimensions.z); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + " * _dimensions, 0), " + id_to_name(args[2].base) + "); }"; } else { code += "tex3D(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + " + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize)"; @@ -2000,15 +2112,13 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3D, 0, { const spv::Id res = add_instruction(spv::OpImageSampleImplicitLod, convert_type(res_vector_type)) .add(args[0].base) .add(args[1].base) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2024,14 +2134,258 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3D, 1, { .add(args[0].base) .add(args[1].base) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); + else + return res; + }) + +// ret tex1Dgrad(s, coords, ddx, ddy) +// ret tex1Dgrad(s, coords, ddx, ddy, offset) +DEFINE_INTRINSIC(tex1Dgrad, 0, int, sampler1d_int, float, float, float) +DEFINE_INTRINSIC(tex1Dgrad, 0, uint, sampler1d_uint, float, float, float) +DEFINE_INTRINSIC(tex1Dgrad, 0, float, sampler1d_float, float, float, float) +DEFINE_INTRINSIC(tex1Dgrad, 0, float4, sampler1d_float4, float, float, float) +DEFINE_INTRINSIC(tex1Dgrad, 1, int, sampler1d_int, float, float, float, int) +DEFINE_INTRINSIC(tex1Dgrad, 1, uint, sampler1d_uint, float, float, float, int) +DEFINE_INTRINSIC(tex1Dgrad, 1, float, sampler1d_float, float, float, float, int) +DEFINE_INTRINSIC(tex1Dgrad, 1, float4, sampler1d_float4, float, float, float, int) +IMPLEMENT_INTRINSIC_GLSL(tex1Dgrad, 0, { + code += "textureGrad(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; // Collapse last argument from a 4-component vector + }) +IMPLEMENT_INTRINSIC_GLSL(tex1Dgrad, 1, { + code += "textureGradOffset(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + }) +IMPLEMENT_INTRINSIC_HLSL(tex1Dgrad, 0, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + } + else { + code += "tex1Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + 
id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_HLSL(tex1Dgrad, 1, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + } + else { + code += "tex1Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + " + id_to_name(args[4].base) + " * " + id_to_name(args[0].base) + ".pixelsize, " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_SPIRV(tex1Dgrad, 0, { + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask) + .add(args[2].base) + .add(args[3].base); + if (res_type.rows == 1) + // Collapse last argument from a 4-component vector + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); + else + return res; + }) +IMPLEMENT_INTRINSIC_SPIRV(tex1Dgrad, 1, { + if (!args[4].is_constant) + add_capability(spv::CapabilityImageGatherExtended); + + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask | (args[4].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) + .add(args[2].base) + .add(args[3].base) + .add(args[4].base); + if (res_type.rows == 1) + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); + else + return res; + }) + +// ret tex2Dgrad(s, coords, ddx, ddy) +// ret tex2Dgrad(s, coords, ddx, ddy, offset) +DEFINE_INTRINSIC(tex2Dgrad, 0, int, sampler2d_int, float2, float2, float2) +DEFINE_INTRINSIC(tex2Dgrad, 0, uint, sampler2d_uint, float2, float2, float2) +DEFINE_INTRINSIC(tex2Dgrad, 0, float, sampler2d_float, float2, float2, float2) +DEFINE_INTRINSIC(tex2Dgrad, 0, float4, sampler2d_float4, float2, float2, float2) +DEFINE_INTRINSIC(tex2Dgrad, 1, int, sampler2d_int, float2, float2, float2, int2) +DEFINE_INTRINSIC(tex2Dgrad, 1, uint, sampler2d_uint, float2, float2, float2, int2) +DEFINE_INTRINSIC(tex2Dgrad, 1, float, sampler2d_float, float2, float2, float2, int2) +DEFINE_INTRINSIC(tex2Dgrad, 1, float4, sampler2d_float4, float2, float2, float2, int2) +IMPLEMENT_INTRINSIC_GLSL(tex2Dgrad, 0, { + code += "textureGrad(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; // Collapse last argument from a 4-component vector + }) +IMPLEMENT_INTRINSIC_GLSL(tex2Dgrad, 1, { + code += "textureGradOffset(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + }) +IMPLEMENT_INTRINSIC_HLSL(tex2Dgrad, 0, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + } + else { + code += "tex2Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + 
id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_HLSL(tex2Dgrad, 1, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + } + else { + code += "tex2Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + " + id_to_name(args[4].base) + " * " + id_to_name(args[0].base) + ".pixelsize, " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_SPIRV(tex2Dgrad, 0, { + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask) + .add(args[2].base) + .add(args[3].base); + if (res_type.rows == 1) + // Collapse last argument from a 4-component vector + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); + else + return res; + }) +IMPLEMENT_INTRINSIC_SPIRV(tex2Dgrad, 1, { + if (!args[4].is_constant) + add_capability(spv::CapabilityImageGatherExtended); + + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask | (args[4].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) + .add(args[2].base) + .add(args[3].base) + .add(args[4].base); + if (res_type.rows == 1) + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); + else + return res; + }) + +// ret tex3Dgrad(s, coords, ddx, ddy) +// ret tex3Dgrad(s, coords, ddx, ddy, offset) +DEFINE_INTRINSIC(tex3Dgrad, 0, int, sampler3d_int, float3, float3, float3) +DEFINE_INTRINSIC(tex3Dgrad, 0, uint, sampler3d_uint, float3, float3, float3) +DEFINE_INTRINSIC(tex3Dgrad, 0, float, sampler3d_float, float3, float3, float3) +DEFINE_INTRINSIC(tex3Dgrad, 0, float4, sampler3d_float4, float3, float3, float3) +DEFINE_INTRINSIC(tex3Dgrad, 1, int, sampler3d_int, float3, float3, float3, int3) +DEFINE_INTRINSIC(tex3Dgrad, 1, uint, sampler3d_uint, float3, float3, float3, int3) +DEFINE_INTRINSIC(tex3Dgrad, 1, float, sampler3d_float, float3, float3, float3, int3) +DEFINE_INTRINSIC(tex3Dgrad, 1, float4, sampler3d_float4, float3, float3, float3, int3) +IMPLEMENT_INTRINSIC_GLSL(tex3Dgrad, 0, { + code += "textureGrad(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; // Collapse last argument from a 4-component vector + }) +IMPLEMENT_INTRINSIC_GLSL(tex3Dgrad, 1, { + code += "textureGradOffset(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + }) +IMPLEMENT_INTRINSIC_HLSL(tex3Dgrad, 0, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + } + else { + code += "tex3Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + 
id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_HLSL(tex3Dgrad, 1, { + if (_shader_model >= 40) { + code += id_to_name(args[0].base) + ".t.SampleGrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(args[4].base) + ')'; + } + else { + code += "tex3Dgrad(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + " + id_to_name(args[4].base) + " * " + id_to_name(args[0].base) + ".pixelsize, " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ')'; + if (res_type.rows == 1) + code += ".x"; + } + }) +IMPLEMENT_INTRINSIC_SPIRV(tex3Dgrad, 0, { + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask) + .add(args[2].base) + .add(args[3].base); + if (res_type.rows == 1) + // Collapse last argument from a 4-component vector + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); + else + return res; + }) +IMPLEMENT_INTRINSIC_SPIRV(tex3Dgrad, 1, { + if (!args[4].is_constant) + add_capability(spv::CapabilityImageGatherExtended); + + type res_vector_type = res_type; + res_vector_type.rows = 4; + + const spv::Id res = add_instruction(spv::OpImageSampleExplicitLod, convert_type(res_vector_type)) + .add(args[0].base) + .add(args[1].base) + .add(spv::ImageOperandsGradMask | (args[4].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) + .add(args[2].base) + .add(args[3].base) + .add(args[4].base); + if (res_type.rows == 1) + return + add_instruction(spv::OpCompositeExtract, convert_type(res_type)) + .add(res) + .add(0u); else return res; }) @@ -2060,9 +2414,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dlod, 0, { if (_shader_model >= 40) { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".x, " + id_to_name(args[1].base) + ".w)"; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint2 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + ".x * temp" + std::to_string(res) + ".x, (int)" + id_to_name(args[1].base) + ".w))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + ".x * _dimensions, (int)" + id_to_name(args[1].base) + ".w)); }"; } else { code += "tex1Dlod(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -2075,8 +2432,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dlod, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".x, " + id_to_name(args[1].base) + ".w, " + id_to_name(args[2].base) + ')'; else - code += "uint2 temp" + std::to_string(res) + "; " + 
id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + ".x * temp" + std::to_string(res) + ".x, (int)" + id_to_name(args[1].base) + ".w))" + id_to_name(args[2].base) + ')'; + code += "0; { " + "float _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + ".x * _dimensions, (int)" + id_to_name(args[1].base) + ".w), " + id_to_name(args[2].base) + "); }"; } else { code += "tex1Dlod(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + float4(" + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize, 0, 0, 0))"; @@ -2087,12 +2446,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dlod, 1, { IMPLEMENT_INTRINSIC_SPIRV(tex1Dlod, 0, { const spv::Id x = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(0) // .x - .result; + .add(0); // .x; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2101,15 +2458,13 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dlod, 0, { .add(args[0].base) .add(x) .add(spv::ImageOperandsLodMask) - .add(lod) - .result; + .add(lod); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2119,12 +2474,10 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dlod, 1, { const spv::Id x = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - 
.add(0) // .x - .result; + .add(0); // .x; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2134,14 +2487,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dlod, 1, { .add(x) .add(spv::ImageOperandsLodMask | (args[2].is_constant ? spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) .add(lod) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2170,9 +2521,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex2Dlod, 0, { if (_shader_model >= 40) { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".xy, " + id_to_name(args[1].base) + ".w)"; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint3 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + ".xy * temp" + std::to_string(res) + ".xy, (int)" + id_to_name(args[1].base) + ".w))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float2 _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions.x, _dimensions.y, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + ".xy * _dimensions, (int)" + id_to_name(args[1].base) + ".w)); }"; } else { code += "tex2Dlod(" 
+ id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -2185,8 +2539,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex2Dlod, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".xy, " + id_to_name(args[1].base) + ".w, " + id_to_name(args[2].base) + ')'; else - code += "uint3 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + ".xy * temp" + std::to_string(res) + ".xy, (int)" + id_to_name(args[1].base) + ".w))" + id_to_name(args[2].base) + ')'; + code += "0; { " + "float2 _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions.x, _dimensions.y, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + ".xy * _dimensions, (int)" + id_to_name(args[1].base) + ".w), " + id_to_name(args[2].base) + "); }"; } else { code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + float4(" + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize, 0, 0))"; @@ -2198,13 +2554,11 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dlod, 0, { const spv::Id xy = add_instruction(spv::OpVectorShuffle, convert_type({ type::t_float, 2, 1 })) .add(args[1].base) .add(args[1].base) - .add(0) // .x - .add(1) // .y - .result; + .add(0) // .x + .add(1); // .y; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2213,15 +2567,13 @@ 
IMPLEMENT_INTRINSIC_SPIRV(tex2Dlod, 0, { .add(args[0].base) .add(xy) .add(spv::ImageOperandsLodMask) - .add(lod) - .result; + .add(lod); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2232,13 +2584,11 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dlod, 1, { const spv::Id xy = add_instruction(spv::OpVectorShuffle, convert_type({ type::t_float, 2, 1 })) .add(args[1].base) .add(args[1].base) - .add(0) // .x - .add(1) // .y - .result; + .add(0) // .x + .add(1); // .y; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2248,14 +2598,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dlod, 1, { .add(xy) .add(spv::ImageOperandsLodMask | (args[2].is_constant ? spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) .add(lod) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2284,9 +2632,12 @@ IMPLEMENT_INTRINSIC_HLSL(tex3Dlod, 0, { if (_shader_model >= 40) { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".xyz, " + id_to_name(args[1].base) + ".w)"; - else // Integer sampling is not supported until SM6.7, so emulate with a texture fetch - code += "uint4 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z, temp" + std::to_string(res) + ".w); " + - id_to_name(res) + " = " + 
id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + ".xyz * temp" + std::to_string(res) + ".xyz, (int)" + id_to_name(args[1].base) + ".w))"; + else + // Integer sampling is not supported until SM6.7, so emulate with a texture fetch + code += "0; { " + "float3 _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions.x, _dimensions.y, _dimensions.z, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + ".xyz * _dimensions, (int)" + id_to_name(args[1].base) + ".w)); }"; } else { code += "tex3Dlod(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; @@ -2299,8 +2650,10 @@ IMPLEMENT_INTRINSIC_HLSL(tex3Dlod, 1, { if (res_type.is_floating_point() || _shader_model >= 67) code += id_to_name(args[0].base) + ".t.SampleLevel(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ".xyz, " + id_to_name(args[1].base) + ".w, " + id_to_name(args[2].base) + ')'; else - code += "uint4 temp" + std::to_string(res) + "; " + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, temp" + std::to_string(res) + ".x, temp" + std::to_string(res) + ".y, temp" + std::to_string(res) + ".z, temp" + std::to_string(res) + ".w); " + - id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + ".xyz * temp" + std::to_string(res) + ".xyz, (int)" + id_to_name(args[1].base) + ".w))" + id_to_name(args[2].base) + ')'; + code += "0; { " + "float3 _dimensions; float _levels; " + + id_to_name(args[0].base) + ".t.GetDimensions((int)" + id_to_name(args[1].base) + ".w, _dimensions.x, _dimensions.y, _dimensions.z, _levels); " + + id_to_name(res) + " = " + id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + ".xyz * _dimensions, (int)" + id_to_name(args[1].base) + ".w), " + id_to_name(args[2].base) + "); }"; } else { code += "tex3Dlod(" + 
id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + " + float4(" + id_to_name(args[2].base) + " * " + id_to_name(args[0].base) + ".pixelsize, 0))"; @@ -2312,14 +2665,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dlod, 0, { const spv::Id xyz = add_instruction(spv::OpVectorShuffle, convert_type({ type::t_float, 3, 1 })) .add(args[1].base) .add(args[1].base) - .add(0) // .x - .add(1) // .y - .add(2) // .z - .result; + .add(0) // .x + .add(1) // .y + .add(2); // .z; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2328,15 +2679,13 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dlod, 0, { .add(args[0].base) .add(xyz) .add(spv::ImageOperandsLodMask) - .add(lod) - .result; + .add(lod); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2347,14 +2696,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dlod, 1, { const spv::Id xyz = add_instruction(spv::OpVectorShuffle, convert_type({ type::t_float, 3, 1 })) .add(args[1].base) .add(args[1].base) - .add(0) // .x - .add(1) // .y - .add(2) // .z - .result; + .add(0) // .x + .add(1) // .y + .add(2); // .z; const spv::Id lod = add_instruction(spv::OpCompositeExtract, convert_type({ type::t_float, 1, 1 })) .add(args[1].base) - .add(3) // .w - .result; + .add(3); // .w; type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2364,14 +2711,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dlod, 1, { .add(xyz) .add(spv::ImageOperandsLodMask | (args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask)) .add(lod) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2412,8 +2757,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dfetch, 0, { // SM3 does not have a fetch intrinsic, so emulate it by transforming coordinates into texture space ones // Also add a half-pixel offset to align texels with pixels // (coords + 0.5) / size - code += "tex1Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0, 0, 0))"; + code += "tex1Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0, 0, 0))"; if (res_type.rows == 1) code += ".x"; } @@ -2422,9 +2767,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dfetch, 1, { if (_shader_model >= 40) code += id_to_name(args[0].base) + ".t.Load(int2(" + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + "))"; else { - code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), 0, 0, " + - id_to_name(args[2].base) + "))"; + code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), 0, 0, " + id_to_name(args[2].base) + "))"; if (res_type.rows == 1) code += ".x"; } @@ -2434,30 +2778,26 @@ IMPLEMENT_INTRINSIC_HLSL(tex1Dfetch, 2, { }) IMPLEMENT_INTRINSIC_SPIRV(tex1Dfetch, 0, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; const spv::Id res = 
add_instruction(spv::OpImageFetch, convert_type(res_vector_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) IMPLEMENT_INTRINSIC_SPIRV(tex1Dfetch, 1, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2466,14 +2806,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dfetch, 1, { .add(image) .add(args[1].base) .add(spv::ImageOperandsLodMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2483,14 +2821,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dfetch, 2, { const spv::Id res = add_instruction(spv::OpImageRead, convert_type(res_vector_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2531,8 +2867,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex2Dfetch, 0, { // SM3 does not have a fetch intrinsic, so emulate it by transforming coordinates into texture space ones // Also add a half-pixel offset to align texels with pixels // (coords + 0.5) / size - code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0, 0))"; + code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0, 0))"; if (res_type.rows == 1) code += ".x"; } @@ -2541,9 +2877,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex2Dfetch, 1, { 
if (_shader_model >= 40) code += id_to_name(args[0].base) + ".t.Load(int3(" + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + "))"; else { - code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), 0, " + - id_to_name(args[2].base) + "))"; + code += "tex2Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), 0, " + id_to_name(args[2].base) + "))"; if (res_type.rows == 1) code += ".x"; } @@ -2556,30 +2891,26 @@ IMPLEMENT_INTRINSIC_HLSL(tex2Dfetch, 2, { }) IMPLEMENT_INTRINSIC_SPIRV(tex2Dfetch, 0, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; const spv::Id res = add_instruction(spv::OpImageFetch, convert_type(res_vector_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) IMPLEMENT_INTRINSIC_SPIRV(tex2Dfetch, 1, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2588,14 +2919,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dfetch, 1, { .add(image) .add(args[1].base) .add(spv::ImageOperandsLodMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2605,14 +2934,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dfetch, 2, { const 
spv::Id res = add_instruction(spv::OpImageRead, convert_type(res_vector_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2653,8 +2980,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex3Dfetch, 0, { // SM3 does not have a fetch intrinsic, so emulate it by transforming coordinates into texture space ones // Also add a half-pixel offset to align texels with pixels // (coords + 0.5) / size - code += "tex3Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0))"; + code += "tex3Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize, 0))"; if (res_type.rows == 1) code += ".x"; } @@ -2663,9 +2990,8 @@ IMPLEMENT_INTRINSIC_HLSL(tex3Dfetch, 1, { if (_shader_model >= 40) code += id_to_name(args[0].base) + ".t.Load(int4(" + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + "))"; else { - code += "tex3Dlod(" + id_to_name(args[0].base) + ".s, float4((" + - id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), " + - id_to_name(args[2].base) + "))"; + code += "tex3Dlod(" + id_to_name(args[0].base) + ".s, float4(" + "(" + id_to_name(args[1].base) + " + 0.5) * " + id_to_name(args[0].base) + ".pixelsize * exp2(" + id_to_name(args[2].base) + "), " + id_to_name(args[2].base) + "))"; if (res_type.rows == 1) code += ".x"; } @@ -2675,30 +3001,26 @@ IMPLEMENT_INTRINSIC_HLSL(tex3Dfetch, 2, { }) IMPLEMENT_INTRINSIC_SPIRV(tex3Dfetch, 0, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; const spv::Id res = 
add_instruction(spv::OpImageFetch, convert_type(res_vector_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) // Collapse last argument from a 4-component vector return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) IMPLEMENT_INTRINSIC_SPIRV(tex3Dfetch, 1, { const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); type res_vector_type = res_type; res_vector_type.rows = 4; @@ -2707,14 +3029,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dfetch, 1, { .add(image) .add(args[1].base) .add(spv::ImageOperandsLodMask) - .add(args[2].base) - .result; + .add(args[2].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2724,14 +3044,12 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dfetch, 2, { const spv::Id res = add_instruction(spv::OpImageRead, convert_type(res_vector_type)) .add(args[0].base) - .add(args[1].base) - .result; + .add(args[1].base); if (res_type.rows == 1) return add_instruction(spv::OpCompositeExtract, convert_type(res_type)) .add(res) - .add(0u) - .result; + .add(0u); else return res; }) @@ -2759,35 +3077,45 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherR, 0, { const std::string s = id_to_name(args[0].base); if (_shader_model >= 50) code += s + ".t.GatherRed(" + s + ".s, " + id_to_name(args[1].base) + ')'; - else if (_shader_model >= 40) // Emulate texture gather intrinsic by sampling each location separately (SM41 has 'Gather', but that only works on single component texture formats) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 1))." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 1))." 
+ 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 0))." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 0))." + 'r' + ')'; + else if (_shader_model >= 40) + // Emulate texture gather intrinsic by fetching each location separately (SM41 has 'Gather', but that only works on single component texture formats) + // See also https://www.reedbeta.com/blog/texture-gathers-and-coordinate-precision/ + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1)).r, " + + s + ".t.Load(_location, int2(1, 1)).r, " + + s + ".t.Load(_location, int2(1, 0)).r, " + + s + ".t.Load(_location, int2(0, 0)).r); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0))." 
+ 'r' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0)).r)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherR, 1, { const std::string s = id_to_name(args[0].base); if (_shader_model >= 50) code += s + ".t.GatherRed(" + s + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 1))." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 1))." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 0))." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 0))." 
+ 'r' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").r, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[2].base) + ").r, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[2].base) + ").r, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[2].base) + ").r); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0))." 
+ 'r' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0)).r)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherR, 2, { const std::string s = id_to_name(args[0].base); @@ -2798,17 +3126,21 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherR, 2, { id_to_name(args[4].base) + " - int2(1, 0), " + id_to_name(args[5].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + ")." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[3].base) + ")." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[4].base) + ")." + 'r' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[5].base) + ")." 
+ 'r' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").r, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[3].base) + ").r, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[4].base) + ").r, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[5].base) + ").r); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0))." + 'r' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0))." 
+ 'r' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0)).r, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0)).r)"; }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 0, { const spv::Id comp = emit_constant(0u); @@ -2818,8 +3150,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 0, { .add(args[0].base) .add(args[1].base) .add(comp) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 1, { if (!args[2].is_constant) @@ -2833,8 +3164,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 1, { .add(args[1].base) .add(comp) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 2, { add_capability(spv::CapabilityImageGatherExtended); @@ -2844,8 +3174,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 2, { .add(args[2].base) .add(args[3].base) .add(args[4].base) - .add(args[5].base) - .result; + .add(args[5].base); return add_instruction(spv::OpImageGather, convert_type(res_type)) @@ -2853,8 +3182,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherR, 2, { .add(args[1].base) .add(comp) .add(spv::ImageOperandsConstOffsetsMask) - .add(offsets) - .result; + .add(offsets); }) // ret tex2DgatherG(s, coords) // ret tex2DgatherG(s, coords, offset) @@ -2880,34 +3208,42 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherG, 0, { if (_shader_model >= 50) code += s + ".t.GatherGreen(" + id_to_name(args[0].base) + ".s, " + id_to_name(args[1].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 1))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 1))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 0))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 0))." + 'g' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1)).g, " + + s + ".t.Load(_location, int2(1, 1)).g, " + + s + ".t.Load(_location, int2(1, 0)).g, " + + s + ".t.Load(_location, int2(0, 0)).g); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0))." 
+ 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0))." + 'g' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0)).g)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherG, 1, { const std::string s = id_to_name(args[0].base); if (_shader_model >= 50) code += s + ".t.GatherGreen(" + s + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 1))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 1))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 0))." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 0))." 
+ 'g' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").g, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[2].base) + ").g, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[2].base) + ").g, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[2].base) + ").g); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0))." 
+ 'g' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0)).g)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherG, 2, { const std::string s = id_to_name(args[0].base); @@ -2918,17 +3254,21 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherG, 2, { id_to_name(args[4].base) + " - int2(1, 0), " + id_to_name(args[5].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + ")." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[3].base) + ")." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[4].base) + ")." + 'g' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[5].base) + ")." 
+ 'g' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").g, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[3].base) + ").g, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[4].base) + ").g, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[5].base) + ").g); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0))." + 'g' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0))." 
+ 'g' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0)).g, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0)).g)"; }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 0, { const spv::Id comp = emit_constant(1u); @@ -2938,8 +3278,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 0, { .add(args[0].base) .add(args[1].base) .add(comp) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 1, { if (!args[2].is_constant) @@ -2953,8 +3292,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 1, { .add(args[1].base) .add(comp) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 2, { add_capability(spv::CapabilityImageGatherExtended); @@ -2964,8 +3302,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 2, { .add(args[2].base) .add(args[3].base) .add(args[4].base) - .add(args[5].base) - .result; + .add(args[5].base); return add_instruction(spv::OpImageGather, convert_type(res_type)) @@ -2973,8 +3310,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherG, 2, { .add(args[1].base) .add(comp) .add(spv::ImageOperandsConstOffsetsMask) - .add(offsets) - .result; + .add(offsets); }) // ret tex2DgatherB(s, coords) // ret tex2DgatherB(s, coords, offset) @@ -3000,34 +3336,42 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherB, 0, { if (_shader_model >= 50) code += s + ".t.GatherBlue(" + s + ".s, " + id_to_name(args[1].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 1))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 1))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 0))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 0))." + 'b' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1)).b, " + + s + ".t.Load(_location, int2(1, 1)).b, " + + s + ".t.Load(_location, int2(1, 0)).b, " + + s + ".t.Load(_location, int2(0, 0)).b); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0))." 
+ 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0))." + 'b' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0)).b)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherB, 1, { const std::string s = id_to_name(args[0].base); if (_shader_model >= 50) code += s + ".t.GatherBlue(" + s + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 1))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 1))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 0))." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 0))." 
+ 'b' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").b, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[2].base) + ").b, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[2].base) + ").b, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[2].base) + ").b); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0))." 
+ 'b' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0)).b)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherB, 2, { const std::string s = id_to_name(args[0].base); @@ -3038,17 +3382,21 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherB, 2, { id_to_name(args[4].base) + " - int2(1, 0), " + id_to_name(args[5].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + ")." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[3].base) + ")." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[4].base) + ")." + 'b' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[5].base) + ")." 
+ 'b' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").b, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[3].base) + ").b, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[4].base) + ").b, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[5].base) + ").b); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0))." + 'b' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0))." 
+ 'b' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0)).b, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0)).b)"; }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 0, { const spv::Id comp = emit_constant(2u); @@ -3058,8 +3406,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 0, { .add(args[0].base) .add(args[1].base) .add(comp) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 1, { if (!args[2].is_constant) @@ -3073,8 +3420,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 1, { .add(args[1].base) .add(comp) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 2, { add_capability(spv::CapabilityImageGatherExtended); @@ -3084,8 +3430,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 2, { .add(args[2].base) .add(args[3].base) .add(args[4].base) - .add(args[5].base) - .result; + .add(args[5].base); return add_instruction(spv::OpImageGather, convert_type(res_type)) @@ -3093,8 +3438,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherB, 2, { .add(args[1].base) .add(comp) .add(spv::ImageOperandsConstOffsetsMask) - .add(offsets) - .result; + .add(offsets); }) // ret tex2DgatherA(s, coords) // ret tex2DgatherA(s, coords, offset) @@ -3120,34 +3464,42 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherA, 0, { if (_shader_model >= 50) code += s + ".t.GatherAlpha(" + s + ".s, " + id_to_name(args[1].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 1))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 1))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(1, 0))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, int2(0, 0))." + 'a' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1)).a, " + + s + ".t.Load(_location, int2(1, 1)).a, " + + s + ".t.Load(_location, int2(1, 0)).a, " + + s + ".t.Load(_location, int2(0, 0)).a); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0))." 
+ 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0))." + 'a' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 1) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 1) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(1, 0) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + float2(0, 0) * " + s + ".pixelsize, 0, 0)).a)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherA, 1, { const std::string s = id_to_name(args[0].base); if (_shader_model >= 50) code += s + ".t.GatherAlpha(" + s + ".s, " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 1))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 1))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(1, 0))." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + " + int2(0, 0))." 
+ 'a' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").a, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[2].base) + ").a, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[2].base) + ").a, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[2].base) + ").a); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0))." 
+ 'a' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 1)) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 1)) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(1, 0)) * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + " + float2(0, 0)) * " + s + ".pixelsize, 0, 0)).a)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2DgatherA, 2, { const std::string s = id_to_name(args[0].base); @@ -3158,17 +3510,21 @@ IMPLEMENT_INTRINSIC_HLSL(tex2DgatherA, 2, { id_to_name(args[4].base) + " - int2(1, 0), " + id_to_name(args[5].base) + ')'; else if (_shader_model >= 40) - code += "float4(" + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[2].base) + ")." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[3].base) + ")." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[4].base) + ")." + 'a' + ", " + - s + ".t.SampleLevel(" + s + ".s, " + id_to_name(args[1].base) + ", 0, " + id_to_name(args[5].base) + ")." 
+ 'a' + ')'; + code += "0; { " + "float2 _dimensions; " + + id_to_name(args[0].base) + ".t.GetDimensions(_dimensions.x, _dimensions.y); " + "int3 _location = int3(" + id_to_name(args[1].base) + " * _dimensions - 0.5 + 1.0 / 512.0, 0); " + + id_to_name(res) + " = float4(" + + s + ".t.Load(_location, int2(0, 1) + " + id_to_name(args[2].base) + ").a, " + + s + ".t.Load(_location, int2(1, 1) + " + id_to_name(args[3].base) + ").a, " + + s + ".t.Load(_location, int2(1, 0) + " + id_to_name(args[4].base) + ").a, " + + s + ".t.Load(_location, int2(0, 0) + " + id_to_name(args[5].base) + ").a); }"; else code += "float4(" - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0))." + 'a' + ", " - "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0))." 
+ 'a' + ')'; + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[2].base) + ") * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[3].base) + ") * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[4].base) + ") * " + s + ".pixelsize, 0, 0)).a, " + "tex2Dlod(" + s + ".s, float4(" + id_to_name(args[1].base) + " + (" + id_to_name(args[5].base) + ") * " + s + ".pixelsize, 0, 0)).a)"; }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 0, { const spv::Id comp = emit_constant(3u); @@ -3178,8 +3534,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 0, { .add(args[0].base) .add(args[1].base) .add(comp) - .add(spv::ImageOperandsMaskNone) - .result; + .add(spv::ImageOperandsMaskNone); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 1, { if (!args[2].is_constant) @@ -3193,8 +3548,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 1, { .add(args[1].base) .add(comp) .add(args[2].is_constant ? 
spv::ImageOperandsConstOffsetMask : spv::ImageOperandsOffsetMask) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 2, { add_capability(spv::CapabilityImageGatherExtended); @@ -3204,8 +3558,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 2, { .add(args[2].base) .add(args[3].base) .add(args[4].base) - .add(args[5].base) - .result; + .add(args[5].base); return add_instruction(spv::OpImageGather, convert_type(res_type)) @@ -3213,8 +3566,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2DgatherA, 2, { .add(args[1].base) .add(comp) .add(spv::ImageOperandsConstOffsetsMask) - .add(offsets) - .result; + .add(offsets); }) // tex1Dstore(s, coords, value) @@ -3223,17 +3575,17 @@ DEFINE_INTRINSIC(tex1Dstore, 0, void, storage1d_uint, int, uint) DEFINE_INTRINSIC(tex1Dstore, 0, void, storage1d_float, int, float) DEFINE_INTRINSIC(tex1Dstore, 0, void, storage1d_float4, int, float4) IMPLEMENT_INTRINSIC_GLSL(tex1Dstore, 0, { - code += "imageStore(" + id_to_name(args[0].base) + ", " + - id_to_name(args[1].base) + ", " + - id_to_name(args[2].base); + code += "imageStore(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", "; if (args[2].type.rows == 1) - code += ".xxxx"; // Expand last argument to a 4-component vector + code += '('; + code += id_to_name(args[2].base); + if (args[2].type.rows == 1) + code += ").xxxx"; // Expand last argument to a 4-component vector code += ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex1Dstore, 0, { - if (_shader_model >= 50) { + if (_shader_model >= 50) code += id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + "] = " + id_to_name(args[2].base); - } }) IMPLEMENT_INTRINSIC_SPIRV(tex1Dstore, 0, { spv::Id data = args[2].base; @@ -3247,8 +3599,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dstore, 0, { .add(data) .add(data) .add(data) - .add(data) - .result; + .add(data); } add_instruction_without_result(spv::OpImageWrite) @@ -3264,11 +3615,12 @@ DEFINE_INTRINSIC(tex2Dstore, 0, void, storage2d_uint, int2, uint) 
DEFINE_INTRINSIC(tex2Dstore, 0, void, storage2d_float, int2, float) DEFINE_INTRINSIC(tex2Dstore, 0, void, storage2d_float4, int2, float4) IMPLEMENT_INTRINSIC_GLSL(tex2Dstore, 0, { - code += "imageStore(" + id_to_name(args[0].base) + ", " + - id_to_name(args[1].base) + ", " + - id_to_name(args[2].base); + code += "imageStore(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", "; if (args[2].type.rows == 1) - code += ".xxxx"; // Expand last argument to a 4-component vector + code += '('; + code += id_to_name(args[2].base); + if (args[2].type.rows == 1) + code += ").xxxx"; // Expand last argument to a 4-component vector code += ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex2Dstore, 0, { @@ -3287,8 +3639,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dstore, 0, { .add(data) .add(data) .add(data) - .add(data) - .result; + .add(data); } add_instruction_without_result(spv::OpImageWrite) @@ -3304,15 +3655,17 @@ DEFINE_INTRINSIC(tex3Dstore, 0, void, storage3d_uint, int3, uint) DEFINE_INTRINSIC(tex3Dstore, 0, void, storage3d_float, int3, float) DEFINE_INTRINSIC(tex3Dstore, 0, void, storage3d_float4, int3, float4) IMPLEMENT_INTRINSIC_GLSL(tex3Dstore, 0, { - code += "imageStore(" + id_to_name(args[0].base) + ", " + - id_to_name(args[1].base) + ", " + - id_to_name(args[2].base); + code += "imageStore(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", "; if (args[2].type.rows == 1) - code += ".xxxx"; // Expand last argument to a 4-component vector + code += '('; + code += id_to_name(args[2].base); + if (args[2].type.rows == 1) + code += ").xxxx"; // Expand last argument to a 4-component vector code += ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex3Dstore, 0, { - code += id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + "] = " + id_to_name(args[2].base); + if (_shader_model >= 50) + code += id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + "] = " + id_to_name(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex3Dstore, 0, { spv::Id data = args[2].base; @@ -3326,8 
+3679,7 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dstore, 0, { .add(data) .add(data) .add(data) - .add(data) - .result; + .add(data); } add_instruction_without_result(spv::OpImageWrite) @@ -3362,20 +3714,20 @@ IMPLEMENT_INTRINSIC_GLSL(tex1Dsize, 2, { }) IMPLEMENT_INTRINSIC_HLSL(tex1Dsize, 0, { if (_shader_model >= 40) - code += id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ')'; + code += "0; " + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ')'; else code += "int(1.0 / " + id_to_name(args[0].base) + ".pixelsize)"; }) IMPLEMENT_INTRINSIC_HLSL(tex1Dsize, 1, { if (_shader_model >= 40) - code += "uint temp" + std::to_string(res) + "; " + // Don't need the number of levels out value, so route that to a dummy variable - id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ", temp" + std::to_string(res) + ')'; + code += "0; { uint _levels; " + // Don't need the number of levels out value, so route that to a dummy variable + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ", _levels); }"; else code += "int(1.0 / " + id_to_name(args[0].base) + ".pixelsize) / exp2(" + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex1Dsize, 2, { if (_shader_model >= 50) - code += id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + ')'; + code += "0; " + id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + ')'; else code += "0"; // Only supported on SM5+ }) @@ -3383,36 +3735,31 @@ IMPLEMENT_INTRINSIC_SPIRV(tex1Dsize, 0, { add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); const spv::Id level = emit_constant(0u); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(level) - .result; + .add(level); }) IMPLEMENT_INTRINSIC_SPIRV(tex1Dsize, 1, { 
add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex1Dsize, 2, { add_capability(spv::CapabilityImageQuery); return add_instruction(spv::OpImageQuerySize, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret tex2Dsize(s) @@ -3440,20 +3787,20 @@ IMPLEMENT_INTRINSIC_GLSL(tex2Dsize, 2, { }) IMPLEMENT_INTRINSIC_HLSL(tex2Dsize, 0, { if (_shader_model >= 40) - code += id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y)"; + code += "0; " + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y)"; else code += "int2(1.0 / " + id_to_name(args[0].base) + ".pixelsize)"; }) IMPLEMENT_INTRINSIC_HLSL(tex2Dsize, 1, { if (_shader_model >= 40) - code += "uint temp" + std::to_string(res) + "; " + // Don't need the number of levels out value, so route that to a dummy variable - id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ".x, " + id_to_name(res) + ".y, temp" + std::to_string(res) + ')'; + code += "0; { uint _levels; " + // Don't need the number of levels out value, so route that to a dummy variable + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ".x, " + id_to_name(res) + ".y, _levels); }"; else code += "int2(1.0 / " + id_to_name(args[0].base) + ".pixelsize) / exp2(" + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex2Dsize, 2, { if (_shader_model >= 50) - code += id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y)"; + code += "0; " + id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + 
".x, " + id_to_name(res) + ".y)"; else code += "int2(0, 0)"; // Only supported on SM5+ }) @@ -3461,36 +3808,31 @@ IMPLEMENT_INTRINSIC_SPIRV(tex2Dsize, 0, { add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); const spv::Id level = emit_constant(0u); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(level) - .result; + .add(level); }) IMPLEMENT_INTRINSIC_SPIRV(tex2Dsize, 1, { add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex2Dsize, 2, { add_capability(spv::CapabilityImageQuery); return add_instruction(spv::OpImageQuerySize, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // ret tex3Dsize(s) @@ -3518,20 +3860,20 @@ IMPLEMENT_INTRINSIC_GLSL(tex3Dsize, 2, { }) IMPLEMENT_INTRINSIC_HLSL(tex3Dsize, 0, { if (_shader_model >= 40) - code += id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z)"; + code += "0; " + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z)"; else code += "int3(1.0 / " + id_to_name(args[0].base) + ".pixelsize)"; }) IMPLEMENT_INTRINSIC_HLSL(tex3Dsize, 1, { if (_shader_model >= 40) - code += "uint temp" + std::to_string(res) + "; " + // Don't need the number of levels out value, so route that to a dummy variable - id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z, temp" + std::to_string(res) + ')'; + 
code += "0; { uint _levels; " + // Don't need the number of levels out value, so route that to a dummy variable + id_to_name(args[0].base) + ".t.GetDimensions(" + id_to_name(args[1].base) + ", " + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z, _levels); }"; else code += "int3(1.0 / " + id_to_name(args[0].base) + ".pixelsize) / exp2(" + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(tex3Dsize, 2, { if (_shader_model >= 50) - code += id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z)"; + code += "0; " + id_to_name(args[0].base) + ".GetDimensions(" + id_to_name(res) + ".x, " + id_to_name(res) + ".y, " + id_to_name(res) + ".z)"; else code += "int3(0, 0, 0)"; // Only supported on SM5+ }) @@ -3539,36 +3881,31 @@ IMPLEMENT_INTRINSIC_SPIRV(tex3Dsize, 0, { add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); const spv::Id level = emit_constant(0u); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(level) - .result; + .add(level); }) IMPLEMENT_INTRINSIC_SPIRV(tex3Dsize, 1, { add_capability(spv::CapabilityImageQuery); const spv::Id image = add_instruction(spv::OpImage, convert_image_type(args[0].type)) - .add(args[0].base) - .result; + .add(args[0].base); return add_instruction(spv::OpImageQuerySizeLod, convert_type(res_type)) .add(image) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(tex3Dsize, 2, { add_capability(spv::CapabilityImageQuery); return add_instruction(spv::OpImageQuerySize, convert_type(res_type)) - .add(args[0].base) - .result; + .add(args[0].base); }) // barrier() @@ -3636,9 +3973,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicAdd, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicAdd, 0, { if (_shader_model >= 50) - code += "InterlockedAdd(" + 
id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedAdd(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " += " + id_to_name(args[1].base); + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " += " + id_to_name(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicAdd, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3649,8 +3986,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAdd, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicAdd(s, coords, data) DEFINE_INTRINSIC(atomicAdd, 1, int, inout_storage1d_int, int, int) @@ -3664,7 +4000,7 @@ IMPLEMENT_INTRINSIC_GLSL(atomicAdd, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicAdd, 1, { if (_shader_model >= 50) - code += "InterlockedAdd(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedAdd(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicAdd, 1, { const spv::Id ms_sample = emit_constant(0u); @@ -3672,8 +4008,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAdd, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3682,8 +4017,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAdd, 1, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicAnd(inout mem, data) @@ -3694,9 
+4028,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicAnd, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicAnd, 0, { if (_shader_model >= 50) - code += "InterlockedAnd(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedAnd(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " &= " + id_to_name(args[1].base); + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " &= " + id_to_name(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicAnd, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3707,8 +4041,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAnd, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicAnd(s, coords, data) DEFINE_INTRINSIC(atomicAnd, 1, int, inout_storage1d_int, int, int) @@ -3730,8 +4063,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAnd, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3740,8 +4072,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicAnd, 1, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicOr(inout mem, data) @@ -3752,9 +4083,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicOr, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicOr, 0, { if (_shader_model >= 50) - code += "InterlockedOr(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedOr(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + 
" = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " |= " + id_to_name(args[1].base); + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " |= " + id_to_name(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicOr, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3765,8 +4096,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicOr, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicOr(s, coords, data) DEFINE_INTRINSIC(atomicOr, 1, int, inout_storage1d_int, int, int) @@ -3780,7 +4110,7 @@ IMPLEMENT_INTRINSIC_GLSL(atomicOr, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicOr, 1, { if (_shader_model >= 50) - code += "InterlockedOr(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedOr(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicOr, 1, { const spv::Id ms_sample = emit_constant(0u); @@ -3788,8 +4118,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicOr, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3798,8 +4127,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicOr, 1, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicXor(inout mem, data) @@ -3810,9 +4138,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicXor, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicXor, 0, { if (_shader_model >= 50) - code += "InterlockedXor(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; 
InterlockedXor(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " ^= " + id_to_name(args[1].base); + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " ^= " + id_to_name(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicXor, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3823,8 +4151,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicXor, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicXor(s, coords, data) DEFINE_INTRINSIC(atomicXor, 1, int, inout_storage1d_int, int, int) @@ -3838,7 +4165,7 @@ IMPLEMENT_INTRINSIC_GLSL(atomicXor, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicXor, 1, { if (_shader_model >= 50) - code += "InterlockedXor(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedXor(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicXor, 1, { const spv::Id ms_sample = emit_constant(0u); @@ -3846,8 +4173,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicXor, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3856,8 +4182,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicXor, 1, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicMin(inout mem, data) @@ -3871,15 +4196,15 @@ IMPLEMENT_INTRINSIC_GLSL(atomicMin, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicMin, 0, { if 
(_shader_model >= 50) - code += "InterlockedMin(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMin(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = min(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = min(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(atomicMin, 1, { if (_shader_model >= 50) - code += "InterlockedMin(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMin(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = min(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = min(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3890,8 +4215,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 1, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3902,8 +4226,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 1, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicMin(s, coords, data) DEFINE_INTRINSIC(atomicMin, 2, int, inout_storage1d_int, int, int) @@ -3920,11 +4243,11 @@ IMPLEMENT_INTRINSIC_GLSL(atomicMin, 3, { }) 
IMPLEMENT_INTRINSIC_HLSL(atomicMin, 2, { if (_shader_model >= 50) - code += "InterlockedMin(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMin(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(atomicMin, 3, { if (_shader_model >= 50) - code += "InterlockedMin(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMin(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 2, { const spv::Id ms_sample = emit_constant(0u); @@ -3932,8 +4255,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 2, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3942,8 +4264,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 2, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 3, { const spv::Id ms_sample = emit_constant(0u); @@ -3951,8 +4272,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 3, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -3961,8 +4281,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMin, 3, { 
.add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicMax(inout mem, data) @@ -3976,15 +4295,15 @@ IMPLEMENT_INTRINSIC_GLSL(atomicMax, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicMax, 0, { if (_shader_model >= 50) - code += "InterlockedMax(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMax(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = max(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = max(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(atomicMax, 1, { if (_shader_model >= 50) - code += "InterlockedMax(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMax(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = max(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = max(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -3995,8 +4314,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 1, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -4007,8 +4325,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 1, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - 
.add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicMax(s, coords, data) DEFINE_INTRINSIC(atomicMax, 2, int, inout_storage1d_int, int, int) @@ -4025,11 +4342,11 @@ IMPLEMENT_INTRINSIC_GLSL(atomicMax, 3, { }) IMPLEMENT_INTRINSIC_HLSL(atomicMax, 2, { if (_shader_model >= 50) - code += "InterlockedMax(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMax(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_HLSL(atomicMax, 3, { if (_shader_model >= 50) - code += "InterlockedMax(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedMax(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 2, { const spv::Id ms_sample = emit_constant(0u); @@ -4037,8 +4354,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 2, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -4047,8 +4363,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 2, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 3, { const spv::Id ms_sample = emit_constant(0u); @@ -4056,8 +4371,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 3, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - 
.result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -4066,8 +4380,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicMax, 3, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicExchange(inout mem, data) @@ -4078,9 +4391,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicExchange, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicExchange, 0, { if (_shader_model >= 50) - code += "InterlockedExchange(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedExchange(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = " + id_to_name(args[1].base); + code += id_to_name(args[0].base) + "; " + id_to_name(args[0].base) + " = " + id_to_name(args[1].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicExchange, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -4091,8 +4404,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicExchange, 0, { .add(args[0].base) .add(mem_scope) .add(mem_semantics) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicExchange(s, coords, data) DEFINE_INTRINSIC(atomicExchange, 1, int, inout_storage1d_int, int, int) @@ -4106,7 +4418,7 @@ IMPLEMENT_INTRINSIC_GLSL(atomicExchange, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicExchange, 1, { if (_shader_model >= 50) - code += "InterlockedExchange(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedExchange(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicExchange, 1, { const spv::Id ms_sample = emit_constant(0u); @@ -4114,8 +4426,7 
@@ IMPLEMENT_INTRINSIC_SPIRV(atomicExchange, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -4124,8 +4435,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicExchange, 1, { .add(texel) .add(mem_scope) .add(mem_semantics) - .add(args[2].base) - .result; + .add(args[2].base); }) // ret atomicCompareExchange(inout mem, compare, data) @@ -4136,9 +4446,9 @@ IMPLEMENT_INTRINSIC_GLSL(atomicCompareExchange, 0, { }) IMPLEMENT_INTRINSIC_HLSL(atomicCompareExchange, 0, { if (_shader_model >= 50) - code += "InterlockedCompareExchange(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedCompareExchange(" + id_to_name(args[0].base) + ", " + id_to_name(args[1].base) + ", " + id_to_name(args[2].base) + ", " + id_to_name(res) + ')'; else - code += id_to_name(res) + " = " + id_to_name(args[0].base) + "; if (" + id_to_name(args[0].base) + " == " + id_to_name(args[1].base) + ") " + id_to_name(args[0].base) + " = " + id_to_name(args[2].base); + code += id_to_name(args[0].base) + "; if (" + id_to_name(args[0].base) + " == " + id_to_name(args[1].base) + ") " + id_to_name(args[0].base) + " = " + id_to_name(args[2].base); }) IMPLEMENT_INTRINSIC_SPIRV(atomicCompareExchange, 0, { const spv::Id mem_scope = emit_constant(spv::ScopeDevice); @@ -4151,8 +4461,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicCompareExchange, 0, { .add(mem_semantics) .add(mem_semantics) .add(args[2].base) - .add(args[1].base) - .result; + .add(args[1].base); }) // ret atomicCompareExchange(s, coords, compare, data) DEFINE_INTRINSIC(atomicCompareExchange, 1, int, inout_storage1d_int, int, int, int) @@ -4166,7 +4475,7 @@ 
IMPLEMENT_INTRINSIC_GLSL(atomicCompareExchange, 1, { }) IMPLEMENT_INTRINSIC_HLSL(atomicCompareExchange, 1, { if (_shader_model >= 50) - code += "InterlockedCompareExchange(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(res) + ')'; + code += "0; InterlockedCompareExchange(" + id_to_name(args[0].base) + '[' + id_to_name(args[1].base) + ']' + ", " + id_to_name(args[2].base) + ", " + id_to_name(args[3].base) + ", " + id_to_name(res) + ')'; }) IMPLEMENT_INTRINSIC_SPIRV(atomicCompareExchange, 1, { const spv::Id ms_sample = emit_constant(0u); @@ -4174,8 +4483,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicCompareExchange, 1, { const spv::Id texel = add_instruction(spv::OpImageTexelPointer, convert_type(res_type, true, spv::StorageClassImage)) .add(args[0].base) .add(args[1].base) - .add(ms_sample) - .result; + .add(ms_sample); const spv::Id mem_scope = emit_constant(spv::ScopeDevice); const spv::Id mem_semantics = emit_constant(spv::MemorySemanticsMaskNone); @@ -4186,8 +4494,7 @@ IMPLEMENT_INTRINSIC_SPIRV(atomicCompareExchange, 1, { .add(mem_semantics) .add(mem_semantics) .add(args[3].base) - .add(args[2].base) - .result; + .add(args[2].base); }) #undef DEFINE_INTRINSIC diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 2fe13dda7..3e022d721 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -83,15 +83,16 @@ static std::unique_ptr CreateRFXCodegen() case RenderAPI::Metal: { return std::unique_ptr(reshadefx::create_codegen_glsl( - false, true, debug_info, uniforms_to_spec_constants, false, (rapi == RenderAPI::Vulkan))); + 460, false, true, debug_info, uniforms_to_spec_constants, false, (rapi == RenderAPI::Vulkan))); } case RenderAPI::OpenGL: case RenderAPI::OpenGLES: default: { - return std::unique_ptr(reshadefx::create_codegen_glsl( - (rapi == RenderAPI::OpenGLES), false, debug_info, 
uniforms_to_spec_constants, false, true)); + return std::unique_ptr( + reshadefx::create_codegen_glsl(ShaderGen::GetGLSLVersion(rapi), (rapi == RenderAPI::OpenGLES), false, + debug_info, uniforms_to_spec_constants, false, true)); } } } @@ -120,7 +121,7 @@ static GPUTexture::Format MapTextureFormat(reshadefx::texture_format format) return s_mapping[static_cast(format)]; } -static GPUSampler::Config MapSampler(const reshadefx::sampler_info& si) +static GPUSampler::Config MapSampler(const reshadefx::sampler_desc& si) { GPUSampler::Config config = GPUSampler::GetNearestConfig(); @@ -200,44 +201,44 @@ static GPUSampler::Config MapSampler(const reshadefx::sampler_info& si) return config; } -static GPUPipeline::BlendState MapBlendState(const reshadefx::pass_info& pi) +static GPUPipeline::BlendState MapBlendState(const reshadefx::pass& pi) { - static constexpr auto map_blend_op = [](const reshadefx::pass_blend_op o) { + static constexpr auto map_blend_op = [](const reshadefx::blend_op o) { switch (o) { - case reshadefx::pass_blend_op::add: + case reshadefx::blend_op::add: return GPUPipeline::BlendOp::Add; - case reshadefx::pass_blend_op::subtract: + case reshadefx::blend_op::subtract: return GPUPipeline::BlendOp::Subtract; - case reshadefx::pass_blend_op::reverse_subtract: + case reshadefx::blend_op::reverse_subtract: return GPUPipeline::BlendOp::ReverseSubtract; - case reshadefx::pass_blend_op::min: + case reshadefx::blend_op::min: return GPUPipeline::BlendOp::Min; - case reshadefx::pass_blend_op::max: + case reshadefx::blend_op::max: default: return GPUPipeline::BlendOp::Max; } }; - static constexpr auto map_blend_factor = [](const reshadefx::pass_blend_factor f) { + static constexpr auto map_blend_factor = [](const reshadefx::blend_factor f) { switch (f) { - case reshadefx::pass_blend_factor::zero: + case reshadefx::blend_factor::zero: return GPUPipeline::BlendFunc::Zero; - case reshadefx::pass_blend_factor::one: + case reshadefx::blend_factor::one: return 
GPUPipeline::BlendFunc::One; - case reshadefx::pass_blend_factor::source_color: + case reshadefx::blend_factor::source_color: return GPUPipeline::BlendFunc::SrcColor; - case reshadefx::pass_blend_factor::one_minus_source_color: + case reshadefx::blend_factor::one_minus_source_color: return GPUPipeline::BlendFunc::InvSrcColor; - case reshadefx::pass_blend_factor::dest_color: + case reshadefx::blend_factor::dest_color: return GPUPipeline::BlendFunc::DstColor; - case reshadefx::pass_blend_factor::one_minus_dest_color: + case reshadefx::blend_factor::one_minus_dest_color: return GPUPipeline::BlendFunc::InvDstColor; - case reshadefx::pass_blend_factor::source_alpha: + case reshadefx::blend_factor::source_alpha: return GPUPipeline::BlendFunc::SrcAlpha; - case reshadefx::pass_blend_factor::one_minus_source_alpha: + case reshadefx::blend_factor::one_minus_source_alpha: return GPUPipeline::BlendFunc::InvSrcAlpha; - case reshadefx::pass_blend_factor::dest_alpha: + case reshadefx::blend_factor::dest_alpha: default: return GPUPipeline::BlendFunc::DstAlpha; } @@ -245,13 +246,13 @@ static GPUPipeline::BlendState MapBlendState(const reshadefx::pass_info& pi) GPUPipeline::BlendState bs = GPUPipeline::BlendState::GetNoBlendingState(); bs.enable = (pi.blend_enable[0] != 0); - bs.blend_op = map_blend_op(pi.blend_op[0]); - bs.src_blend = map_blend_factor(pi.src_blend[0]); - bs.dst_blend = map_blend_factor(pi.dest_blend[0]); - bs.alpha_blend_op = map_blend_op(pi.blend_op_alpha[0]); - bs.src_alpha_blend = map_blend_factor(pi.src_blend_alpha[0]); - bs.dst_alpha_blend = map_blend_factor(pi.dest_blend_alpha[0]); - bs.write_mask = pi.color_write_mask[0]; + bs.blend_op = map_blend_op(pi.color_blend_op[0]); + bs.src_blend = map_blend_factor(pi.source_color_blend_factor[0]); + bs.dst_blend = map_blend_factor(pi.dest_color_blend_factor[0]); + bs.alpha_blend_op = map_blend_op(pi.alpha_blend_op[0]); + bs.src_alpha_blend = map_blend_factor(pi.source_alpha_blend_factor[0]); + bs.dst_alpha_blend = 
map_blend_factor(pi.dest_alpha_blend_factor[0]); + bs.write_mask = pi.render_target_write_mask[0]; return bs; } @@ -306,14 +307,19 @@ bool PostProcessing::ReShadeFXShader::LoadFromString(std::string name, std::stri if (code.empty() || code.back() != '\n') code.push_back('\n'); - reshadefx::module temp_module; + // TODO: This could use spv, it's probably fastest. + std::unique_ptr cg = CreateRFXCodegen(); + if (!cg) + return false; + if (!CreateModule(only_config ? DEFAULT_BUFFER_WIDTH : g_gpu_device->GetWindowWidth(), - only_config ? DEFAULT_BUFFER_HEIGHT : g_gpu_device->GetWindowHeight(), &temp_module, - std::move(code), error)) + only_config ? DEFAULT_BUFFER_HEIGHT : g_gpu_device->GetWindowHeight(), cg.get(), std::move(code), + error)) { return false; } + const reshadefx::effect_module& temp_module = cg->module(); if (!CreateOptions(temp_module, error)) return false; @@ -321,9 +327,9 @@ bool PostProcessing::ReShadeFXShader::LoadFromString(std::string name, std::stri if (!temp_module.techniques.empty()) { bool has_passes = false; - for (const reshadefx::technique_info& tech : temp_module.techniques) + for (const reshadefx::technique& tech : temp_module.techniques) { - for (const reshadefx::pass_info& pi : tech.passes) + for (const reshadefx::pass& pi : tech.passes) { has_passes = true; @@ -338,15 +344,15 @@ bool PostProcessing::ReShadeFXShader::LoadFromString(std::string name, std::stri if (max_rt > GPUDevice::MAX_RENDER_TARGETS) { - Error::SetString(error, fmt::format("Too many render targets ({}) in pass {}, only {} are supported.", max_rt, - pi.name, GPUDevice::MAX_RENDER_TARGETS)); + Error::SetStringFmt(error, "Too many render targets ({}) in pass {}, only {} are supported.", max_rt, pi.name, + GPUDevice::MAX_RENDER_TARGETS); return false; } - if (pi.samplers.size() > GPUDevice::MAX_TEXTURE_SAMPLERS) + if (pi.sampler_bindings.size() > GPUDevice::MAX_TEXTURE_SAMPLERS) { - Error::SetString(error, fmt::format("Too many samplers ({}) in pass {}, only {} are 
supported.", - pi.samplers.size(), pi.name, GPUDevice::MAX_TEXTURE_SAMPLERS)); + Error::SetStringFmt(error, "Too many samplers ({}) in pass {}, only {} are supported.", + pi.sampler_bindings.size(), pi.name, GPUDevice::MAX_TEXTURE_SAMPLERS); return false; } } @@ -373,7 +379,7 @@ bool PostProcessing::ReShadeFXShader::WantsDepthBuffer() const return m_wants_depth_buffer; } -bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_height, reshadefx::module* mod, +bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_height, reshadefx::codegen* cg, std::string code, Error* error) { reshadefx::preprocessor pp; @@ -427,26 +433,21 @@ bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_ if (!pp.append_string(std::move(code), m_filename)) { - Error::SetString(error, fmt::format("Failed to preprocess:\n{}", pp.errors())); + Error::SetStringFmt(error, "Failed to preprocess:\n{}", pp.errors()); return false; } - std::unique_ptr cg = CreateRFXCodegen(); - if (!cg) - return false; - reshadefx::parser parser; - if (!parser.parse(pp.output(), cg.get())) + if (!parser.parse(pp.output(), cg)) { - Error::SetString(error, fmt::format("Failed to parse:\n{}", parser.errors())); + Error::SetStringFmt(error, "Failed to parse:\n{}", parser.errors()); return false; } - cg->write_result(*mod); return true; } -static bool HasAnnotationWithName(const reshadefx::uniform_info& uniform, const std::string_view annotation_name) +static bool HasAnnotationWithName(const reshadefx::uniform& uniform, const std::string_view annotation_name) { for (const reshadefx::annotation& an : uniform.annotations) { @@ -493,7 +494,7 @@ static bool GetBooleanAnnotationValue(const std::vector& } static PostProcessing::ShaderOption::ValueVector -GetVectorAnnotationValue(const reshadefx::uniform_info& uniform, const std::string_view annotation_name, +GetVectorAnnotationValue(const reshadefx::uniform& uniform, const std::string_view 
annotation_name, const PostProcessing::ShaderOption::ValueVector& default_value) { PostProcessing::ShaderOption::ValueVector vv = default_value; @@ -575,9 +576,9 @@ GetVectorAnnotationValue(const reshadefx::uniform_info& uniform, const std::stri return vv; } -bool PostProcessing::ReShadeFXShader::CreateOptions(const reshadefx::module& mod, Error* error) +bool PostProcessing::ReShadeFXShader::CreateOptions(const reshadefx::effect_module& mod, Error* error) { - for (const reshadefx::uniform_info& ui : mod.uniforms) + for (const reshadefx::uniform& ui : mod.uniforms) { SourceOptionType so; if (!GetSourceOption(ui, &so, error)) @@ -639,7 +640,7 @@ bool PostProcessing::ReShadeFXShader::CreateOptions(const reshadefx::module& mod break; default: - Error::SetString(error, fmt::format("Unhandled uniform type {} ({})", static_cast(ui.type.base), ui.name)); + Error::SetStringFmt(error, "Unhandled uniform type {} ({})", static_cast(ui.type.base), ui.name); return false; } @@ -648,8 +649,7 @@ bool PostProcessing::ReShadeFXShader::CreateOptions(const reshadefx::module& mod opt.vector_size = ui.type.components(); if (opt.vector_size == 0 || opt.vector_size > ShaderOption::MAX_VECTOR_COMPONENTS) { - Error::SetString(error, - fmt::format("Unhandled vector size {} ({})", static_cast(ui.type.components()), ui.name)); + Error::SetStringFmt(error, "Unhandled vector size {} ({})", static_cast(ui.type.components()), ui.name); return false; } @@ -762,8 +762,7 @@ bool PostProcessing::ReShadeFXShader::CreateOptions(const reshadefx::module& mod return true; } -bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_info& ui, SourceOptionType* si, - Error* error) +bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform& ui, SourceOptionType* si, Error* error) { // TODO: Rewrite these to a lookup table instead, this if chain is terrible. 
const std::string_view source = GetStringAnnotationValue(ui.annotations, "source", {}); @@ -773,8 +772,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (ui.type.base != reshadefx::type::t_float || ui.type.components() > 1) { - Error::SetString( - error, fmt::format("Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), + ui.name); return false; } @@ -785,8 +784,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if ((!ui.type.is_integral() && !ui.type.is_floating_point()) || ui.type.components() > 1) { - Error::SetString( - error, fmt::format("Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), + ui.name); return false; } @@ -797,8 +796,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (ui.type.base != reshadefx::type::t_float || ui.type.components() > 1) { - Error::SetString( - error, fmt::format("Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for timer source in uniform '{}'", ui.type.description(), + ui.name); return false; } @@ -809,8 +808,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() < 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for pingpong source in uniform '{}'", - ui.type.description(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for pingpong source in uniform '{}'", ui.type.description(), + ui.name); return false; } @@ -821,8 +820,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if 
(!ui.type.is_floating_point() || ui.type.components() < 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for mousepoint source in uniform '{}'", - ui.type.description(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for mousepoint source in uniform '{}'", ui.type.description(), + ui.name); return false; } @@ -839,8 +838,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if ((!ui.type.is_floating_point() && !ui.type.is_integral()) || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type '{}' ({} components) for random source in uniform '{}'", - ui.type.description(), ui.type.components(), ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' ({} components) for random source in uniform '{}'", + ui.type.description(), ui.type.components(), ui.name); return false; } @@ -896,8 +895,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -908,8 +907,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -920,8 +919,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type 
'{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -932,8 +931,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -944,8 +943,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 1) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -956,8 +955,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -968,8 +967,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return 
false; } @@ -980,8 +979,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -992,8 +991,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -1004,8 +1003,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -1016,8 +1015,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -1028,8 +1027,8 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i { if (!ui.type.is_floating_point() || ui.type.components() != 2) { - Error::SetString(error, 
fmt::format("Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), - source, ui.name)); + Error::SetStringFmt(error, "Unexpected type '{}' for {} source in uniform '{}'", ui.type.description(), source, + ui.name); return false; } @@ -1038,7 +1037,7 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i } else { - Error::SetString(error, fmt::format("Unknown source '{}' in uniform '{}'", source, ui.name)); + Error::SetStringFmt(error, "Unknown source '{}' in uniform '{}'", source, ui.name); return false; } } @@ -1062,11 +1061,11 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i return true; } -bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer_format, reshadefx::module& mod, - Error* error) +bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer_format, + const reshadefx::effect_module& mod, Error* error) { u32 total_passes = 0; - for (const reshadefx::technique_info& tech : mod.techniques) + for (const reshadefx::technique& tech : mod.techniques) total_passes += static_cast(tech.passes.size()); if (total_passes == 0) { @@ -1077,7 +1076,7 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer m_passes.reserve(total_passes); // Named render targets. 
- for (const reshadefx::texture_info& ti : mod.textures) + for (const reshadefx::texture& ti : mod.textures) { Texture tex; @@ -1097,7 +1096,7 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer const std::string_view source = GetStringAnnotationValue(ti.annotations, "source", {}); if (source.empty()) { - Error::SetString(error, fmt::format("Non-render target texture '{}' is missing source.", ti.unique_name)); + Error::SetStringFmt(error, "Non-render target texture '{}' is missing source.", ti.unique_name); return false; } @@ -1111,7 +1110,7 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer if (std::optional> resdata = Host::ReadResourceFile(resource_name.c_str(), true); !resdata.has_value() || !image.LoadFromBuffer(resource_name.c_str(), resdata->data(), resdata->size())) { - Error::SetString(error, fmt::format("Failed to load image '{}' (from '{}')", source, image_path).c_str()); + Error::SetStringFmt(error, "Failed to load image '{}' (from '{}')", source, image_path); return false; } } @@ -1121,8 +1120,7 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); if (!tex.texture) { - Error::SetString( - error, fmt::format("Failed to create {}x{} texture ({})", image.GetWidth(), image.GetHeight(), source)); + Error::SetStringFmt(error, "Failed to create {}x{} texture ({})", image.GetWidth(), image.GetHeight(), source); return false; } @@ -1133,9 +1131,9 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer m_textures.push_back(std::move(tex)); } - for (reshadefx::technique_info& tech : mod.techniques) + for (const reshadefx::technique& tech : mod.techniques) { - for (reshadefx::pass_info& pi : tech.passes) + for (const reshadefx::pass& pi : tech.passes) { const bool is_final = (&tech == &mod.techniques.back() && &pi == &tech.passes.back()); @@ -1164,8 +1162,7 @@ bool 
PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer } if (rt == static_cast(m_textures.size())) { - Error::SetString(error, - fmt::format("Unknown texture '{}' used as render target in pass '{}'", rtname, pi.name)); + Error::SetStringFmt(error, "Unknown texture '{}' used as render target in pass '{}'", rtname, pi.name); return false; } @@ -1182,17 +1179,22 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer } u32 texture_slot = 0; - for (const reshadefx::sampler_info& si : pi.samplers) + Assert(pi.texture_bindings.size() == pi.sampler_bindings.size()); + for (size_t tb_index = 0; tb_index < pi.texture_bindings.size(); tb_index++) { + const reshadefx::texture_binding& tb = pi.texture_bindings[tb_index]; + const reshadefx::sampler_binding& sb = pi.sampler_bindings[tb_index]; + Sampler sampler; sampler.slot = texture_slot++; - sampler.reshade_name = si.unique_name; sampler.texture_id = static_cast(m_textures.size()); - for (const reshadefx::texture_info& ti : mod.textures) + for (const reshadefx::texture& ti : mod.textures) { - if (ti.unique_name == si.texture_name) + if (ti.unique_name == tb.texture_name) { + sampler.reshade_name = ti.unique_name; // TODO: REMOVE THIS + // found the texture, now look for our side of it if (ti.semantic == "COLOR") { @@ -1207,14 +1209,14 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer } else if (!ti.semantic.empty()) { - Error::SetString(error, fmt::format("Unknown semantic {} in texture {}", ti.semantic, ti.name)); + Error::SetStringFmt(error, "Unknown semantic {} in texture {}", ti.semantic, ti.name); return false; } // must be a render target, or another texture for (u32 i = 0; i < static_cast(m_textures.size()); i++) { - if (m_textures[i].reshade_name == si.texture_name) + if (m_textures[i].reshade_name == ti.unique_name) { // hook it up sampler.texture_id = static_cast(i); @@ -1225,16 +1227,16 @@ bool 
PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer break; } } + if (sampler.texture_id == static_cast(m_textures.size())) { - Error::SetString( - error, fmt::format("Unknown texture {} (sampler {}) in pass {}", si.texture_name, si.name, pi.name)); + Error::SetStringFmt(error, "Unknown texture {} in pass {}", tb.texture_name, pi.name); return false; } - DEV_LOG("Pass {} Texture {} => {}", pi.name, si.texture_name, sampler.texture_id); + DEV_LOG("Pass {} Texture {} => {}", pi.name, tb.texture_name, sampler.texture_id); - sampler.sampler = GetSampler(MapSampler(si)); + sampler.sampler = GetSampler(MapSampler(sb)); if (!sampler.sampler) { Error::SetString(error, "Failed to create sampler."); @@ -1320,14 +1322,18 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, if (fxcode.empty() || fxcode.back() != '\n') fxcode.push_back('\n'); + std::unique_ptr cg = CreateRFXCodegen(); + if (!cg) + return false; + Error error; - reshadefx::module mod; - if (!CreateModule(width, height, &mod, std::move(fxcode), &error)) + if (!CreateModule(width, height, cg.get(), std::move(fxcode), &error)) { ERROR_LOG("Failed to create module for '{}': {}", m_name, error.GetDescription()); return false; } + const reshadefx::effect_module& mod = cg->module(); DebugAssert(!mod.techniques.empty()); if (!CreatePasses(format, mod, &error)) @@ -1336,53 +1342,16 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, return false; } - const std::string_view code(mod.code.data(), mod.code.size()); + // TODO: If using spv, this will be populated. + // const std::string effect_code = cg->finalize_code(); - auto get_shader = [api, needs_main_defn, &code](const std::string& name, const std::span samplers, - GPUShaderStage stage) { - std::string real_code; - if (needs_main_defn) - { - // dFdx/dFdy are not defined in the vertex shader. - const char* defns = - (stage == GPUShaderStage::Vertex) ? 
"#define dFdx(x) x\n#define dFdy(x) x\n#define discard\n" : ""; - const char* precision = (api == RenderAPI::OpenGLES) ? - "precision highp float;\nprecision highp int;\nprecision highp sampler2D;\n" : - ""; + auto get_shader = [api, needs_main_defn, &cg](const std::string& name, const std::span samplers, + GPUShaderStage stage) { + const std::string real_code = cg->finalize_code_for_entry_point(name); - TinyString version_string = "#version 460 core\n"; -#ifdef ENABLE_OPENGL - if (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES) - version_string = ShaderGen::GetGLSLVersionString(api, ShaderGen::GetGLSLVersion(api)); +#if 0 + FileSystem::WriteStringToFile(fmt::format("D:\\reshade_{}.txt", Path::SanitizeFileName(name)).c_str(), real_code); #endif - real_code = fmt::format("{}\n#define ENTRY_POINT_{}\n{}\n{}\n{}", version_string, name, defns, precision, code); - - for (const Sampler& sampler : samplers) - { - std::string decl = fmt::format("binding = /*SAMPLER:{}*/0", sampler.reshade_name); - std::string replacement = fmt::format("binding = {}", sampler.slot); - StringUtil::ReplaceAll(&real_code, decl, replacement); - } - } - else - { - real_code = std::string(code); - - for (const Sampler& sampler : samplers) - { - std::string decl = fmt::format("__{}_t : register( t0);", sampler.reshade_name); - std::string replacement = - fmt::format("__{}_t : register({}t{});", sampler.reshade_name, (sampler.slot < 10) ? " " : "", sampler.slot); - StringUtil::ReplaceAll(&real_code, decl, replacement); - - decl = fmt::format("__{}_s : register( s0);", sampler.reshade_name); - replacement = - fmt::format("__{}_s : register({}s{});", sampler.reshade_name, (sampler.slot < 10) ? 
" " : "", sampler.slot); - StringUtil::ReplaceAll(&real_code, decl, replacement); - } - } - - // FileSystem::WriteStringToFile("D:\\foo.txt", real_code); Error error; std::unique_ptr sshader = g_gpu_device->CreateShader( @@ -1407,15 +1376,15 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, progress->PushState(); size_t total_passes = 0; - for (const reshadefx::technique_info& tech : mod.techniques) + for (const reshadefx::technique& tech : mod.techniques) total_passes += tech.passes.size(); progress->SetProgressRange(static_cast(total_passes)); progress->SetProgressValue(0); u32 passnum = 0; - for (const reshadefx::technique_info& tech : mod.techniques) + for (const reshadefx::technique& tech : mod.techniques) { - for (const reshadefx::pass_info& info : tech.passes) + for (const reshadefx::pass& info : tech.passes) { DebugAssert(passnum < m_passes.size()); Pass& pass = m_passes[passnum++]; diff --git a/src/util/postprocessing_shader_fx.h b/src/util/postprocessing_shader_fx.h index e265e4d40..01fe96133 100644 --- a/src/util/postprocessing_shader_fx.h +++ b/src/util/postprocessing_shader_fx.h @@ -13,6 +13,10 @@ #include +namespace reshadefx { +class codegen; +} + class Error; namespace PostProcessing { @@ -94,10 +98,10 @@ private: ShaderOption::ValueVector value; }; - bool CreateModule(s32 buffer_width, s32 buffer_height, reshadefx::module* mod, std::string code, Error* error); - bool CreateOptions(const reshadefx::module& mod, Error* error); - bool GetSourceOption(const reshadefx::uniform_info& ui, SourceOptionType* si, Error* error); - bool CreatePasses(GPUTexture::Format backbuffer_format, reshadefx::module& mod, Error* error); + bool CreateModule(s32 buffer_width, s32 buffer_height, reshadefx::codegen* cg, std::string code, Error* error); + bool CreateOptions(const reshadefx::effect_module& mod, Error* error); + bool GetSourceOption(const reshadefx::uniform& ui, SourceOptionType* si, Error* error); + bool 
CreatePasses(GPUTexture::Format backbuffer_format, const reshadefx::effect_module& mod, Error* error); const char* GetTextureNameForID(TextureID id) const; GPUTexture* GetTextureByID(TextureID id, GPUTexture* input_color, GPUTexture* input_depth, diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 840d924e5..919d694aa 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -98,9 +98,9 @@ void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, s32 value) ss << "#define " << name << " " << value << "\n"; } -#ifdef ENABLE_OPENGL u32 ShaderGen::GetGLSLVersion(RenderAPI render_api) { +#ifdef ENABLE_OPENGL const char* glsl_version = reinterpret_cast(glGetString(GL_SHADING_LANGUAGE_VERSION)); const bool glsl_es = (render_api == RenderAPI::OpenGLES); Assert(glsl_version != nullptr); @@ -136,6 +136,9 @@ u32 ShaderGen::GetGLSLVersion(RenderAPI render_api) } return (static_cast(major_version) * 100) + static_cast(minor_version); +#else + return 460; +#endif } TinyString ShaderGen::GetGLSLVersionString(RenderAPI render_api, u32 version) @@ -147,7 +150,6 @@ TinyString ShaderGen::GetGLSLVersionString(RenderAPI render_api, u32 version) return TinyString::from_format("#version {}{:02d}{}", major_version, minor_version, (glsl_es && major_version >= 3) ? " es" : ""); } -#endif void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */) { diff --git a/src/util/shadergen.h b/src/util/shadergen.h index ec2797128..f6761b136 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -20,10 +20,8 @@ public: static GPUShaderLanguage GetShaderLanguageForAPI(RenderAPI api); static bool UseGLSLBindingLayout(); -#ifdef ENABLE_OPENGL static u32 GetGLSLVersion(RenderAPI render_api); static TinyString GetGLSLVersionString(RenderAPI render_api, u32 version); -#endif ALWAYS_INLINE GPUShaderLanguage GetLanguage() const { return m_shader_language; }