From 9ebe25fd7718970e82dd92a7364d8f332eed9c4f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 2 May 2024 23:31:13 +0300 Subject: [PATCH 1/4] [GPU] Declare unused register fields explicitly --- src/xenia/gpu/registers.h | 132 ++++++++++++++++++++++++-------------- src/xenia/gpu/xenos.h | 7 +- 2 files changed, 87 insertions(+), 52 deletions(-) diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 5ee5f4ac1..c8ad6a286 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -21,6 +21,9 @@ // Some registers were added on Adreno specifically and are not referenced in // game .pdb files and never set by games. +// All unused bits are intentionally declared as named fields for stable +// comparisons when register values are constructed or modified by Xenia itself. + // Only 32-bit types (uint32_t, int32_t, float or enums with uint32_t / int32_t // as the underlying type) are allowed in the bit fields here, as Visual C++ // restarts packing when a field requires different alignment than the previous @@ -58,11 +61,11 @@ union alignas(uint32_t) COHER_STATUS_HOST { uint32_t dest_base_5_ena : 1; // +14 uint32_t dest_base_6_ena : 1; // +15 uint32_t dest_base_7_ena : 1; // +16 - uint32_t : 7; // +17 + uint32_t _pad_17 : 7; // +17 uint32_t vc_action_ena : 1; // +24 uint32_t tc_action_ena : 1; // +25 uint32_t pglb_action_ena : 1; // +26 - uint32_t : 4; // +27 + uint32_t _pad_27 : 4; // +27 uint32_t status : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST; @@ -72,22 +75,23 @@ static_assert_size(COHER_STATUS_HOST, sizeof(uint32_t)); union alignas(uint32_t) WAIT_UNTIL { uint32_t value; struct { - uint32_t : 1; // +0 + uint32_t _pad_0 : 1; // +0 uint32_t wait_re_vsync : 1; // +1 uint32_t wait_fe_vsync : 1; // +2 uint32_t wait_vsync : 1; // +3 uint32_t wait_dsply_id0 : 1; // +4 uint32_t wait_dsply_id1 : 1; // +5 uint32_t wait_dsply_id2 : 1; // +6 - uint32_t : 3; // +7 + uint32_t _pad_7 : 3; // +7 uint32_t wait_cmdfifo : 1; // +10 - uint32_t : 3; // +11 + uint32_t _pad_11 : 3; // +11 uint32_t wait_2d_idle : 1; // +14 uint32_t wait_3d_idle : 1; // +15 uint32_t wait_2d_idleclean : 1; // +16 uint32_t wait_3d_idleclean : 1; // +17 - uint32_t : 2; // +18 + uint32_t _pad_18 : 2; // +18 uint32_t cmdfifo_entries : 4; // +20 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL; }; @@ -129,7 +133,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC { uint32_t inst_pred_optimize : 1; // +0 uint32_t sc_output_screen_xy : 1; // +1 xenos::SampleControl sc_sample_cntl : 2; // +2 - uint32_t : 4; // +4 + uint32_t _pad_4 : 4; // +4 // Pixel shader interpolator (according to the XNA microcode validator - // limited to the interpolator count, 16, not the total register count of // 64) index to write pixel parameters to. @@ -209,6 +213,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC { uint32_t perfcounter_ref : 1; // +16 uint32_t yeild_optimize : 1; // +17 sic uint32_t tx_cache_sel : 1; // +18 + uint32_t _pad_19 : 13; // +19 }; static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC; }; @@ -229,10 +234,11 @@ static_assert_size(SQ_INTERPOLATOR_CNTL, sizeof(uint32_t)); union alignas(uint32_t) SQ_VS_CONST { uint32_t value; struct { - uint32_t base : 9; // +0 - uint32_t : 3; // +9 + uint32_t base : 9; // +0 + uint32_t _pad_9 : 3; // +9 // Vec4 count minus one. 
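// For example, a size value of 255 selects 256 vec4 constants.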
- uint32_t size : 9; // 12 + uint32_t size : 9; // +12 + uint32_t _pad_21 : 11; // +21 }; static constexpr Register register_index = XE_GPU_REG_SQ_VS_CONST; }; @@ -242,10 +248,11 @@ static_assert_size(SQ_VS_CONST, sizeof(uint32_t)); union alignas(uint32_t) SQ_PS_CONST { uint32_t value; struct { - uint32_t base : 9; // +0 - uint32_t : 3; // +9 + uint32_t base : 9; // +0 + uint32_t _pad_9 : 3; // +9 // Vec4 count minus one. - uint32_t size : 9; // 12 + uint32_t size : 9; // +12 + uint32_t _pad_21 : 11; // +21 }; static constexpr Register register_index = XE_GPU_REG_SQ_PS_CONST; }; @@ -273,7 +280,7 @@ union alignas(uint32_t) VGT_DMA_SIZE { uint32_t value; struct { uint32_t num_words : 24; // +0 - uint32_t : 6; // +24 + uint32_t _pad_24 : 6; // +24 xenos::Endian swap_mode : 2; // +30 }; static constexpr Register register_index = XE_GPU_REG_VGT_DMA_SIZE; @@ -286,10 +293,10 @@ union alignas(uint32_t) VGT_DRAW_INITIATOR { xenos::PrimitiveType prim_type : 6; // +0 xenos::SourceSelect source_select : 2; // +6 xenos::MajorMode major_mode : 2; // +8 - uint32_t : 1; // +10 + uint32_t _pad_10 : 1; // +10 xenos::IndexFormat index_size : 1; // +11 uint32_t not_eop : 1; // +12 - uint32_t : 3; // +13 + uint32_t _pad_13 : 3; // +13 uint32_t num_indices : 16; // +16 }; static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR; @@ -317,7 +324,8 @@ union alignas(uint32_t) VGT_MULTI_PRIM_IB_RESET_INDX { // the restart index check likely only involves the lower 24 bit of the // vertex index - therefore, if reset_indx is 0xFFFFFF, likely 0xFFFFFF, // 0x1FFFFFF, 0xFFFFFFFF all cause primitive reset. - uint32_t reset_indx : 24; + uint32_t reset_indx : 24; // +0 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX; @@ -334,7 +342,8 @@ union alignas(uint32_t) VGT_INDX_OFFSET { // sign-extending on the host. Direct3D 9 just writes BaseVertexIndex as a // signed int32 to the entire register, but the upper 8 bits are ignored // anyway, and that has no effect on offsets that fit in 24 bits. - uint32_t indx_offset : 24; + uint32_t indx_offset : 24; // +0 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_VGT_INDX_OFFSET; }; @@ -343,7 +352,8 @@ static_assert_size(VGT_INDX_OFFSET, sizeof(uint32_t)); union alignas(uint32_t) VGT_MIN_VTX_INDX { uint32_t value; struct { - uint32_t min_indx : 24; + uint32_t min_indx : 24; // +0 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_VGT_MIN_VTX_INDX; }; @@ -353,7 +363,8 @@ union alignas(uint32_t) VGT_MAX_VTX_INDX { uint32_t value; struct { // Usually 0xFFFF or 0xFFFFFF. 
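// That is, the largest 16-bit or 24-bit index value respectively.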
- uint32_t max_indx : 24; + uint32_t max_indx : 24; // +0 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_VGT_MAX_VTX_INDX; }; @@ -363,6 +374,7 @@ union alignas(uint32_t) VGT_OUTPUT_PATH_CNTL { uint32_t value; struct { xenos::VGTOutputPath path_select : 2; // +0 + uint32_t _pad_2 : 30; // +2 }; static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; }; @@ -372,6 +384,7 @@ union alignas(uint32_t) VGT_HOS_CNTL { uint32_t value; struct { xenos::TessellationMode tess_mode : 2; // +0 + uint32_t _pad_2 : 30; // +2 }; static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL; }; @@ -430,19 +443,20 @@ union alignas(uint32_t) PA_SU_SC_MODE_CNTL { uint32_t poly_offset_front_enable : 1; // +11 uint32_t poly_offset_back_enable : 1; // +12 uint32_t poly_offset_para_enable : 1; // +13 - uint32_t : 1; // +14 + uint32_t _pad_14 : 1; // +14 uint32_t msaa_enable : 1; // +15 uint32_t vtx_window_offset_enable : 1; // +16 // LINE_STIPPLE_ENABLE was added on Adreno. - uint32_t : 2; // +17 + uint32_t _pad_17 : 2; // +17 uint32_t provoking_vtx_last : 1; // +19 uint32_t persp_corr_dis : 1; // +20 uint32_t multi_prim_ib_ena : 1; // +21 - uint32_t : 1; // +22 + uint32_t _pad_22 : 1; // +22 uint32_t quad_order_enable : 1; // +23 uint32_t sc_one_quad_per_clock : 1; // +24 // WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on // Adreno. + uint32_t _pad_25 : 7; // +25 }; static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; @@ -455,6 +469,7 @@ union alignas(uint32_t) PA_SU_VTX_CNTL { uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL). uint32_t round_mode : 2; // +1 uint32_t quant_mode : 3; // +3 + uint32_t _pad_6 : 26; // +6 }; static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; @@ -464,7 +479,7 @@ union alignas(uint32_t) PA_SC_MPASS_PS_CNTL { uint32_t value; struct { uint32_t mpass_pix_vec_per_pass : 20; // +0 - uint32_t : 11; // +20 + uint32_t _pad_20 : 11; // +20 uint32_t mpass_ps_ena : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; @@ -482,6 +497,7 @@ union alignas(uint32_t) PA_SC_VIZ_QUERY { uint32_t kill_pix_post_hi_z : 1; // +7 // not used with d3d uint32_t kill_pix_post_detail_mask : 1; // +8 + uint32_t _pad_9 : 23; // +9 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; @@ -497,7 +513,7 @@ union alignas(uint32_t) PA_CL_CLIP_CNTL { uint32_t ucp_ena_3 : 1; // +3 uint32_t ucp_ena_4 : 1; // +4 uint32_t ucp_ena_5 : 1; // +5 - uint32_t : 8; // +6 + uint32_t _pad_6 : 8; // +6 uint32_t ps_ucp_mode : 2; // +14 uint32_t clip_disable : 1; // +16 uint32_t ucp_cull_only_ena : 1; // +17 @@ -508,6 +524,7 @@ union alignas(uint32_t) PA_CL_CLIP_CNTL { uint32_t xy_nan_retain : 1; // +22 uint32_t z_nan_retain : 1; // +23 uint32_t w_nan_retain : 1; // +24 + uint32_t _pad_25 : 7; // +25 }; struct { uint32_t ucp_ena : 6; @@ -526,11 +543,12 @@ union alignas(uint32_t) PA_CL_VTE_CNTL { uint32_t vport_y_offset_ena : 1; // +3 uint32_t vport_z_scale_ena : 1; // +4 uint32_t vport_z_offset_ena : 1; // +5 - uint32_t : 2; // +6 + uint32_t _pad_6 : 2; // +6 uint32_t vtx_xy_fmt : 1; // +8 uint32_t vtx_z_fmt : 1; // +9 uint32_t vtx_w0_fmt : 1; // +10 uint32_t perfcounter_ref : 1; // +11 + uint32_t _pad_12 : 20; // +12 }; static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; @@ -539,9 +557,10 @@ static_assert_size(PA_CL_VTE_CNTL, sizeof(uint32_t)); union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_TL { uint32_t value; struct 
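// Both corners are signed 15-bit coordinates, with one unused bit above each.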
{ - int32_t tl_x : 15; // +0 - uint32_t : 1; // +15 - int32_t tl_y : 15; // +16 + int32_t tl_x : 15; // +0 + uint32_t _pad_15 : 1; // +15 + int32_t tl_y : 15; // +16 + uint32_t _pad_31 : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL; }; @@ -550,9 +569,10 @@ static_assert_size(PA_SC_SCREEN_SCISSOR_TL, sizeof(uint32_t)); union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_BR { uint32_t value; struct { - int32_t br_x : 15; // +0 - uint32_t : 1; // +15 - int32_t br_y : 15; // +16 + int32_t br_x : 15; // +0 + uint32_t _pad_15 : 1; // +15 + int32_t br_y : 15; // +16 + uint32_t _pad_31 : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR; }; @@ -562,8 +582,9 @@ union alignas(uint32_t) PA_SC_WINDOW_OFFSET { uint32_t value; struct { int32_t window_x_offset : 15; // +0 - uint32_t : 1; // +15 + uint32_t _pad_15 : 1; // +15 int32_t window_y_offset : 15; // +16 + uint32_t _pad_31 : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; @@ -573,9 +594,9 @@ union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_TL { uint32_t value; struct { uint32_t tl_x : 14; // +0 - uint32_t : 2; // +14 + uint32_t _pad_14 : 2; // +14 uint32_t tl_y : 14; // +16 - uint32_t : 1; // +30 + uint32_t _pad_30 : 1; // +30 uint32_t window_offset_disable : 1; // +31 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; @@ -585,9 +606,10 @@ static_assert_size(PA_SC_WINDOW_SCISSOR_TL, sizeof(uint32_t)); union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_BR { uint32_t value; struct { - uint32_t br_x : 14; // +0 - uint32_t : 2; // +14 - uint32_t br_y : 14; // +16 + uint32_t br_x : 14; // +0 + uint32_t _pad_14 : 2; // +14 + uint32_t br_y : 14; // +16 + uint32_t _pad_30 : 2; // +30 }; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; @@ -610,6 +632,7 @@ union alignas(uint32_t) RB_MODECONTROL { uint32_t value; struct { xenos::ModeControl edram_mode : 3; // +0 + uint32_t _pad_3 : 29; // +3 }; static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL; }; @@ -619,7 +642,7 @@ union alignas(uint32_t) RB_SURFACE_INFO { uint32_t value; struct { uint32_t surface_pitch : 14; // +0 in pixels. - uint32_t : 2; // +14 + uint32_t _pad_14 : 2; // +14 xenos::MsaaSamples msaa_samples : 2; // +16 uint32_t hiz_pitch : 14; // +18 }; @@ -634,7 +657,7 @@ union alignas(uint32_t) RB_COLORCONTROL { uint32_t alpha_test_enable : 1; // +3 uint32_t alpha_to_mask_enable : 1; // +4 // Everything in between was added on Adreno. - uint32_t : 19; // +5 + uint32_t _pad_5 : 19; // +5 // TODO(Triang3l): Redo these tests and possibly flip these vertically in // the comment and in the actual implementation. It appears that // gl_FragCoord.y is mirrored as opposed to the actual screen coordinates in @@ -689,9 +712,10 @@ union alignas(uint32_t) RB_COLOR_INFO { // for convenience and to avoid mistakes. uint32_t color_base : 11; // +0 in tiles. uint32_t color_base_bit_11 : 1; // +11 - uint32_t : 4; // +12 + uint32_t _pad_12 : 4; // +12 xenos::ColorRenderTargetFormat color_format : 4; // +16 int32_t color_exp_bias : 6; // +20 + uint32_t _pad_26 : 6; // +26 }; static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO; // RB_COLOR[1-3]_INFO also use this format. 
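// color_exp_bias scales values written to the render target by 2^bias.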
@@ -718,6 +742,7 @@ union alignas(uint32_t) RB_COLOR_MASK { uint32_t write_green3 : 1; // +13 uint32_t write_blue3 : 1; // +14 uint32_t write_alpha3 : 1; // +15 + uint32_t _pad_16 : 16; // +16 }; static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK; }; @@ -729,11 +754,12 @@ union alignas(uint32_t) RB_BLENDCONTROL { xenos::BlendFactor color_srcblend : 5; // +0 xenos::BlendOp color_comb_fcn : 3; // +5 xenos::BlendFactor color_destblend : 5; // +8 - uint32_t : 3; // +13 + uint32_t _pad_13 : 3; // +13 xenos::BlendFactor alpha_srcblend : 5; // +16 xenos::BlendOp alpha_comb_fcn : 3; // +21 xenos::BlendFactor alpha_destblend : 5; // +24 // BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno. + uint32_t _pad_29 : 3; // +29 }; // RB_BLENDCONTROL[0-3] use this format. static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0; @@ -748,7 +774,7 @@ union alignas(uint32_t) RB_DEPTHCONTROL { uint32_t z_enable : 1; // +1 uint32_t z_write_enable : 1; // +2 // EARLY_Z_ENABLE was added on Adreno. - uint32_t : 1; // +3 + uint32_t _pad_3 : 1; // +3 xenos::CompareFunction zfunc : 3; // +4 uint32_t backface_enable : 1; // +7 xenos::CompareFunction stencilfunc : 3; // +8 @@ -770,6 +796,7 @@ union alignas(uint32_t) RB_STENCILREFMASK { uint32_t stencilref : 8; // +0 uint32_t stencilmask : 8; // +8 uint32_t stencilwritemask : 8; // +16 + uint32_t _pad_24 : 8; // +24 }; static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK; // RB_STENCILREFMASK_BF also uses this format. @@ -784,8 +811,9 @@ union alignas(uint32_t) RB_DEPTH_INFO { // for convenience and to avoid mistakes. uint32_t depth_base : 11; // +0 in tiles. uint32_t depth_base_bit_11 : 1; // +11 - uint32_t : 4; // +12 + uint32_t _pad_12 : 4; // +12 xenos::DepthRenderTargetFormat depth_format : 1; // +16 + uint32_t _pad_17 : 15; // +17 }; static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO; }; @@ -797,13 +825,14 @@ union alignas(uint32_t) RB_COPY_CONTROL { uint32_t value; struct { uint32_t copy_src_select : 3; // +0 Depth is 4. - uint32_t : 1; // +3 + uint32_t _pad_3 : 1; // +3 xenos::CopySampleSelect copy_sample_select : 3; // +4 - uint32_t : 1; // +7 + uint32_t _pad_7 : 1; // +7 uint32_t color_clear_enable : 1; // +8 uint32_t depth_clear_enable : 1; // +9 - uint32_t : 10; // +10 + uint32_t _pad_10 : 10; // +10 xenos::CopyCommand copy_command : 2; // +20 + uint32_t _pad_22 : 10; // +22 }; static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL; }; @@ -818,8 +847,9 @@ union alignas(uint32_t) RB_COPY_DEST_INFO { xenos::ColorFormat copy_dest_format : 6; // +7 xenos::SurfaceNumberFormat copy_dest_number : 3; // +13 int32_t copy_dest_exp_bias : 6; // +16 - uint32_t : 2; // +22 + uint32_t _pad_22 : 2; // +22 uint32_t copy_dest_swap : 1; // +24 + uint32_t _pad_25 : 7; // +25 }; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; @@ -829,8 +859,9 @@ union alignas(uint32_t) RB_COPY_DEST_PITCH { uint32_t value; struct { uint32_t copy_dest_pitch : 14; // +0 - uint32_t : 2; // +14 + uint32_t _pad_14 : 2; // +14 uint32_t copy_dest_height : 14; // +16 + uint32_t _pad_30 : 2; // +30 }; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; @@ -856,6 +887,7 @@ union alignas(uint32_t) DC_LUT_RW_INDEX { // absolute index, without the lower or upper 10 bits selection in the // bit 0. For PWL, the bit 7 is ignored. 
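// This gives 256 addressable entries for the regular table and 128 for PWL.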
uint32_t rw_index : 8; // +0 + uint32_t _pad_8 : 24; // +8 }; static constexpr Register register_index = XE_GPU_REG_DC_LUT_RW_INDEX; }; @@ -865,6 +897,7 @@ union alignas(uint32_t) DC_LUT_SEQ_COLOR { uint32_t value; struct { uint32_t seq_color : 16; // +0, bits 0:5 are hardwired to zero + uint32_t _pad_16 : 16; // +16 }; static constexpr Register register_index = XE_GPU_REG_DC_LUT_SEQ_COLOR; }; @@ -893,6 +926,7 @@ union alignas(uint32_t) DC_LUT_30_COLOR { uint32_t color_10_blue : 10; // +0 uint32_t color_10_green : 10; // +10 uint32_t color_10_red : 10; // +20 + uint32_t _pad_30 : 2; // +30 }; static constexpr Register register_index = XE_GPU_REG_DC_LUT_30_COLOR; }; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 396056bfe..5517d73c0 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -1067,8 +1067,9 @@ union alignas(uint32_t) LoopConstant { // The resulting aL is `iterator * step + start`, 10-bit, and has the real // range of [-256, 256], according to the IPR2015-00325 sequencer // specification. - uint32_t start : 8; // +8 - int32_t step : 8; // +16 + uint32_t start : 8; // +8 + int32_t step : 8; // +16 + uint32_t _pad_24 : 8; // +24 }; }; static_assert_size(LoopConstant, sizeof(uint32_t)); @@ -1208,7 +1209,7 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t { union { // dword_2 struct { uint32_t width : 24; - uint32_t : 8; + uint32_t _pad_88 : 8; } size_1d; struct { uint32_t width : 13; From f87c6afdeb8299b6b423be4318b0795a554f603d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 4 May 2024 19:59:28 +0300 Subject: [PATCH 2/4] [Vulkan] Update headers to 1.3.278 --- third_party/Vulkan-Headers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/Vulkan-Headers b/third_party/Vulkan-Headers index b32da5329..31aa7f634 160000 --- a/third_party/Vulkan-Headers +++ b/third_party/Vulkan-Headers @@ -1 +1 @@ -Subproject commit b32da5329b50e3cb96229aaecba9ded032fe29cc +Subproject commit 31aa7f634b052d87ede4664053e85f3f4d1d50d3 From e9f7a8bd48198541a3bd71fe6267df3dbe83c5c5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 4 May 2024 22:47:14 +0300 Subject: [PATCH 3/4] [Vulkan] Optional functionality usage improvements Functional changes: - Enable only actually used features, as drivers may take more optimal paths when certain features are disabled. - Support VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE. - Fix the separateStencilMaskRef check doing the opposite. - Support shaderRoundingModeRTEFloat32. - Fix vkGetDeviceBufferMemoryRequirements pointer not passed to the Vulkan Memory Allocator. Stylistic changes: - Move all device extensions, properties and features to one structure, especially simplifying portability subset feature checks, and also making it easier to request new extension functionality in the future. - Remove extension suffixes from usage of promoted extensions. 
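For reference, the consolidated structure that the hunks below migrate to looks
roughly like the following condensed sketch. Only members appearing in this
patch are listed; the grouping and comments are illustrative, not the actual
declaration in vulkan_provider.h:

  struct DeviceInfo {
    uint32_t apiVersion;
    // Limits, flattened from VkPhysicalDeviceLimits and extension property
    // structures:
    uint32_t maxStorageBufferRange;
    VkDeviceSize minUniformBufferOffsetAlignment;
    uint32_t maxViewportDimensions[2];
    uint32_t maxFramebufferWidth;
    uint32_t maxFramebufferHeight;
    // Extension availability, prefixed with the core version that promoted
    // the extension, if any:
    bool ext_1_1_VK_KHR_dedicated_allocation;
    bool ext_1_2_VK_KHR_spirv_1_4;
    bool ext_VK_EXT_shader_stencil_export;
    // Feature bits. VK_KHR_portability_subset members such as triangleFans
    // and pointPolygons are reported as true when the extension is absent:
    bool fullDrawIndexUint32;
    bool geometryShader;
    bool tessellationShader;
    bool triangleFans;
    bool pointPolygons;
    bool imageViewFormatSwizzle;
    bool separateStencilMaskRef;
    // Memory type masks for allocations:
    uint32_t memory_types_device_local;
    uint32_t memory_types_host_visible;
    uint32_t memory_types_host_cached;
  };

With this, a call site performs a single check such as device_info.triangleFans
instead of first testing whether the portability subset feature structure is
present at all.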
--- src/xenia/gpu/spirv_shader_translator.cc | 66 +- src/xenia/gpu/spirv_shader_translator.h | 4 +- .../gpu/vulkan/vulkan_command_processor.cc | 72 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 128 +- .../gpu/vulkan/vulkan_primitive_processor.cc | 18 +- .../gpu/vulkan/vulkan_render_target_cache.cc | 106 +- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 19 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 88 +- .../ui/vulkan/vulkan_immediate_drawer.cc | 6 +- src/xenia/ui/vulkan/vulkan_mem_alloc.cc | 30 +- src/xenia/ui/vulkan/vulkan_presenter.cc | 2 +- src/xenia/ui/vulkan/vulkan_provider.cc | 1306 +++++++++-------- src/xenia/ui/vulkan/vulkan_provider.h | 257 ++-- .../ui/vulkan/vulkan_upload_buffer_pool.cc | 6 +- src/xenia/ui/vulkan/vulkan_util.cc | 18 +- src/xenia/ui/vulkan/vulkan_util.h | 16 +- 16 files changed, 1115 insertions(+), 1027 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 1f496082c..8bcaa19fd 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -37,56 +37,32 @@ SpirvShaderTranslator::Features::Features(bool all) full_draw_index_uint32(all), image_view_format_swizzle(all), signed_zero_inf_nan_preserve_float32(all), - denorm_flush_to_zero_float32(all) {} + denorm_flush_to_zero_float32(all), + rounding_mode_rte_float32(all) {} SpirvShaderTranslator::Features::Features( - const ui::vulkan::VulkanProvider& provider) - : max_storage_buffer_range( - provider.device_properties().limits.maxStorageBufferRange), - clip_distance(provider.device_features().shaderClipDistance), - cull_distance(provider.device_features().shaderCullDistance), - demote_to_helper_invocation( - provider.device_extensions().ext_shader_demote_to_helper_invocation && - provider.device_shader_demote_to_helper_invocation_features() - .shaderDemoteToHelperInvocation), + const ui::vulkan::VulkanProvider::DeviceInfo& device_info) + : max_storage_buffer_range(device_info.maxStorageBufferRange), + clip_distance(device_info.shaderClipDistance), + cull_distance(device_info.shaderCullDistance), + demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation), fragment_shader_sample_interlock( - provider.device_extensions().ext_fragment_shader_interlock && - provider.device_fragment_shader_interlock_features() - .fragmentShaderSampleInterlock), - full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) { - uint32_t device_version = provider.device_properties().apiVersion; - const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions = - provider.device_extensions(); - if (device_version >= VK_MAKE_VERSION(1, 2, 0)) { + device_info.fragmentShaderSampleInterlock), + full_draw_index_uint32(device_info.fullDrawIndexUint32), + image_view_format_swizzle(device_info.imageViewFormatSwizzle), + signed_zero_inf_nan_preserve_float32( + device_info.shaderSignedZeroInfNanPreserveFloat32), + denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32), + rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) { + if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { spirv_version = spv::Spv_1_5; - } else if (device_extensions.khr_spirv_1_4) { + } else if (device_info.ext_1_2_VK_KHR_spirv_1_4) { spirv_version = spv::Spv_1_4; - } else if (device_version >= VK_MAKE_VERSION(1, 1, 0)) { + } else if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { spirv_version = spv::Spv_1_3; } else { spirv_version = spv::Spv_1_0; } - const 
VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (device_portability_subset_features) { - image_view_format_swizzle = - bool(device_portability_subset_features->imageViewFormatSwizzle); - } else { - image_view_format_swizzle = true; - } - if (spirv_version >= spv::Spv_1_4 || - device_extensions.khr_shader_float_controls) { - const VkPhysicalDeviceFloatControlsPropertiesKHR& - float_controls_properties = provider.device_float_controls_properties(); - signed_zero_inf_nan_preserve_float32 = - bool(float_controls_properties.shaderSignedZeroInfNanPreserveFloat32); - denorm_flush_to_zero_float32 = - bool(float_controls_properties.shaderDenormFlushToZeroFloat32); - } else { - signed_zero_inf_nan_preserve_float32 = false; - denorm_flush_to_zero_float32 = false; - } } uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification( @@ -168,7 +144,8 @@ void SpirvShaderTranslator::StartTranslation() { : spv::CapabilityShader); if (features_.spirv_version < spv::Spv_1_4) { if (features_.signed_zero_inf_nan_preserve_float32 || - features_.denorm_flush_to_zero_float32) { + features_.denorm_flush_to_zero_float32 || + features_.rounding_mode_rte_float32) { builder_->addExtension("SPV_KHR_float_controls"); } } @@ -724,6 +701,11 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { builder_->addExecutionMode(function_main_, spv::ExecutionModeSignedZeroInfNanPreserve, 32); } + if (features_.rounding_mode_rte_float32) { + builder_->addCapability(spv::CapabilityRoundingModeRTE); + builder_->addExecutionMode(function_main_, + spv::ExecutionModeRoundingModeRTE, 32); + } spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); for (spv::Id interface_id : main_interface_) { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 9889fb630..0ed368ae4 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -320,7 +320,8 @@ class SpirvShaderTranslator : public ShaderTranslator { static constexpr uint32_t kSpirvMagicToolId = 26; struct Features { - explicit Features(const ui::vulkan::VulkanProvider& provider); + explicit Features( + const ui::vulkan::VulkanProvider::DeviceInfo& device_info); explicit Features(bool all = false); unsigned int spirv_version; uint32_t max_storage_buffer_range; @@ -332,6 +333,7 @@ class SpirvShaderTranslator : public ShaderTranslator { bool image_view_format_swizzle; bool signed_zero_inf_nan_preserve_float32; bool denorm_flush_to_zero_float32; + bool rounding_mode_rte_float32; }; SpirvShaderTranslator(const Features& features, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 7115929f4..58336c901 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -138,7 +138,8 @@ bool VulkanCommandProcessor::SetupContext() { const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); // The unconditional inclusion of the vertex shader stage also covers the case // of manual index / factor buffer fetch (the system constants and the shared @@ -147,12 +148,12 @@ bool 
VulkanCommandProcessor::SetupContext() { guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT; - if (device_features.tessellationShader) { + if (device_info.tessellationShader) { guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; guest_shader_vertex_stages_ |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; } - if (!device_features.vertexPipelineStoresAndAtomics) { + if (!device_info.vertexPipelineStoresAndAtomics) { // For memory export from vertex shaders converted to compute shaders. guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT; @@ -160,14 +161,11 @@ bool VulkanCommandProcessor::SetupContext() { // 16384 is bigger than any single uniform buffer that Xenia needs, but is the // minimum maxUniformBufferRange, thus the safe minimum amount. - VkDeviceSize uniform_buffer_alignment = std::max( - provider.device_properties().limits.minUniformBufferOffsetAlignment, - VkDeviceSize(1)); uniform_buffer_pool_ = std::make_unique( provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize, size_t(16384)), - size_t(uniform_buffer_alignment))); + size_t(device_info.minUniformBufferOffsetAlignment))); // Descriptor set layouts that don't depend on the setup of other subsystems. VkShaderStageFlags guest_shader_stages = @@ -201,10 +199,9 @@ bool VulkanCommandProcessor::SetupContext() { [SpirvShaderTranslator::kConstantBufferSystem] .stageFlags = guest_shader_stages | - (device_features.tessellationShader - ? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT - : 0) | - (device_features.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0); + (device_info.tessellationShader ? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT + : 0) | + (device_info.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0); descriptor_set_layout_bindings_constants [SpirvShaderTranslator::kConstantBufferFloatVertex] .stageFlags = guest_shader_vertex_stages_; @@ -283,7 +280,7 @@ bool VulkanCommandProcessor::SetupContext() { uint32_t shared_memory_binding_count_log2 = SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2( - provider.device_properties().limits.maxStorageBufferRange); + device_info.maxStorageBufferRange); uint32_t shared_memory_binding_count = UINT32_C(1) << shared_memory_binding_count_log2; @@ -487,14 +484,14 @@ bool VulkanCommandProcessor::SetupContext() { &gamma_ramp_host_visible_buffer_memory_requirements); uint32_t gamma_ramp_host_visible_buffer_memory_types = gamma_ramp_host_visible_buffer_memory_requirements.memoryTypeBits & - (provider.memory_types_device_local() & - provider.memory_types_host_visible()); + (device_info.memory_types_device_local & + device_info.memory_types_host_visible); VkMemoryAllocateInfo gamma_ramp_host_visible_buffer_memory_allocate_info; // Prefer a host-uncached (because it's write-only) memory type, but try a // host-cached host-visible device-local one as well. 
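// xe::bit_scan_forward returns the index of the lowest set bit, so masking
// out the host-cached types tries the uncached ones first.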
if (xe::bit_scan_forward( gamma_ramp_host_visible_buffer_memory_types & - ~provider.memory_types_host_cached(), + ~device_info.memory_types_host_cached, &(gamma_ramp_host_visible_buffer_memory_allocate_info .memoryTypeIndex)) || xe::bit_scan_forward( @@ -509,16 +506,16 @@ bool VulkanCommandProcessor::SetupContext() { gamma_ramp_host_visible_buffer_memory_allocate_info.pNext = nullptr; gamma_ramp_host_visible_buffer_memory_allocate_info.allocationSize = gamma_ramp_host_visible_buffer_memory_requirements.size; - VkMemoryDedicatedAllocateInfoKHR + VkMemoryDedicatedAllocateInfo gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; - if (provider.device_extensions().khr_dedicated_allocation) { + if (device_info.ext_1_1_VK_KHR_dedicated_allocation) { gamma_ramp_host_visible_buffer_memory_allocate_info_last->pNext = &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; gamma_ramp_host_visible_buffer_memory_allocate_info_last = reinterpret_cast( &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info); gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.sType = - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.pNext = nullptr; gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.image = @@ -2419,10 +2416,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_guest_graphics_pipeline_layout_ = pipeline_layout; } - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + GetVulkanProvider().device_info(); bool host_render_targets_used = render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets; @@ -2446,8 +2441,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // interlocks case completely - apply the viewport and the scissor offset // directly to pixel address and to things like ps_param_gen. draw_util::GetHostViewportInfo( - regs, 1, 1, false, device_limits.maxViewportDimensions[0], - device_limits.maxViewportDimensions[1], true, normalized_depth_control, + regs, 1, 1, false, device_info.maxViewportDimensions[0], + device_info.maxViewportDimensions[1], true, normalized_depth_control, false, host_render_targets_used, pixel_shader && pixel_shader->writes_depth(), viewport_info); @@ -2461,7 +2456,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // indirectly in the vertex shader if full 32-bit indices are not supported by // the host. 
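// Vulkan guarantees a maxDrawIndexedIndexValue of only 2^24 - 1 when
// fullDrawIndexUint32 is not supported.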
bool shader_32bit_index_dma = - !device_features.fullDrawIndexUint32 && + !device_info.fullDrawIndexUint32 && primitive_processing_result.index_buffer_type == PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA && vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32 && @@ -3315,21 +3310,16 @@ void VulkanCommandProcessor::UpdateDynamicState( if (normalized_depth_control.stencil_enable) { Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg; if (primitive_polygonal && normalized_depth_control.backface_enable) { - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (!device_portability_subset_features || - device_portability_subset_features->separateStencilMaskRef) { + if (GetVulkanProvider().device_info().separateStencilMaskRef) { + stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; + } else { // Choose the back face values only if drawing only back faces. stencil_ref_mask_front_reg = regs.Get().cull_front ? XE_GPU_REG_RB_STENCILREFMASK_BF : XE_GPU_REG_RB_STENCILREFMASK; stencil_ref_mask_back_reg = stencil_ref_mask_front_reg; - } else { - stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; - stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; } } else { stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; @@ -3681,12 +3671,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } // Texture host swizzle in the shader. - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (device_portability_subset_features && - !device_portability_subset_features->imageViewFormatSwizzle) { + if (!GetVulkanProvider().device_info().imageViewFormatSwizzle) { uint32_t textures_remaining = used_texture_mask; uint32_t texture_index; while (xe::bit_scan_forward(textures_remaining, &texture_index)) { @@ -3968,8 +3953,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, kAllConstantBuffersMask) { current_graphics_descriptor_set_values_up_to_date_ &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants); - size_t uniform_buffer_alignment = size_t( - provider.device_properties().limits.minUniformBufferOffsetAlignment); + size_t uniform_buffer_alignment = + size_t(provider.device_info().minUniformBufferOffsetAlignment); // System constants. 
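// Each buffer is re-uploaded only when its bit in
// current_constant_buffers_up_to_date_ is clear.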
if (!(current_constant_buffers_up_to_date_ & (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem))) { @@ -4348,8 +4333,7 @@ uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding( const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); uint8_t* mapping = uniform_buffer_pool_->Request( frame_current_, size, - size_t( - provider.device_properties().limits.minUniformBufferOffsetAlignment), + size_t(provider.device_info().minUniformBufferOffsetAlignment), descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset); if (!mapping) { return nullptr; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 67aae26ed..f91cc4e6b 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -59,7 +59,7 @@ bool VulkanPipelineCache::Initialize() { RenderTargetCache::Path::kPixelShaderInterlock; shader_translator_ = std::make_unique( - SpirvShaderTranslator::Features(provider), + SpirvShaderTranslator::Features(provider.device_info()), render_target_cache_.msaa_2x_attachments_supported(), render_target_cache_.msaa_2x_no_attachments_supported(), edram_fragment_shader_interlock); @@ -471,13 +471,9 @@ void VulkanPipelineCache::WritePipelineRenderTargetDescription( render_target_out.dst_alpha_blend_factor = kBlendFactorMap[uint32_t(blend_control.alpha_destblend)]; render_target_out.alpha_blend_op = blend_control.alpha_comb_fcn; - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (device_portability_subset_features && - !device_portability_subset_features->constantAlphaColorBlendFactors) { + if (!command_processor_.GetVulkanProvider() + .device_info() + .constantAlphaColorBlendFactors) { if (blend_control.color_srcblend == xenos::BlendFactor::kConstantAlpha) { render_target_out.src_color_blend_factor = PipelineBlendFactor::kConstantColor; @@ -516,12 +512,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription( PipelineDescription& description_out) const { description_out.Reset(); - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); const RegisterFile& regs = register_file_; auto pa_su_sc_mode_cntl = regs.Get(); @@ -556,8 +548,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( break; case xenos::PrimitiveType::kTriangleFan: // The check should be performed at primitive processing time. - assert_true(!device_portability_subset_features || - device_portability_subset_features->triangleFans); + assert_true(device_info.triangleFans); primitive_topology = PipelinePrimitiveTopology::kTriangleFan; break; case xenos::PrimitiveType::kTriangleStrip: @@ -581,8 +572,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_processing_result.host_primitive_reset_enabled; description_out.depth_clamp_enable = - device_features.depthClamp && - regs.Get().clip_disable; + device_info.depthClamp && regs.Get().clip_disable; // TODO(Triang3l): Tessellation. 
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); @@ -597,7 +587,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( bool cull_back = pa_su_sc_mode_cntl.cull_back; description_out.cull_front = cull_front; description_out.cull_back = cull_back; - if (device_features.fillModeNonSolid) { + if (device_info.fillModeNonSolid) { xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles; if (!cull_front) { polygon_type = @@ -614,11 +604,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( case xenos::PolygonType::kPoints: // When points are not supported, use lines instead, preserving // debug-like purpose. - description_out.polygon_mode = - (!device_portability_subset_features || - device_portability_subset_features->pointPolygons) - ? PipelinePolygonMode::kPoint - : PipelinePolygonMode::kLine; + description_out.polygon_mode = device_info.pointPolygons + ? PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; break; case xenos::PolygonType::kLines: description_out.polygon_mode = PipelinePolygonMode::kLine; @@ -683,7 +671,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( // Color blending and write masks (filled only for the attachments present // in the render pass object). uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1; - if (device_features.independentBlend) { + if (device_info.independentBlend) { uint32_t render_pass_color_rts_remaining = render_pass_color_rts; uint32_t color_rt_index; while (xe::bit_scan_forward(render_pass_color_rts_remaining, @@ -779,63 +767,35 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( return false; } - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (device_portability_subset_features) { - if (description.primitive_topology == - PipelinePrimitiveTopology::kTriangleFan && - !device_portability_subset_features->triangleFans) { - return false; - } - - if (description.polygon_mode == PipelinePolygonMode::kPoint && - !device_portability_subset_features->pointPolygons) { - return false; - } - - if (!device_portability_subset_features->constantAlphaColorBlendFactors) { - uint32_t color_rts_remaining = - description.render_pass_key.depth_and_color_used >> 1; - uint32_t color_rt_index; - while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { - color_rts_remaining &= ~(uint32_t(1) << color_rt_index); - const PipelineRenderTarget& color_rt = - description.render_targets[color_rt_index]; - if (color_rt.src_color_blend_factor == - PipelineBlendFactor::kConstantAlpha || - color_rt.src_color_blend_factor == - PipelineBlendFactor::kOneMinusConstantAlpha || - color_rt.dst_color_blend_factor == - PipelineBlendFactor::kConstantAlpha || - color_rt.dst_color_blend_factor == - PipelineBlendFactor::kOneMinusConstantAlpha) { - return false; - } - } - } - } - - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - - if (!device_features.geometryShader && + if (!device_info.geometryShader && description.geometry_shader != PipelineGeometryShader::kNone) { return false; } - if (!device_features.depthClamp && description.depth_clamp_enable) { + if (!device_info.triangleFans && + description.primitive_topology == + PipelinePrimitiveTopology::kTriangleFan) { return false; } 
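// Triangle fans are a common VK_KHR_portability_subset gap; Metal, for
// example, has no triangle fan primitive type.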
- if (!device_features.fillModeNonSolid && + if (!device_info.depthClamp && description.depth_clamp_enable) { + return false; + } + + if (!device_info.pointPolygons && + description.polygon_mode == PipelinePolygonMode::kPoint) { + return false; + } + + if (!device_info.fillModeNonSolid && description.polygon_mode != PipelinePolygonMode::kFill) { return false; } - if (!device_features.independentBlend) { + if (!device_info.independentBlend) { uint32_t color_rts_remaining = description.render_pass_key.depth_and_color_used >> 1; uint32_t first_color_rt_index; @@ -865,6 +825,27 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( } } + if (!device_info.constantAlphaColorBlendFactors) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.src_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha) { + return false; + } + } + } + return true; } @@ -1913,7 +1894,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated( const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); bool edram_fragment_shader_interlock = render_target_cache_.GetPath() == @@ -2222,7 +2204,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( } color_blend_attachment.colorWriteMask = VkColorComponentFlags(color_rt.color_write_mask); - if (!device_features.independentBlend) { + if (!device_info.independentBlend) { // For non-independent blend, the pAttachments element for the first // actually used color will be replicated into all. break; @@ -2231,7 +2213,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( } color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used); color_blend_state.pAttachments = color_blend_attachments; - if (color_rts_used && !device_features.independentBlend) { + if (color_rts_used && !device_info.independentBlend) { // "If the independent blending feature is not enabled, all elements of // pAttachments must be identical." 
uint32_t first_color_rt_index; diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index 86b13b4ae..f4898acd8 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -27,18 +27,12 @@ namespace vulkan { VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); } bool VulkanPrimitiveProcessor::Initialize() { - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (!InitializeCommon(device_features.fullDrawIndexUint32, - !device_portability_subset_features || - device_portability_subset_features->triangleFans, - false, device_features.geometryShader, - device_features.geometryShader, - device_features.geometryShader)) { + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); + if (!InitializeCommon(device_info.fullDrawIndexUint32, + device_info.triangleFans, false, + device_info.geometryShader, device_info.geometryShader, + device_info.geometryShader)) { Shutdown(); return false; } diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index e5b17ef5f..bf1cda68d 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -213,8 +213,8 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) { VkPhysicalDevice physical_device = provider.physical_device(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); if (cvars::render_target_path_vulkan == "fsi") { path_ = Path::kPixelShaderInterlock; @@ -226,11 +226,6 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) { // OpenGL ES 3.1. Thus, it's fine to demand a wide range of other optional // features for the fragment shader interlock backend to work. if (path_ == Path::kPixelShaderInterlock) { - const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT& - device_fragment_shader_interlock_features = - provider.device_fragment_shader_interlock_features(); - const VkPhysicalDeviceFeatures& device_features = - provider.device_features(); // Interlocking between fragments with common sample coverage is enough, but // interlocking more is acceptable too (fragmentShaderShadingRateInterlock // would be okay too, but it's unlikely that an implementation would @@ -248,16 +243,13 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) { // between, for instance, the ability to vfetch and memexport in fragment // shaders, and the usage of fragment shader interlock, prefer the former // for simplicity. 
- if (!provider.device_extensions().ext_fragment_shader_interlock || - !(device_fragment_shader_interlock_features - .fragmentShaderSampleInterlock || - device_fragment_shader_interlock_features - .fragmentShaderPixelInterlock) || - !device_features.fragmentStoresAndAtomics || - !device_features.sampleRateShading || - !device_limits.standardSampleLocations || + if (!(device_info.fragmentShaderSampleInterlock || + device_info.fragmentShaderPixelInterlock) || + !device_info.fragmentStoresAndAtomics || + !device_info.sampleRateShading || + !device_info.standardSampleLocations || shared_memory_binding_count >= - device_limits.maxDescriptorSetStorageBuffers) { + device_info.maxPerStageDescriptorStorageBuffers) { path_ = Path::kHostRenderTargets; } } @@ -279,18 +271,17 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) { if (cvars::native_2x_msaa) { // Multisampled integer sampled images are optional in Vulkan and in Xenia. msaa_2x_attachments_supported_ = - (device_limits.framebufferColorSampleCounts & - device_limits.framebufferDepthSampleCounts & - device_limits.framebufferStencilSampleCounts & - device_limits.sampledImageColorSampleCounts & - device_limits.sampledImageDepthSampleCounts & - device_limits.sampledImageStencilSampleCounts & - VK_SAMPLE_COUNT_2_BIT) && - (device_limits.sampledImageIntegerSampleCounts & + (device_info.framebufferColorSampleCounts & + device_info.framebufferDepthSampleCounts & + device_info.framebufferStencilSampleCounts & + device_info.sampledImageColorSampleCounts & + device_info.sampledImageDepthSampleCounts & + device_info.sampledImageStencilSampleCounts & VK_SAMPLE_COUNT_2_BIT) && + (device_info.sampledImageIntegerSampleCounts & (VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) != VK_SAMPLE_COUNT_4_BIT; msaa_2x_no_attachments_supported_ = - (device_limits.framebufferNoAttachmentsSampleCounts & + (device_info.framebufferNoAttachmentsSampleCounts & VK_SAMPLE_COUNT_2_BIT) != 0; } else { msaa_2x_attachments_supported_ = false; @@ -847,10 +838,10 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) { fsi_framebuffer_create_info.pAttachments = nullptr; fsi_framebuffer_create_info.width = std::min( xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_x(), - device_limits.maxFramebufferWidth); + device_info.maxFramebufferWidth); fsi_framebuffer_create_info.height = std::min( xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_y(), - device_limits.maxFramebufferHeight); + device_info.maxFramebufferHeight); fsi_framebuffer_create_info.layers = 1; if (dfn.vkCreateFramebuffer(device, &fsi_framebuffer_create_info, nullptr, &fsi_framebuffer_.framebuffer) != VK_SUCCESS) { @@ -1680,17 +1671,17 @@ VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() { } uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const { - const VkPhysicalDeviceLimits& device_limits = - command_processor_.GetVulkanProvider().device_properties().limits; - return std::min(device_limits.maxFramebufferWidth, - device_limits.maxImageDimension2D); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); + return std::min(device_info.maxFramebufferWidth, + device_info.maxImageDimension2D); } uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const { - const VkPhysicalDeviceLimits& device_limits = - command_processor_.GetVulkanProvider().device_properties().limits; - return std::min(device_limits.maxFramebufferHeight, - 
device_limits.maxImageDimension2D); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); + return std::min(device_info.maxFramebufferHeight, + device_info.maxImageDimension2D); } RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( @@ -2084,8 +2075,8 @@ VulkanRenderTargetCache::GetHostRenderTargetsFramebuffer( command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); VkRenderPass render_pass = GetHostRenderTargetsRenderPass(render_pass_key); if (render_pass == VK_NULL_HANDLE) { @@ -2134,9 +2125,9 @@ VulkanRenderTargetCache::GetHostRenderTargetsFramebuffer( // there's no limit imposed by the sizes of the attachments that have been // created successfully. host_extent.width = std::min(host_extent.width * draw_resolution_scale_x(), - device_limits.maxFramebufferWidth); + device_info.maxFramebufferWidth); host_extent.height = std::min(host_extent.height * draw_resolution_scale_y(), - device_limits.maxFramebufferHeight); + device_info.maxFramebufferHeight); framebuffer_create_info.width = host_extent.width; framebuffer_create_info.height = host_extent.height; framebuffer_create_info.layers = 1; @@ -2161,7 +2152,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); std::vector id_vector_temp; std::vector uint_vector_temp; @@ -2249,7 +2241,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( // Outputs. bool shader_uses_stencil_reference_output = mode.output == TransferOutput::kDepth && - provider.device_extensions().ext_shader_stencil_export; + provider.device_info().ext_VK_EXT_shader_stencil_export; bool dest_color_is_uint = false; uint32_t dest_color_component_count = 0; spv::Id type_fragment_data_component = spv::NoResult; @@ -2485,7 +2477,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::Id input_sample_id = spv::NoResult; spv::Id spec_const_sample_id = spv::NoResult; if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { - if (device_features.sampleRateShading) { + if (device_info.sampleRateShading) { // One draw for all samples. builder.addCapability(spv::CapabilitySampleRateShading); input_sample_id = builder.createVariable( @@ -2579,7 +2571,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( // Load the destination sample index. 
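// With sampleRateShading, it comes from the SampleId built-in; otherwise,
// each sample is drawn separately, with a specialization constant providing
// the sample index.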
spv::Id dest_sample_id = spv::NoResult; if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { - if (device_features.sampleRateShading) { + if (device_info.sampleRateShading) { assert_true(input_sample_id != spv::NoResult); dest_sample_id = builder.createUnaryOp( spv::OpBitcast, type_uint, @@ -4242,12 +4234,13 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines( command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); uint32_t dest_sample_count = uint32_t(1) << uint32_t(key.shader_key.dest_msaa_samples); bool dest_is_masked_sample = - dest_sample_count > 1 && !device_features.sampleRateShading; + dest_sample_count > 1 && !device_info.sampleRateShading; VkPipelineShaderStageCreateInfo shader_stages[2]; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -4339,7 +4332,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines( ? VK_SAMPLE_COUNT_4_BIT : VkSampleCountFlagBits(dest_sample_count); if (dest_sample_count > 1) { - if (device_features.sampleRateShading) { + if (device_info.sampleRateShading) { multisample_state.sampleShadingEnable = VK_TRUE; multisample_state.minSampleShading = 1.0f; if (dest_sample_count == 2 && !msaa_2x_attachments_supported_) { @@ -4370,7 +4363,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines( : VK_COMPARE_OP_ALWAYS; } if ((mode.output == TransferOutput::kDepth && - provider.device_extensions().ext_shader_stencil_export) || + provider.device_info().ext_VK_EXT_shader_stencil_export) || mode.output == TransferOutput::kStencilBit) { depth_stencil_state.stencilTestEnable = VK_TRUE; depth_stencil_state.front.failOp = VK_STENCIL_OP_KEEP; @@ -4398,7 +4391,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines( 32 - xe::lzcnt(key.render_pass_key.depth_and_color_used >> 1); color_blend_state.pAttachments = color_blend_attachments; if (mode.output == TransferOutput::kColor) { - if (device_features.independentBlend) { + if (device_info.independentBlend) { // State the intention more explicitly. color_blend_attachments[key.shader_key.dest_color_rt_index] .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | @@ -4505,13 +4498,8 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears( const Transfer::Rectangle* resolve_clear_rectangle) { assert_true(GetPath() == Path::kHostRenderTargets); - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - bool shader_stencil_export = - provider.device_extensions().ext_shader_stencil_export; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); uint64_t current_submission = command_processor_.GetCurrentSubmission(); DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); @@ -4826,7 +4814,7 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears( // Gather shader keys and sort to reduce pipeline state and binding // switches. Also gather stencil rectangles to clear if needed. 
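// Without VK_EXT_shader_stencil_export, a depth transfer additionally needs
// separate draws for each of the eight stencil bits, with the stencil write
// mask selecting the destination bit.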
bool need_stencil_bit_draws = - dest_rt_key.is_depth && !shader_stencil_export; + dest_rt_key.is_depth && !device_info.ext_VK_EXT_shader_stencil_export; current_transfer_invocations_.clear(); current_transfer_invocations_.reserve( current_transfers.size() << uint32_t(need_stencil_bit_draws)); @@ -5018,10 +5006,10 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears( transfer_viewport.y = 0.0f; transfer_viewport.width = float(std::min(xe::next_pow2(transfer_framebuffer->host_extent.width), - device_limits.maxViewportDimensions[0])); + device_info.maxViewportDimensions[0])); transfer_viewport.height = float( std::min(xe::next_pow2(transfer_framebuffer->host_extent.height), - device_limits.maxViewportDimensions[1])); + device_info.maxViewportDimensions[1])); transfer_viewport.minDepth = 0.0f; transfer_viewport.maxDepth = 1.0f; command_processor_.SetViewport(transfer_viewport); @@ -5072,7 +5060,7 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears( kTransferPipelineLayoutInfos[size_t( transfer_pipeline_layout_index)]; uint32_t transfer_sample_pipeline_count = - device_features.sampleRateShading + device_info.sampleRateShading ? 1 : uint32_t(1) << uint32_t(dest_rt_key.msaa_samples); bool transfer_is_stencil_bit = diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index c321b9840..4501adb5c 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -51,7 +51,8 @@ bool VulkanSharedMemory::Initialize() { command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); const VkBufferCreateFlags sparse_flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT | @@ -69,16 +70,14 @@ bool VulkanSharedMemory::Initialize() { buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_create_info.queueFamilyIndexCount = 0; buffer_create_info.pQueueFamilyIndices = nullptr; - if (cvars::vulkan_sparse_shared_memory && - provider.IsSparseBindingSupported() && - device_features.sparseResidencyBuffer) { + if (cvars::vulkan_sparse_shared_memory && device_info.sparseResidencyBuffer) { if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) == VK_SUCCESS) { VkMemoryRequirements buffer_memory_requirements; dfn.vkGetBufferMemoryRequirements(device, buffer_, &buffer_memory_requirements); if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & - provider.memory_types_device_local(), + device_info.memory_types_device_local, &buffer_memory_type_)) { uint32_t allocation_size_log2; xe::bit_scan_forward( @@ -131,7 +130,7 @@ bool VulkanSharedMemory::Initialize() { dfn.vkGetBufferMemoryRequirements(device, buffer_, &buffer_memory_requirements); if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & - provider.memory_types_device_local(), + device_info.memory_types_device_local, &buffer_memory_type_)) { XELOGE( "Shared memory: Failed to get a device-local Vulkan memory type for " @@ -147,15 +146,15 @@ bool VulkanSharedMemory::Initialize() { buffer_memory_allocate_info.allocationSize = buffer_memory_requirements.size; buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_; - VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info; - if 
(provider.device_extensions().khr_dedicated_allocation) { + VkMemoryDedicatedAllocateInfo buffer_memory_dedicated_allocate_info; + if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) { buffer_memory_allocate_info_last->pNext = &buffer_memory_dedicated_allocate_info; buffer_memory_allocate_info_last = reinterpret_cast( &buffer_memory_dedicated_allocate_info); buffer_memory_dedicated_allocate_info.sType = - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; buffer_memory_dedicated_allocate_info.pNext = nullptr; buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE; buffer_memory_dedicated_allocate_info.buffer = buffer_; @@ -366,7 +365,7 @@ bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange( VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; - if (provider.device_features().tessellationShader) { + if (provider.device_info().tessellationShader) { bind_wait_stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; } diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 014f9abe2..e056c606c 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -144,17 +144,17 @@ const VulkanTextureCache::HostFormatPair xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_Cr_Y1_Cb_Y0_REP - // VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in + // VK_FORMAT_G8B8G8R8_422_UNORM (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. - {{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true}, + {{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM, true}, {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_Y1_Cr_Y0_Cb_REP - // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in + // VK_FORMAT_B8G8R8G8_422_UNORM (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. 
- {{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true}, + {{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM, true}, {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_16_16_EDRAM @@ -778,15 +778,15 @@ VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters, // kClampToEdge VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // kMirrorClampToEdge - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // kClampToHalfway VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // kMirrorClampToHalfway - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // kClampToBorder VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // kMirrorClampToBorder - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, }; sampler_create_info.addressModeU = kAddressModeMap[uint32_t(parameters.clamp_x)]; @@ -938,19 +938,17 @@ uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const { uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight( xenos::DataDimension dimension) const { - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); switch (dimension) { case xenos::DataDimension::k1D: case xenos::DataDimension::k2DOrStacked: // 1D and 2D are emulated as 2D arrays. - return device_limits.maxImageDimension2D; + return device_info.maxImageDimension2D; case xenos::DataDimension::k3D: - return device_limits.maxImageDimension3D; + return device_info.maxImageDimension3D; case xenos::DataDimension::kCube: - return device_limits.maxImageDimensionCube; + return device_info.maxImageDimensionCube; default: assert_unhandled_case(dimension); return 0; @@ -959,17 +957,15 @@ uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight( uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize( xenos::DataDimension dimension) const { - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + command_processor_.GetVulkanProvider().device_info(); switch (dimension) { case xenos::DataDimension::k1D: case xenos::DataDimension::k2DOrStacked: // 1D and 2D are emulated as 2D arrays. - return device_limits.maxImageArrayLayers; + return device_info.maxImageArrayLayers; case xenos::DataDimension::k3D: - return device_limits.maxImageDimension3D; + return device_info.maxImageDimension3D; case xenos::DataDimension::kCube: // Not requesting the imageCubeArray feature, and the Xenos doesn't // support cube map arrays. 
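Stepping back, every hunk in this file follows the same substitution: the per-query accessors (device_properties().limits, device_features(), device_extensions(), device_portability_subset_features()) give way to one device_info() snapshot. An abridged illustration of the struct's shape, with field names taken from this diff but the layout assumed (the authoritative declaration lives in vulkan_provider.h, which is not part of this excerpt):

    // Abridged - the real DeviceInfo declares many more fields.
    struct DeviceInfo {
      uint32_t apiVersion;
      // Limits, copied out of VkPhysicalDeviceLimits.
      uint32_t maxImageDimension2D;
      uint32_t maxFramebufferWidth;
      uint32_t maxFramebufferHeight;
      // Features, set only if supported and actually enabled at creation.
      bool sampleRateShading;
      bool samplerAnisotropy;
      // Extensions, prefixed with the core promotion version if promoted.
      bool ext_VK_EXT_shader_stencil_export;
      bool ext_1_1_VK_KHR_dedicated_allocation;
      // Memory type masks built from VkPhysicalDeviceMemoryProperties.
      uint32_t memory_types_device_local;
    };
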
@@ -1049,14 +1045,14 @@ std::unique_ptr VulkanTextureCache::CreateTexture( image_create_info.queueFamilyIndexCount = 0; image_create_info.pQueueFamilyIndices = nullptr; image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImageFormatListCreateInfoKHR image_format_list_create_info; + VkImageFormatListCreateInfo image_format_list_create_info; if (formats[1] != VK_FORMAT_UNDEFINED && - provider.device_extensions().khr_image_format_list) { + provider.device_info().ext_1_2_VK_KHR_image_format_list) { image_create_info_last->pNext = &image_format_list_create_info; image_create_info_last = reinterpret_cast(&image_format_list_create_info); image_format_list_create_info.sType = - VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR; + VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO; image_format_list_create_info.pNext = nullptr; image_format_list_create_info.viewFormatCount = 2; image_format_list_create_info.pViewFormats = formats; @@ -1635,11 +1631,7 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, const ui::vulkan::VulkanProvider& provider = vulkan_texture_cache.command_processor_.GetVulkanProvider(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); - if (device_portability_subset_features && - !device_portability_subset_features->imageViewFormatSwizzle) { + if (!provider.device_info().imageViewFormatSwizzle) { host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA; } view_key.host_swizzle = host_swizzle; @@ -1716,9 +1708,8 @@ bool VulkanTextureCache::Initialize() { VkPhysicalDevice physical_device = provider.physical_device(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider.device_portability_subset_features(); + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + provider.device_info(); // Vulkan Memory Allocator. @@ -2476,15 +2467,15 @@ bool VulkanTextureCache::Initialize() { null_image_memory_requirements_2d_array_cube_.size; null_image_memory_allocate_info.memoryTypeIndex = null_image_memory_type_2d_array_cube_; - VkMemoryDedicatedAllocateInfoKHR null_image_memory_dedicated_allocate_info; - if (provider.device_extensions().khr_dedicated_allocation) { + VkMemoryDedicatedAllocateInfo null_image_memory_dedicated_allocate_info; + if (device_info.ext_1_1_VK_KHR_dedicated_allocation) { null_image_memory_allocate_info_last->pNext = &null_image_memory_dedicated_allocate_info; null_image_memory_allocate_info_last = reinterpret_cast( &null_image_memory_dedicated_allocate_info); null_image_memory_dedicated_allocate_info.sType = - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; null_image_memory_dedicated_allocate_info.pNext = nullptr; null_image_memory_dedicated_allocate_info.image = null_image_2d_array_cube_; @@ -2538,10 +2529,8 @@ bool VulkanTextureCache::Initialize() { // constant components instead of the real texels. The image will be cleared // to (0, 0, 0, 0) anyway. VkComponentSwizzle null_image_view_swizzle = - (!device_portability_subset_features || - device_portability_subset_features->imageViewFormatSwizzle) - ? VK_COMPONENT_SWIZZLE_ZERO - : VK_COMPONENT_SWIZZLE_IDENTITY; + device_info.imageViewFormatSwizzle ? 
VK_COMPONENT_SWIZZLE_ZERO + : VK_COMPONENT_SWIZZLE_IDENTITY; null_image_view_create_info.components.r = null_image_view_swizzle; null_image_view_create_info.components.g = null_image_view_swizzle; null_image_view_create_info.components.b = null_image_view_swizzle; @@ -2574,10 +2563,6 @@ bool VulkanTextureCache::Initialize() { // Samplers. - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - const VkPhysicalDeviceLimits& device_limits = - provider.device_properties().limits; - // Some MoltenVK devices have a maximum of 2048, 1024, or even 96 samplers, // below Vulkan's minimum requirement of 4000. // Assuming that the current VulkanTextureCache is the only one on this @@ -2585,15 +2570,14 @@ bool VulkanTextureCache::Initialize() { // allocation slots exclusively. // Also leaving a few slots for use by things like overlay applications. sampler_max_count_ = - device_limits.maxSamplerAllocationCount - + device_info.maxSamplerAllocationCount - uint32_t(ui::vulkan::VulkanProvider::HostSampler::kCount) - 16; - if (device_features.samplerAnisotropy) { + if (device_info.samplerAnisotropy) { max_anisotropy_ = xenos::AnisoFilter( uint32_t(xenos::AnisoFilter::kMax_1_1) + - (31 - - xe::lzcnt(uint32_t(std::min( - 16.0f, std::max(1.0f, device_limits.maxSamplerAnisotropy)))))); + (31 - xe::lzcnt(uint32_t(std::min( + 16.0f, std::max(1.0f, device_info.maxSamplerAnisotropy)))))); } else { max_anisotropy_ = xenos::AnisoFilter::kDisabled; } @@ -2656,10 +2640,12 @@ xenos::ClampMode VulkanTextureCache::NormalizeClampMode( if (clamp_mode == xenos::ClampMode::kMirrorClampToEdge || clamp_mode == xenos::ClampMode::kMirrorClampToHalfway || clamp_mode == xenos::ClampMode::kMirrorClampToBorder) { - // TODO(Triang3l): VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR if - // VK_KHR_sampler_mirror_clamp_to_edge (or Vulkan 1.2) and the - // samplerMirrorClampToEdge feature are supported. - return xenos::ClampMode::kMirroredRepeat; + // No equivalents for anything other than kMirrorClampToEdge in Vulkan. + return command_processor_.GetVulkanProvider() + .device_info() + .samplerMirrorClampToEdge + ? xenos::ClampMode::kMirrorClampToEdge + : xenos::ClampMode::kMirroredRepeat; } return clamp_mode; } diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 162474762..0a7f2ae8e 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -866,9 +866,6 @@ bool VulkanImmediateDrawer::CreateTextureResource( size_t& pending_upload_index_out) { const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); VkDevice device = provider_.device(); - const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features = - provider_.device_portability_subset_features(); // Create the image and the descriptor. @@ -913,8 +910,7 @@ bool VulkanImmediateDrawer::CreateTextureResource( // data == nullptr is a special case for (1, 1, 1, 1), though the image will // be cleared to (1, 1, 1, 1) anyway, just a micro-optimization. VkComponentSwizzle swizzle = - (data || (device_portability_subset_features && - !device_portability_subset_features->imageViewFormatSwizzle)) + (data || !provider_.device_info().imageViewFormatSwizzle) ? 
VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_ONE; image_view_create_info.components.r = swizzle; diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.cc b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc index d3be16c5f..688b2ff7c 100644 --- a/src/xenia/ui/vulkan/vulkan_mem_alloc.cc +++ b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc @@ -27,8 +27,7 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); const VulkanProvider::InstanceExtensions& instance_extensions = provider.instance_extensions(); - const VulkanProvider::DeviceExtensions& device_extensions = - provider.device_extensions(); + const VulkanProvider::DeviceInfo& device_info = provider.device_info(); VmaVulkanFunctions vma_vulkan_functions = {}; VmaAllocatorCreateInfo allocator_create_info = {}; @@ -58,31 +57,33 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage; vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage; vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer; - if (device_extensions.khr_get_memory_requirements2) { + if (device_info.ext_1_1_VK_KHR_get_memory_requirements2) { vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR = - dfn.vkGetBufferMemoryRequirements2KHR; + dfn.vkGetBufferMemoryRequirements2; vma_vulkan_functions.vkGetImageMemoryRequirements2KHR = - dfn.vkGetImageMemoryRequirements2KHR; - if (device_extensions.khr_dedicated_allocation) { + dfn.vkGetImageMemoryRequirements2; + if (device_info.ext_1_1_VK_KHR_dedicated_allocation) { allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; } } - if (device_extensions.khr_bind_memory2) { - vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR; - vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR; + if (device_info.ext_1_1_VK_KHR_bind_memory2) { + vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2; + vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2; allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; } if (instance_extensions.khr_get_physical_device_properties2) { vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR = - ifn.vkGetPhysicalDeviceMemoryProperties2KHR; - if (device_extensions.ext_memory_budget) { + ifn.vkGetPhysicalDeviceMemoryProperties2; + if (device_info.ext_VK_EXT_memory_budget) { allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; } } - if (device_extensions.khr_maintenance4) { + if (device_info.ext_1_3_VK_KHR_maintenance4) { + vma_vulkan_functions.vkGetDeviceBufferMemoryRequirements = + dfn.vkGetDeviceBufferMemoryRequirements; vma_vulkan_functions.vkGetDeviceImageMemoryRequirements = - dfn.vkGetDeviceImageMemoryRequirementsKHR; + dfn.vkGetDeviceImageMemoryRequirements; } if (externally_synchronized) { @@ -93,8 +94,7 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, allocator_create_info.device = provider.device(); allocator_create_info.pVulkanFunctions = &vma_vulkan_functions; allocator_create_info.instance = provider.instance(); - allocator_create_info.vulkanApiVersion = - provider.device_properties().apiVersion; + allocator_create_info.vulkanApiVersion = device_info.apiVersion; VmaAllocator allocator; if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) { XELOGE("Failed to create a Vulkan Memory Allocator instance"); diff --git a/src/xenia/ui/vulkan/vulkan_presenter.cc b/src/xenia/ui/vulkan/vulkan_presenter.cc 
index a1551a6b7..c158415c4 100644 --- a/src/xenia/ui/vulkan/vulkan_presenter.cc +++ b/src/xenia/ui/vulkan/vulkan_presenter.cc @@ -208,7 +208,7 @@ VulkanPresenter::~VulkanPresenter() { } Surface::TypeFlags VulkanPresenter::GetSupportedSurfaceTypes() const { - if (!provider_.device_extensions().khr_swapchain) { + if (!provider_.device_info().ext_VK_KHR_swapchain) { return 0; } return GetSurfaceTypesSupportedByInstance(provider_.instance_extensions()); diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index a1ffd3e61..96265b223 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,8 @@ bool VulkanProvider::Initialize() { VkInstanceCreateInfo instance_create_info; instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instance_create_info.pNext = nullptr; + // TODO(Triang3l): Enumerate portability subset devices using + // VK_KHR_portability_enumeration when ready. instance_create_info.flags = 0; instance_create_info.pApplicationInfo = &application_info; instance_create_info.enabledLayerCount = uint32_t(layers_enabled.size()); @@ -353,13 +356,13 @@ bool VulkanProvider::Initialize() { #define XE_UI_VULKAN_FUNCTION(name) \ functions_loaded &= (ifn_.name = PFN_##name(lfn_.vkGetInstanceProcAddr( \ instance_, #name))) != nullptr; -#define XE_UI_VULKAN_FUNCTION_DONT_PROMOTE(extension_name, core_name) \ - functions_loaded &= \ - (ifn_.extension_name = PFN_##extension_name(lfn_.vkGetInstanceProcAddr( \ +#define XE_UI_VULKAN_FUNCTION_DONT_PROMOTE(extension_name, core_name) \ + functions_loaded &= \ + (ifn_.core_name = PFN_##core_name(lfn_.vkGetInstanceProcAddr( \ instance_, #extension_name))) != nullptr; #define XE_UI_VULKAN_FUNCTION_PROMOTE(extension_name, core_name) \ functions_loaded &= \ - (ifn_.extension_name = PFN_##extension_name( \ + (ifn_.core_name = PFN_##core_name( \ lfn_.vkGetInstanceProcAddr(instance_, #core_name))) != nullptr; // Core - require unconditionally. { @@ -535,616 +538,20 @@ bool VulkanProvider::Initialize() { physical_device_index_first = 0; physical_device_index_last = physical_devices.size() - 1; } - physical_device_ = VK_NULL_HANDLE; - std::vector queue_families_properties; - std::vector device_extension_properties; - std::vector device_extensions_enabled; for (size_t i = physical_device_index_first; i <= physical_device_index_last; ++i) { - VkPhysicalDevice physical_device_current = physical_devices[i]; - - // Get physical device features. Need this before obtaining the queues as - // sparse binding is an optional feature. - ifn_.vkGetPhysicalDeviceFeatures(physical_device_current, - &device_features_); - - // Get the needed queues: - // - Graphics and compute. - // - Sparse binding if used (preferably the same as the graphics and compute - // one for the lowest latency as Xenia submits sparse binding commands - // right before graphics commands anyway). - // - Additional queues for presentation as VulkanProvider may be used with - // different surfaces, and they may have varying support of presentation - // from different queue families. 
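The queue setup removed from Initialize below presumably reappears inside the new TryCreateDevice path; the per-device loop is replaced by a plain TryCreateDevice call later in this diff. Its core preference is a two-tier scan, condensed here using the names from the removed block: take the first graphics+compute family that also does sparse binding, else the first graphics+compute family, and request a separate sparse binding queue afterwards if needed.

    // Condensed restatement of the selection tiers in the removed block.
    constexpr VkQueueFlags kGraphicsCompute =
        VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
    uint32_t family_with_sparse = UINT32_MAX;     // preferred
    uint32_t family_without_sparse = UINT32_MAX;  // fallback
    for (uint32_t j = 0; j < queue_family_count; ++j) {
      VkQueueFlags flags = queue_families_properties[j].queueFlags;
      if ((flags & kGraphicsCompute) != kGraphicsCompute) {
        continue;
      }
      uint32_t& tier = (device_features_.sparseBinding &&
                        (flags & VK_QUEUE_SPARSE_BINDING_BIT))
                           ? family_with_sparse
                           : family_without_sparse;
      if (tier == UINT32_MAX) {
        tier = j;
      }
    }
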
- uint32_t queue_family_count = 0; - ifn_.vkGetPhysicalDeviceQueueFamilyProperties(physical_device_current, - &queue_family_count, nullptr); - queue_families_properties.resize(queue_family_count); - ifn_.vkGetPhysicalDeviceQueueFamilyProperties( - physical_device_current, &queue_family_count, - queue_families_properties.data()); - assert_true(queue_family_count == queue_families_properties.size()); - // Initialize all queue families to unused. - queue_families_.clear(); - queue_families_.resize(queue_family_count); - // First, try to obtain a graphics and compute queue. Preferably find a - // queue with sparse binding support as well. - // The family indices here are listed from the best to the worst. - uint32_t queue_family_graphics_compute_sparse_binding = UINT32_MAX; - uint32_t queue_family_graphics_compute_only = UINT32_MAX; - for (uint32_t j = 0; j < queue_family_count; ++j) { - const VkQueueFamilyProperties& queue_family_properties = - queue_families_properties[j]; - if ((queue_family_properties.queueFlags & - (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) != - (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) { - continue; - } - uint32_t* queue_family_ptr; - if (device_features_.sparseBinding && - (queue_family_properties.queueFlags & VK_QUEUE_SPARSE_BINDING_BIT)) { - queue_family_ptr = &queue_family_graphics_compute_sparse_binding; - } else { - queue_family_ptr = &queue_family_graphics_compute_only; - } - if (*queue_family_ptr == UINT32_MAX) { - *queue_family_ptr = j; - } + physical_device_ = physical_devices[i]; + TryCreateDevice(); + if (device_ != VK_NULL_HANDLE) { + break; } - if (queue_family_graphics_compute_sparse_binding != UINT32_MAX) { - assert_true(device_features_.sparseBinding); - queue_family_graphics_compute_ = - queue_family_graphics_compute_sparse_binding; - } else if (queue_family_graphics_compute_only != UINT32_MAX) { - queue_family_graphics_compute_ = queue_family_graphics_compute_only; - } else { - // No graphics and compute queue family. - continue; - } - // Mark the graphics and compute queue as requested. - queue_families_[queue_family_graphics_compute_].queue_count = - std::max(queue_families_[queue_family_graphics_compute_].queue_count, - uint32_t(1)); - // Request a separate sparse binding queue if needed. - queue_family_sparse_binding_ = UINT32_MAX; - if (device_features_.sparseBinding) { - if (queue_families_properties[queue_family_graphics_compute_].queueFlags & - VK_QUEUE_SPARSE_BINDING_BIT) { - queue_family_sparse_binding_ = queue_family_graphics_compute_; - } else { - for (uint32_t j = 0; j < queue_family_count; ++j) { - if (!(queue_families_properties[j].queueFlags & - VK_QUEUE_SPARSE_BINDING_BIT)) { - continue; - } - queue_family_sparse_binding_ = j; - queue_families_[j].queue_count = - std::max(queue_families_[j].queue_count, uint32_t(1)); - break; - } - } - // Don't expose, and disable during logical device creature, the sparse - // binding feature if failed to obtain a queue supporting it. - if (queue_family_sparse_binding_ == UINT32_MAX) { - device_features_.sparseBinding = VK_FALSE; - } - } - bool any_queue_potentially_supports_present = false; - if (instance_extensions_.khr_surface) { - // Request possible presentation queues. 
- for (uint32_t j = 0; j < queue_family_count; ++j) { -#if XE_PLATFORM_WIN32 - if (instance_extensions_.khr_win32_surface && - !ifn_.vkGetPhysicalDeviceWin32PresentationSupportKHR( - physical_device_current, j)) { - continue; - } -#endif - any_queue_potentially_supports_present = true; - QueueFamily& queue_family = queue_families_[j]; - queue_family.queue_count = - std::max(queue_families_[j].queue_count, uint32_t(1)); - queue_family.potentially_supports_present = true; - } - } - if (!any_queue_potentially_supports_present && is_surface_required_) { - continue; - } - - // Get device properties, will be needed to check if extensions have been - // promoted to core. - ifn_.vkGetPhysicalDeviceProperties(physical_device_current, - &device_properties_); - - // Get the extensions, check if swapchain is supported. - device_extension_properties.clear(); - VkResult device_extensions_enumerate_result; - for (;;) { - uint32_t device_extension_count = - uint32_t(device_extension_properties.size()); - bool device_extensions_were_empty = !device_extension_count; - device_extensions_enumerate_result = - ifn_.vkEnumerateDeviceExtensionProperties( - physical_device_current, nullptr, &device_extension_count, - device_extensions_were_empty - ? nullptr - : device_extension_properties.data()); - // If the original extension count was 0 (first call), SUCCESS is - // returned, not INCOMPLETE. - if (device_extensions_enumerate_result == VK_SUCCESS || - device_extensions_enumerate_result == VK_INCOMPLETE) { - device_extension_properties.resize(device_extension_count); - if (device_extensions_enumerate_result == VK_SUCCESS && - (!device_extensions_were_empty || !device_extension_count)) { - break; - } - } else { - break; - } - } - if (device_extensions_enumerate_result != VK_SUCCESS) { - continue; - } - std::memset(&device_extensions_, 0, sizeof(device_extensions_)); - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { - device_extensions_.khr_bind_memory2 = true; - device_extensions_.khr_dedicated_allocation = true; - device_extensions_.khr_get_memory_requirements2 = true; - device_extensions_.khr_sampler_ycbcr_conversion = true; - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { - device_extensions_.khr_image_format_list = true; - device_extensions_.khr_shader_float_controls = true; - device_extensions_.khr_spirv_1_4 = true; - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { - device_extensions_.ext_shader_demote_to_helper_invocation = true; - device_extensions_.khr_maintenance4 = true; - } - } - } - device_extensions_enabled.clear(); - // Checking if already enabled as an optimization to do fewer and fewer - // string comparisons, as well as to skip adding extensions promoted to the - // core to device_extensions_enabled. Adding literals to - // device_extensions_enabled for the most C string lifetime safety. 
- static const std::pair kUsedDeviceExtensions[] = { - {"VK_EXT_fragment_shader_interlock", - offsetof(DeviceExtensions, ext_fragment_shader_interlock)}, - {"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)}, - {"VK_EXT_shader_demote_to_helper_invocation", - offsetof(DeviceExtensions, ext_shader_demote_to_helper_invocation)}, - {"VK_EXT_shader_stencil_export", - offsetof(DeviceExtensions, ext_shader_stencil_export)}, - {"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)}, - {"VK_KHR_dedicated_allocation", - offsetof(DeviceExtensions, khr_dedicated_allocation)}, - {"VK_KHR_get_memory_requirements2", - offsetof(DeviceExtensions, khr_get_memory_requirements2)}, - {"VK_KHR_image_format_list", - offsetof(DeviceExtensions, khr_image_format_list)}, - {"VK_KHR_maintenance4", offsetof(DeviceExtensions, khr_maintenance4)}, - {"VK_KHR_portability_subset", - offsetof(DeviceExtensions, khr_portability_subset)}, - // While vkGetPhysicalDeviceFormatProperties should be used to check the - // format support (device support for Y'CbCr formats is not required by - // this extension or by Vulkan 1.1), still adding - // VK_KHR_sampler_ycbcr_conversion to this list to enable this extension - // on the device on Vulkan 1.0. - {"VK_KHR_sampler_ycbcr_conversion", - offsetof(DeviceExtensions, khr_sampler_ycbcr_conversion)}, - {"VK_KHR_shader_float_controls", - offsetof(DeviceExtensions, khr_shader_float_controls)}, - {"VK_KHR_spirv_1_4", offsetof(DeviceExtensions, khr_spirv_1_4)}, - {"VK_KHR_swapchain", offsetof(DeviceExtensions, khr_swapchain)}, - }; - for (const VkExtensionProperties& device_extension : - device_extension_properties) { - for (const std::pair& used_device_extension : - kUsedDeviceExtensions) { - bool& device_extension_flag = *reinterpret_cast( - reinterpret_cast(&device_extensions_) + - used_device_extension.second); - if (!device_extension_flag && - !std::strcmp(device_extension.extensionName, - used_device_extension.first)) { - device_extensions_enabled.push_back(used_device_extension.first); - device_extension_flag = true; - } - } - } - if (is_surface_required_ && !device_extensions_.khr_swapchain) { - continue; - } - - // Get portability subset features. - // VK_KHR_portability_subset reduces, not increases, the capabilities, skip - // the device completely if there's no way to retrieve what is actually - // unsupported. Though VK_KHR_portability_subset requires - // VK_KHR_get_physical_device_properties2, check just in case of an - // untrustworthy driver. - if (device_extensions_.khr_portability_subset) { - if (!instance_extensions_.khr_get_physical_device_properties2) { - continue; - } - device_portability_subset_features_.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_KHR; - device_portability_subset_features_.pNext = nullptr; - VkPhysicalDeviceProperties2KHR device_properties_2; - device_properties_2.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - device_properties_2.pNext = &device_portability_subset_features_; - ifn_.vkGetPhysicalDeviceProperties2KHR(physical_device_, - &device_properties_2); - } - - // Get the memory types. 
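For readers following the refactor: the four masks gathered in the block below are per-memory-type bitfields (bit j is set when memory type j carries the property), and they survive the move into DeviceInfo as plain uint32_t fields such as memory_types_device_local. A usage sketch with the xe::bit_scan_forward helper already seen in the shared-memory hunk above, assuming buffer and device are in scope:

    // Pick the first device-local memory type that can back this buffer.
    VkMemoryRequirements requirements;
    dfn.vkGetBufferMemoryRequirements(device, buffer, &requirements);
    uint32_t memory_type;
    if (!xe::bit_scan_forward(
            requirements.memoryTypeBits & device_info.memory_types_device_local,
            &memory_type)) {
      // No device-local memory type is compatible with this buffer.
    }
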
- VkPhysicalDeviceMemoryProperties memory_properties; - ifn_.vkGetPhysicalDeviceMemoryProperties(physical_device_current, - &memory_properties); - memory_types_device_local_ = 0; - memory_types_host_visible_ = 0; - memory_types_host_coherent_ = 0; - memory_types_host_cached_ = 0; - for (uint32_t j = 0; j < memory_properties.memoryTypeCount; ++j) { - VkMemoryPropertyFlags memory_property_flags = - memory_properties.memoryTypes[j].propertyFlags; - uint32_t memory_type_bit = uint32_t(1) << j; - if (memory_property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { - memory_types_device_local_ |= memory_type_bit; - } - if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - memory_types_host_visible_ |= memory_type_bit; - } - if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) { - memory_types_host_coherent_ |= memory_type_bit; - } - if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { - memory_types_host_cached_ |= memory_type_bit; - } - } - if (!memory_types_device_local_ && !memory_types_host_visible_) { - // Shouldn't happen according to the specification. - continue; - } - - physical_device_ = physical_device_current; - break; } - if (physical_device_ == VK_NULL_HANDLE) { - XELOGE( - "Failed to get a compatible Vulkan physical device with swapchain " - "support"); + if (device_ == VK_NULL_HANDLE) { + XELOGE("Failed to select a compatible Vulkan physical device"); + physical_device_ = VK_NULL_HANDLE; return false; } - // Get additional device properties. - std::memset(&device_float_controls_properties_, 0, - sizeof(device_float_controls_properties_)); - device_float_controls_properties_.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; - std::memset(&device_fragment_shader_interlock_features_, 0, - sizeof(device_fragment_shader_interlock_features_)); - device_fragment_shader_interlock_features_.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; - std::memset(&device_shader_demote_to_helper_invocation_features_, 0, - sizeof(device_shader_demote_to_helper_invocation_features_)); - device_shader_demote_to_helper_invocation_features_.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; - if (instance_extensions_.khr_get_physical_device_properties2) { - VkPhysicalDeviceProperties2KHR device_properties_2; - device_properties_2.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - device_properties_2.pNext = nullptr; - VkPhysicalDeviceProperties2KHR* device_properties_2_last = - &device_properties_2; - if (device_extensions_.khr_shader_float_controls) { - device_float_controls_properties_.pNext = nullptr; - device_properties_2_last->pNext = &device_float_controls_properties_; - device_properties_2_last = - reinterpret_cast( - &device_float_controls_properties_); - } - if (device_properties_2_last != &device_properties_2) { - ifn_.vkGetPhysicalDeviceProperties2KHR(physical_device_, - &device_properties_2); - } - VkPhysicalDeviceFeatures2KHR device_features_2; - device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - device_features_2.pNext = nullptr; - VkPhysicalDeviceFeatures2KHR* device_features_2_last = &device_features_2; - if (device_extensions_.ext_fragment_shader_interlock) { - device_fragment_shader_interlock_features_.pNext = nullptr; - device_features_2_last->pNext = - &device_fragment_shader_interlock_features_; - device_features_2_last = reinterpret_cast( - &device_fragment_shader_interlock_features_); - } - if 
(device_extensions_.ext_shader_demote_to_helper_invocation) { - device_shader_demote_to_helper_invocation_features_.pNext = nullptr; - device_features_2_last->pNext = - &device_shader_demote_to_helper_invocation_features_; - device_features_2_last = reinterpret_cast( - &device_shader_demote_to_helper_invocation_features_); - } - if (device_features_2_last != &device_features_2) { - ifn_.vkGetPhysicalDeviceFeatures2KHR(physical_device_, - &device_features_2); - } - } - - // Create the device. - std::vector queue_create_infos; - queue_create_infos.reserve(queue_families_.size()); - uint32_t used_queue_count = 0; - uint32_t max_queue_count_per_family = 0; - for (size_t i = 0; i < queue_families_.size(); ++i) { - QueueFamily& queue_family = queue_families_[i]; - queue_family.queue_first_index = used_queue_count; - if (!queue_family.queue_count) { - continue; - } - VkDeviceQueueCreateInfo& queue_create_info = - queue_create_infos.emplace_back(); - queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queue_create_info.pNext = nullptr; - queue_create_info.flags = 0; - queue_create_info.queueFamilyIndex = uint32_t(i); - queue_create_info.queueCount = queue_family.queue_count; - // pQueuePriorities will be set later based on max_queue_count_per_family. - max_queue_count_per_family = - std::max(max_queue_count_per_family, queue_family.queue_count); - used_queue_count += queue_family.queue_count; - } - std::vector queue_priorities; - queue_priorities.resize(max_queue_count_per_family, 1.0f); - for (VkDeviceQueueCreateInfo& queue_create_info : queue_create_infos) { - queue_create_info.pQueuePriorities = queue_priorities.data(); - } - VkDeviceCreateInfo device_create_info; - device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - device_create_info.pNext = nullptr; - VkDeviceCreateInfo* device_create_info_last = &device_create_info; - device_create_info.flags = 0; - device_create_info.queueCreateInfoCount = uint32_t(queue_create_infos.size()); - device_create_info.pQueueCreateInfos = queue_create_infos.data(); - // Device layers are deprecated - using validation layer on the instance. - device_create_info.enabledLayerCount = 0; - device_create_info.ppEnabledLayerNames = nullptr; - device_create_info.enabledExtensionCount = - uint32_t(device_extensions_enabled.size()); - device_create_info.ppEnabledExtensionNames = device_extensions_enabled.data(); - // TODO(Triang3l): Enable only needed features. - device_create_info.pEnabledFeatures = &device_features_; - if (device_extensions_.khr_portability_subset) { - // TODO(Triang3l): Enable only needed portability subset features. - device_portability_subset_features_.pNext = nullptr; - device_create_info_last->pNext = &device_portability_subset_features_; - device_create_info_last = reinterpret_cast( - &device_portability_subset_features_); - } - if (device_extensions_.ext_fragment_shader_interlock) { - // TODO(Triang3l): Enable only needed fragment shader interlock features. 
- device_fragment_shader_interlock_features_.pNext = nullptr; - device_create_info_last->pNext = - &device_fragment_shader_interlock_features_; - device_create_info_last = reinterpret_cast( - &device_fragment_shader_interlock_features_); - } - if (device_extensions_.ext_shader_demote_to_helper_invocation) { - device_shader_demote_to_helper_invocation_features_.pNext = nullptr; - device_create_info_last->pNext = - &device_shader_demote_to_helper_invocation_features_; - device_create_info_last = reinterpret_cast( - &device_shader_demote_to_helper_invocation_features_); - } - if (ifn_.vkCreateDevice(physical_device_, &device_create_info, nullptr, - &device_) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan device"); - return false; - } - - // Get device functions. - std::memset(&dfn_, 0, sizeof(ifn_)); - bool device_functions_loaded = true; -#define XE_UI_VULKAN_FUNCTION(name) \ - functions_loaded &= \ - (dfn_.name = PFN_##name(ifn_.vkGetDeviceProcAddr(device_, #name))) != \ - nullptr; -#define XE_UI_VULKAN_FUNCTION_DONT_PROMOTE(extension_name, core_name) \ - functions_loaded &= \ - (dfn_.extension_name = PFN_##extension_name( \ - ifn_.vkGetDeviceProcAddr(device_, #extension_name))) != nullptr; -#define XE_UI_VULKAN_FUNCTION_PROMOTE(extension_name, core_name) \ - functions_loaded &= \ - (dfn_.extension_name = PFN_##extension_name( \ - ifn_.vkGetDeviceProcAddr(device_, #core_name))) != nullptr; - // Core - require unconditionally. - { - bool functions_loaded = true; -#include "xenia/ui/vulkan/functions/device_1_0.inc" - if (!functions_loaded) { - XELOGE("Failed to get Vulkan device function pointers"); - return false; - } - } - // Extensions - disable the specific extension if failed to get its functions. - if (device_extensions_.khr_bind_memory2) { - bool functions_loaded = true; - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } else { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } - device_extensions_.khr_bind_memory2 = functions_loaded; - } - if (device_extensions_.khr_get_memory_requirements2) { - bool functions_loaded = true; - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } else { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } - device_extensions_.khr_get_memory_requirements2 = functions_loaded; - // VK_KHR_dedicated_allocation can still work without the dedicated - // allocation preference getter even though it requires - // VK_KHR_get_memory_requirements2 to be supported and enabled. 
- } - if (device_extensions_.khr_maintenance4) { - bool functions_loaded = true; - if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } else { -#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE -#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" -#undef XE_UI_VULKAN_FUNCTION_PROMOTED - } - device_extensions_.khr_maintenance4 = functions_loaded; - } - if (device_extensions_.khr_swapchain) { - bool functions_loaded = true; -#include "xenia/ui/vulkan/functions/device_khr_swapchain.inc" - if (!functions_loaded) { - // Outside the physical device selection loop, so can't just skip the - // device anymore, but this shouldn't really happen anyway. - XELOGE( - "Failed to get Vulkan swapchain function pointers while swapchain " - "support is required"); - return false; - } - device_extensions_.khr_swapchain = functions_loaded; - } -#undef XE_UI_VULKAN_FUNCTION_PROMOTE -#undef XE_UI_VULKAN_FUNCTION_DONT_PROMOTE -#undef XE_UI_VULKAN_FUNCTION - if (!device_functions_loaded) { - XELOGE("Failed to get Vulkan device function pointers"); - return false; - } - - // Report device information after verifying that extension function pointers - // could be obtained. - XELOGVK( - "Vulkan device: {} (vendor {:04X}, device {:04X}, driver {:08X}, API " - "{}.{}.{})", - device_properties_.deviceName, device_properties_.vendorID, - device_properties_.deviceID, device_properties_.driverVersion, - VK_VERSION_MAJOR(device_properties_.apiVersion), - VK_VERSION_MINOR(device_properties_.apiVersion), - VK_VERSION_PATCH(device_properties_.apiVersion)); - XELOGVK("Vulkan device extensions:"); - XELOGVK("* VK_EXT_fragment_shader_interlock: {}", - device_extensions_.ext_fragment_shader_interlock ? "yes" : "no"); - if (device_extensions_.ext_fragment_shader_interlock) { - XELOGVK( - " * Sample interlock: {}", - device_fragment_shader_interlock_features_.fragmentShaderSampleInterlock - ? "yes" - : "no"); - XELOGVK( - " * Pixel interlock: {}", - device_fragment_shader_interlock_features_.fragmentShaderPixelInterlock - ? "yes" - : "no"); - } - XELOGVK("* VK_EXT_memory_budget: {}", - device_extensions_.ext_memory_budget ? "yes" : "no"); - XELOGVK( - "* VK_EXT_shader_demote_to_helper_invocation: {}", - device_extensions_.ext_shader_demote_to_helper_invocation ? "yes" : "no"); - if (device_extensions_.ext_shader_demote_to_helper_invocation) { - XELOGVK(" * Demote to helper invocation: {}", - device_shader_demote_to_helper_invocation_features_ - .shaderDemoteToHelperInvocation - ? "yes" - : "no"); - } - XELOGVK("* VK_EXT_shader_stencil_export: {}", - device_extensions_.ext_shader_stencil_export ? "yes" : "no"); - XELOGVK("* VK_KHR_bind_memory2: {}", - device_extensions_.khr_bind_memory2 ? "yes" : "no"); - XELOGVK("* VK_KHR_dedicated_allocation: {}", - device_extensions_.khr_dedicated_allocation ? "yes" : "no"); - XELOGVK("* VK_KHR_get_memory_requirements2: {}", - device_extensions_.khr_get_memory_requirements2 ? "yes" : "no"); - XELOGVK("* VK_KHR_image_format_list: {}", - device_extensions_.khr_image_format_list ? "yes" : "no"); - XELOGVK("* VK_KHR_maintenance4: {}", - device_extensions_.khr_maintenance4 ? "yes" : "no"); - XELOGVK("* VK_KHR_portability_subset: {}", - device_extensions_.khr_portability_subset ? 
"yes" : "no"); - if (device_extensions_.khr_portability_subset) { - XELOGVK(" * Constant alpha color blend factors: {}", - device_portability_subset_features_.constantAlphaColorBlendFactors - ? "yes" - : "no"); - XELOGVK(" * Image view format reinterpretation: {}", - device_portability_subset_features_.imageViewFormatReinterpretation - ? "yes" - : "no"); - XELOGVK(" * Image view format swizzle: {}", - device_portability_subset_features_.imageViewFormatSwizzle ? "yes" - : "no"); - XELOGVK(" * Point polygons: {}", - device_portability_subset_features_.pointPolygons ? "yes" : "no"); - XELOGVK( - " * Separate stencil front and back masks and reference values: {}", - device_portability_subset_features_.separateStencilMaskRef ? "yes" - : "no"); - XELOGVK(" * Shader sample rate interpolation functions: {}", - device_portability_subset_features_ - .shaderSampleRateInterpolationFunctions - ? "yes" - : "no"); - XELOGVK(" * Triangle fans: {}", - device_portability_subset_features_.triangleFans ? "yes" : "no"); - } - XELOGVK("* VK_KHR_sampler_ycbcr_conversion: {}", - device_extensions_.khr_sampler_ycbcr_conversion ? "yes" : "no"); - XELOGVK("* VK_KHR_shader_float_controls: {}", - device_extensions_.khr_shader_float_controls ? "yes" : "no"); - if (device_extensions_.khr_shader_float_controls) { - XELOGVK( - " * Signed zero, inf, nan preserve for float32: {}", - device_float_controls_properties_.shaderSignedZeroInfNanPreserveFloat32 - ? "yes" - : "no"); - XELOGVK(" * Denorm flush to zero for float32: {}", - device_float_controls_properties_.shaderDenormFlushToZeroFloat32 - ? "yes" - : "no"); - XELOGVK("* VK_KHR_spirv_1_4: {}", - device_extensions_.khr_spirv_1_4 ? "yes" : "no"); - XELOGVK("* VK_KHR_swapchain: {}", - device_extensions_.khr_swapchain ? "yes" : "no"); - } - // TODO(Triang3l): Report properties, features. - - // Get the queues. - queues_.reset(); - queues_ = std::make_unique(used_queue_count); - uint32_t queue_index = 0; - for (size_t i = 0; i < queue_families_.size(); ++i) { - const QueueFamily& queue_family = queue_families_[i]; - if (!queue_family.queue_count) { - continue; - } - assert_true(queue_index == queue_family.queue_first_index); - for (uint32_t j = 0; j < queue_family.queue_count; ++j) { - VkQueue queue; - dfn_.vkGetDeviceQueue(device_, uint32_t(i), j, &queue); - queues_[queue_index++].queue = queue; - } - } - // Create host-side samplers. VkSamplerCreateInfo sampler_create_info = {}; sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; @@ -1310,6 +717,691 @@ VkBool32 VKAPI_CALL VulkanProvider::DebugMessengerCallback( return VK_FALSE; } +void VulkanProvider::TryCreateDevice() { + assert_true(physical_device_ != VK_NULL_HANDLE); + assert_true(device_ == VK_NULL_HANDLE); + + static_assert(std::is_trivially_copyable_v, + "DeviceInfo must be safe to clear using memset"); + std::memset(&device_info_, 0, sizeof(device_info_)); + + VkDeviceCreateInfo device_create_info = { + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO}; + + // Needed device extensions, properties and features. + + VkPhysicalDeviceProperties properties; + ifn_.vkGetPhysicalDeviceProperties(physical_device_, &properties); + + XELOGVK("Trying Vulkan device '{}'", properties.deviceName); + + if (!instance_extensions_.khr_get_physical_device_properties2) { + // Many extensions promoted to Vulkan 1.1 and newer require the instance + // extension VK_KHR_get_physical_device_properties2, which is itself in the + // core 1.0, although there's one instance for all physical devices. 
+ properties.apiVersion = VK_MAKE_API_VERSION( + 0, 1, 0, VK_API_VERSION_PATCH(properties.apiVersion)); + } + + device_info_.apiVersion = properties.apiVersion; + + XELOGVK("Device Vulkan API version: {}.{}.{}", + VK_API_VERSION_MAJOR(properties.apiVersion), + VK_API_VERSION_MINOR(properties.apiVersion), + VK_API_VERSION_PATCH(properties.apiVersion)); + + std::vector extension_properties; + VkResult extensions_enumerate_result; + for (;;) { + uint32_t extension_count = uint32_t(extension_properties.size()); + bool extensions_were_empty = !extension_count; + extensions_enumerate_result = ifn_.vkEnumerateDeviceExtensionProperties( + physical_device_, nullptr, &extension_count, + extensions_were_empty ? nullptr : extension_properties.data()); + // If the original extension count was 0 (first call), SUCCESS is + // returned, not INCOMPLETE. + if (extensions_enumerate_result == VK_SUCCESS || + extensions_enumerate_result == VK_INCOMPLETE) { + extension_properties.resize(extension_count); + if (extensions_enumerate_result == VK_SUCCESS && + (!extensions_were_empty || !extension_count)) { + break; + } + } else { + break; + } + } + if (extensions_enumerate_result != VK_SUCCESS) { + XELOGE("Failed to query Vulkan device '{}' extensions", + properties.deviceName); + return; + } + + XELOGVK("Requested Vulkan device extensions:"); + + std::vector enabled_extensions; + + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { + device_info_.ext_1_1_VK_KHR_dedicated_allocation = true; + device_info_.ext_1_1_VK_KHR_get_memory_requirements2 = true; + device_info_.ext_1_1_VK_KHR_sampler_ycbcr_conversion = true; + device_info_.ext_1_1_VK_KHR_bind_memory2 = true; + } + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { + device_info_.ext_1_2_VK_KHR_sampler_mirror_clamp_to_edge = true; + device_info_.ext_1_2_VK_KHR_image_format_list = true; + device_info_.ext_1_2_VK_KHR_shader_float_controls = true; + device_info_.ext_1_2_VK_KHR_spirv_1_4 = true; + } + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { + device_info_.ext_1_3_VK_EXT_shader_demote_to_helper_invocation = true; + device_info_.ext_1_3_VK_KHR_maintenance4 = true; + } + + for (const VkExtensionProperties& extension : extension_properties) { + // Checking if already enabled as an optimization to do fewer and fewer + // string comparisons. 
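Two notes on this step. First, the apiVersion clamp earlier in TryCreateDevice: feature and property structures for promoted functionality are queried through the VK_KHR_get_physical_device_properties2 instance extension (only core since 1.1), and the single VkInstance serves every physical device, so a device-reported 1.1+ version can't be acted on when the instance lacks that extension; clamping keeps only the patch bits. The version packing makes the later threshold comparisons safe:

    // Vulkan version packing: variant 31:29, major 28:22, minor 21:12,
    // patch 11:0 - a clamped 1.0.x value always compares below 1.1.
    static_assert(VK_MAKE_API_VERSION(0, 1, 0, 999) <
                      VK_MAKE_API_VERSION(0, 1, 1, 0),
                  "patch bits never spill into the minor field");

Second, the EXTENSION and EXTENSION_PROMOTED macros defined next do the name matching with token pasting so each extension takes a single line. For reference, EXTENSION(VK_KHR_swapchain) expands to roughly:

    if (!device_info_.ext_VK_KHR_swapchain &&
        !std::strcmp(extension.extensionName, "VK_KHR_swapchain")) {
      enabled_extensions.push_back("VK_KHR_swapchain");
      device_info_.ext_VK_KHR_swapchain = true;
      XELOGVK("* VK_KHR_swapchain");
    }
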
+#define EXTENSION(name) \ + if (!device_info_.ext_##name && \ + !std::strcmp(extension.extensionName, #name)) { \ + enabled_extensions.push_back(#name); \ + device_info_.ext_##name = true; \ + XELOGVK("* " #name); \ + } +#define EXTENSION_PROMOTED(name, minor_version) \ + if (!device_info_.ext_1_##minor_version##_##name && \ + !std::strcmp(extension.extensionName, #name)) { \ + enabled_extensions.push_back(#name); \ + device_info_.ext_1_##minor_version##_##name = true; \ + XELOGVK("* " #name); \ + } + EXTENSION(VK_KHR_swapchain) + EXTENSION(VK_EXT_shader_stencil_export) + if (instance_extensions_.khr_get_physical_device_properties2) { + EXTENSION(VK_KHR_portability_subset) + EXTENSION(VK_EXT_memory_budget) + EXTENSION(VK_EXT_fragment_shader_interlock) + EXTENSION(VK_EXT_non_seamless_cube_map) + } else { + if (!std::strcmp(extension.extensionName, "VK_KHR_portability_subset")) { + XELOGW( + "Vulkan device '{}' is a portability subset device, but its " + "portability subset features can't be queried as the instance " + "doesn't support VK_KHR_get_physical_device_properties2", + properties.deviceName); + return; + } + } + if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, 1, 0)) { + EXTENSION_PROMOTED(VK_KHR_dedicated_allocation, 1) + EXTENSION_PROMOTED(VK_KHR_get_memory_requirements2, 1) + EXTENSION_PROMOTED(VK_KHR_bind_memory2, 1) + if (instance_extensions_.khr_get_physical_device_properties2) { + EXTENSION_PROMOTED(VK_KHR_sampler_ycbcr_conversion, 1) + } + } + if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, 2, 0)) { + EXTENSION_PROMOTED(VK_KHR_sampler_mirror_clamp_to_edge, 2) + EXTENSION_PROMOTED(VK_KHR_image_format_list, 2) + if (instance_extensions_.khr_get_physical_device_properties2) { + EXTENSION_PROMOTED(VK_KHR_shader_float_controls, 2) + } + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { + EXTENSION_PROMOTED(VK_KHR_spirv_1_4, 2) + } + } + if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, 3, 0)) { + if (instance_extensions_.khr_get_physical_device_properties2) { + EXTENSION_PROMOTED(VK_EXT_shader_demote_to_helper_invocation, 3) + } + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { + EXTENSION_PROMOTED(VK_KHR_maintenance4, 3) + } + } +#undef EXTENSION_PROMOTED +#undef EXTENSION + } + + if (is_surface_required_ && !device_info_.ext_VK_KHR_swapchain) { + XELOGVK("Vulkan device '{}' doesn't support presentation", + properties.deviceName); + return; + } + + XELOGVK("Requested properties and features of the Vulkan device:"); + + XELOGVK("* driverVersion: 0x{:X}", properties.driverVersion); + XELOGVK("* vendorID: 0x{:04X}", properties.vendorID); + XELOGVK("* deviceID: 0x{:04X}", properties.deviceID); + +#define LIMIT(name) \ + device_info_.name = properties.limits.name; \ + XELOGVK("* " #name ": {}", properties.limits.name); +#define LIMIT_SAMPLE_COUNTS(name) \ + device_info_.name = properties.limits.name; \ + XELOGVK("* " #name ": 0b{:b}", static_cast(properties.limits.name)); + LIMIT(maxImageDimension2D) + LIMIT(maxImageDimension3D) + LIMIT(maxImageDimensionCube) + LIMIT(maxImageArrayLayers) + LIMIT(maxStorageBufferRange) + LIMIT(maxSamplerAllocationCount) + LIMIT(maxPerStageDescriptorSamplers) + LIMIT(maxPerStageDescriptorStorageBuffers) + LIMIT(maxPerStageDescriptorSampledImages) + LIMIT(maxPerStageResources) + LIMIT(maxVertexOutputComponents) + LIMIT(maxTessellationEvaluationOutputComponents) + LIMIT(maxGeometryInputComponents) + LIMIT(maxGeometryOutputComponents) + LIMIT(maxGeometryTotalOutputComponents) + 
LIMIT(maxFragmentInputComponents)
+  LIMIT(maxFragmentCombinedOutputResources)
+  LIMIT(maxSamplerAnisotropy)
+  std::memcpy(device_info_.maxViewportDimensions,
+              properties.limits.maxViewportDimensions,
+              sizeof(device_info_.maxViewportDimensions));
+  XELOGVK("* maxViewportDimensions: {} x {}",
+          properties.limits.maxViewportDimensions[0],
+          properties.limits.maxViewportDimensions[1]);
+  std::memcpy(device_info_.viewportBoundsRange,
+              properties.limits.viewportBoundsRange,
+              sizeof(device_info_.viewportBoundsRange));
+  XELOGVK("* viewportBoundsRange: [{}, {}]",
+          properties.limits.viewportBoundsRange[0],
+          properties.limits.viewportBoundsRange[1]);
+  LIMIT(minUniformBufferOffsetAlignment)
+  LIMIT(minStorageBufferOffsetAlignment)
+  LIMIT(maxFramebufferWidth)
+  LIMIT(maxFramebufferHeight)
+  LIMIT_SAMPLE_COUNTS(framebufferColorSampleCounts)
+  LIMIT_SAMPLE_COUNTS(framebufferDepthSampleCounts)
+  LIMIT_SAMPLE_COUNTS(framebufferStencilSampleCounts)
+  LIMIT_SAMPLE_COUNTS(framebufferNoAttachmentsSampleCounts)
+  LIMIT_SAMPLE_COUNTS(sampledImageColorSampleCounts)
+  LIMIT_SAMPLE_COUNTS(sampledImageIntegerSampleCounts)
+  LIMIT_SAMPLE_COUNTS(sampledImageDepthSampleCounts)
+  LIMIT_SAMPLE_COUNTS(sampledImageStencilSampleCounts)
+  LIMIT(standardSampleLocations)
+  LIMIT(optimalBufferCopyOffsetAlignment)
+  LIMIT(optimalBufferCopyRowPitchAlignment)
+  LIMIT(nonCoherentAtomSize)
+#undef LIMIT_SAMPLE_COUNTS
+#undef LIMIT
+
+  VkPhysicalDeviceFeatures supported_features;
+  ifn_.vkGetPhysicalDeviceFeatures(physical_device_, &supported_features);
+  // Enabling only the needed features because drivers may take more optimal
+  // paths when certain features are disabled. Also, with VK_EXT_shader_object,
+  // the set of enabled features affects which pipeline state must be set
+  // before drawing.
+  VkPhysicalDeviceFeatures enabled_features = {};
+
+#define FEATURE(name)                \
+  if (supported_features.name) {     \
+    device_info_.name = true;        \
+    enabled_features.name = VK_TRUE; \
+    XELOGVK("* " #name);             \
+  }
+  FEATURE(fullDrawIndexUint32)
+  FEATURE(independentBlend)
+  FEATURE(geometryShader)
+  FEATURE(tessellationShader)
+  FEATURE(sampleRateShading)
+  FEATURE(depthClamp)
+  FEATURE(fillModeNonSolid)
+  FEATURE(samplerAnisotropy)
+  FEATURE(vertexPipelineStoresAndAtomics)
+  FEATURE(fragmentStoresAndAtomics)
+  FEATURE(shaderClipDistance)
+  FEATURE(shaderCullDistance)
+  FEATURE(sparseBinding)
+  FEATURE(sparseResidencyBuffer)
+#undef FEATURE
+
+  VkPhysicalDeviceProperties2 properties2 = {
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
+#define PROPERTIES2_DECLARE(type_suffix, structure_type_suffix) \
+  VkPhysicalDevice##type_suffix supported_##type_suffix = {     \
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##structure_type_suffix};
+#define PROPERTIES2_ADD(type_suffix)                   \
+  (supported_##type_suffix).pNext = properties2.pNext; \
+  properties2.pNext = &(supported_##type_suffix);
+
+  VkPhysicalDeviceFeatures2 features2 = {
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
+#define FEATURES2_DECLARE(type_suffix, structure_type_suffix)     \
+  VkPhysicalDevice##type_suffix supported_##type_suffix = {       \
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##structure_type_suffix}; \
+  VkPhysicalDevice##type_suffix enabled_##type_suffix = {         \
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##structure_type_suffix};
+#define FEATURES2_ADD(type_suffix)                    \
+  (supported_##type_suffix).pNext = features2.pNext;  \
+  features2.pNext = &(supported_##type_suffix);       \
+  (enabled_##type_suffix).pNext =                     \
+      const_cast<void*>(device_create_info.pNext);    \
+  device_create_info.pNext = &(enabled_##type_suffix);
+  // VUID-VkDeviceCreateInfo-pNext: "If the pNext chain includes a
+  // VkPhysicalDeviceVulkan1XFeatures structure, then it must not include..."
+  // Enabling the features in Vulkan1XFeatures instead.
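Hence FEATURES2_ADD_PROMOTED, defined next, chains the enabled_* struct into device creation only on pre-promotion API versions; on newer versions the same feature bits go through enabled_Vulkan1XFeatures instead (see EXTENSION_FEATURE_PROMOTED further down). To make the two-chain mechanics concrete, a minimal standalone sketch with assumed local names: supported_* structs chain onto the query, enabled_* structs onto the create info, and the two chains never share storage.

    VkPhysicalDeviceFeatures2 query = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
    VkPhysicalDeviceVulkan12Features supported_12 = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
    supported_12.pNext = query.pNext;  // prepend to the query chain
    query.pNext = &supported_12;
    vkGetPhysicalDeviceFeatures2(physical_device, &query);  // core 1.1 entry

    VkPhysicalDeviceVulkan12Features enabled_12 = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
    enabled_12.samplerMirrorClampToEdge = supported_12.samplerMirrorClampToEdge;
    enabled_12.pNext = const_cast<void*>(create_info.pNext);
    create_info.pNext = &enabled_12;  // only this chain goes to vkCreateDevice
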
+#define FEATURES2_ADD_PROMOTED(type_suffix, minor_version)                   \
+  (supported_##type_suffix).pNext = features2.pNext;                         \
+  features2.pNext = &(supported_##type_suffix);                              \
+  if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, minor_version, 0)) { \
+    (enabled_##type_suffix).pNext =                                          \
+        const_cast<void*>(device_create_info.pNext);                         \
+    device_create_info.pNext = &(enabled_##type_suffix);                     \
+  }
+
+  FEATURES2_DECLARE(Vulkan11Features, VULKAN_1_1_FEATURES)
+  if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
+    FEATURES2_ADD(Vulkan11Features)
+  }
+  FEATURES2_DECLARE(Vulkan12Features, VULKAN_1_2_FEATURES)
+  if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
+    FEATURES2_ADD(Vulkan12Features)
+  }
+  FEATURES2_DECLARE(Vulkan13Features, VULKAN_1_3_FEATURES)
+  if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
+    FEATURES2_ADD(Vulkan13Features)
+  }
+
+  FEATURES2_DECLARE(PortabilitySubsetFeaturesKHR,
+                    PORTABILITY_SUBSET_FEATURES_KHR)
+  if (device_info_.ext_VK_KHR_portability_subset) {
+    FEATURES2_ADD(PortabilitySubsetFeaturesKHR)
+  }
+  PROPERTIES2_DECLARE(FloatControlsProperties, FLOAT_CONTROLS_PROPERTIES)
+  if (device_info_.ext_1_2_VK_KHR_shader_float_controls) {
+    PROPERTIES2_ADD(FloatControlsProperties)
+  }
+  FEATURES2_DECLARE(FragmentShaderInterlockFeaturesEXT,
+                    FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT)
+  if (device_info_.ext_VK_EXT_fragment_shader_interlock) {
+    FEATURES2_ADD(FragmentShaderInterlockFeaturesEXT)
+  }
+  FEATURES2_DECLARE(ShaderDemoteToHelperInvocationFeatures,
+                    SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES)
+  if (device_info_.ext_1_3_VK_EXT_shader_demote_to_helper_invocation) {
+    FEATURES2_ADD_PROMOTED(ShaderDemoteToHelperInvocationFeatures, 3)
+  }
+  FEATURES2_DECLARE(NonSeamlessCubeMapFeaturesEXT,
+                    NON_SEAMLESS_CUBE_MAP_FEATURES_EXT)
+  if (device_info_.ext_VK_EXT_non_seamless_cube_map) {
+    FEATURES2_ADD(NonSeamlessCubeMapFeaturesEXT)
+  }
+
+  if (instance_extensions_.khr_get_physical_device_properties2) {
+    ifn_.vkGetPhysicalDeviceProperties2(physical_device_, &properties2);
+    ifn_.vkGetPhysicalDeviceFeatures2(physical_device_, &features2);
+  }
+
+#undef FEATURES2_ADD_PROMOTED
+#undef FEATURES2_ADD
+#undef FEATURES2_DECLARE
+#undef PROPERTIES2_ADD
+#undef PROPERTIES2_DECLARE
+
+  // VK_KHR_portability_subset removes functionality rather than adding it, so
+  // if the extension is not present, all its features are true by default.
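+  // Illustrative expansion of the PORTABILITY_SUBSET_FEATURE(name) macro
+  // defined below, for triangleFans:
+  //   if (device_info_.ext_VK_KHR_portability_subset) {
+  //     if (supported_PortabilitySubsetFeaturesKHR.triangleFans) { ... }
+  //   } else {
+  //     device_info_.triangleFans = true;  // Fully Vulkan 1.0-capable device.
+  //   }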
+#define PORTABILITY_SUBSET_FEATURE(name) \ + if (device_info_.ext_VK_KHR_portability_subset) { \ + if (supported_PortabilitySubsetFeaturesKHR.name) { \ + device_info_.name = true; \ + enabled_PortabilitySubsetFeaturesKHR.name = VK_TRUE; \ + XELOGVK("* " #name); \ + } \ + } else { \ + device_info_.name = true; \ + } + PORTABILITY_SUBSET_FEATURE(constantAlphaColorBlendFactors) + PORTABILITY_SUBSET_FEATURE(imageViewFormatReinterpretation) + PORTABILITY_SUBSET_FEATURE(imageViewFormatSwizzle) + PORTABILITY_SUBSET_FEATURE(pointPolygons) + PORTABILITY_SUBSET_FEATURE(separateStencilMaskRef) + PORTABILITY_SUBSET_FEATURE(shaderSampleRateInterpolationFunctions) + PORTABILITY_SUBSET_FEATURE(triangleFans) +#undef PORTABILITY_SUBSET_FEATURE + +#define EXTENSION_PROPERTY(type_suffix, name) \ + device_info_.name = supported_##type_suffix.name; \ + XELOGVK("* " #name ": {}", supported_##type_suffix.name); +#define EXTENSION_FEATURE(type_suffix, name) \ + if (supported_##type_suffix.name) { \ + device_info_.name = true; \ + enabled_##type_suffix.name = VK_TRUE; \ + XELOGVK("* " #name); \ + } +#define EXTENSION_FEATURE_PROMOTED(type_suffix, name, minor_version) \ + if (supported_##type_suffix.name) { \ + device_info_.name = true; \ + enabled_##type_suffix.name = VK_TRUE; \ + enabled_Vulkan1##minor_version##Features.name = VK_TRUE; \ + XELOGVK("* " #name); \ + } +#define EXTENSION_FEATURE_PROMOTED_AS_OPTIONAL(name, minor_version) \ + if (supported_Vulkan1##minor_version##Features.name) { \ + device_info_.name = true; \ + enabled_Vulkan1##minor_version##Features.name = VK_TRUE; \ + XELOGVK("* " #name); \ + } + + if (device_info_.ext_1_2_VK_KHR_sampler_mirror_clamp_to_edge) { + if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { + // Promoted - the feature is optional, and must be enabled if the + // extension is also enabled + // (VUID-VkDeviceCreateInfo-ppEnabledExtensionNames-02832). + EXTENSION_FEATURE_PROMOTED_AS_OPTIONAL(samplerMirrorClampToEdge, 2) + } else { + // Extension - the feature is implied. + device_info_.samplerMirrorClampToEdge = true; + XELOGVK("* samplerMirrorClampToEdge"); + } + } + + if (device_info_.ext_1_2_VK_KHR_shader_float_controls) { + EXTENSION_PROPERTY(FloatControlsProperties, + shaderSignedZeroInfNanPreserveFloat32) + EXTENSION_PROPERTY(FloatControlsProperties, shaderDenormFlushToZeroFloat32) + EXTENSION_PROPERTY(FloatControlsProperties, shaderRoundingModeRTEFloat32) + } + + if (device_info_.ext_VK_EXT_fragment_shader_interlock) { + EXTENSION_FEATURE(FragmentShaderInterlockFeaturesEXT, + fragmentShaderSampleInterlock) + // fragmentShaderPixelInterlock is not needed by Xenia if + // fragmentShaderSampleInterlock is available as it accesses only per-sample + // data. + if (!device_info_.fragmentShaderSampleInterlock) { + EXTENSION_FEATURE(FragmentShaderInterlockFeaturesEXT, + fragmentShaderPixelInterlock) + } + } + + if (device_info_.ext_1_3_VK_EXT_shader_demote_to_helper_invocation) { + EXTENSION_FEATURE_PROMOTED(ShaderDemoteToHelperInvocationFeatures, + shaderDemoteToHelperInvocation, 3) + } + + if (device_info_.ext_VK_EXT_non_seamless_cube_map) { + EXTENSION_FEATURE(NonSeamlessCubeMapFeaturesEXT, nonSeamlessCubeMap) + } + +#undef EXTENSION_FEATURE_PROMOTED_AS_OPTIONAL +#undef EXTENSION_FEATURE_PROMOTED +#undef EXTENSION_FEATURE +#undef EXTENSION_PROPERTY + + // Memory types. 
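+
+  // The memory_types_* fields are bitmasks indexed by memory type - bit N set
+  // means memoryTypes[N] has the property - so they can be combined directly
+  // with the memoryTypeBits of a VkMemoryRequirements; for example
+  // (illustrative):
+  //   uint32_t usable_types = memory_requirements.memoryTypeBits &
+  //                           device_info_.memory_types_host_visible;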
+
+  VkPhysicalDeviceMemoryProperties memory_properties;
+  ifn_.vkGetPhysicalDeviceMemoryProperties(physical_device_,
+                                           &memory_properties);
+  for (uint32_t memory_type_index = 0;
+       memory_type_index < memory_properties.memoryTypeCount;
+       ++memory_type_index) {
+    VkMemoryPropertyFlags memory_property_flags =
+        memory_properties.memoryTypes[memory_type_index].propertyFlags;
+    uint32_t memory_type_bit = uint32_t(1) << memory_type_index;
+    if (memory_property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+      device_info_.memory_types_device_local |= memory_type_bit;
+    }
+    if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+      device_info_.memory_types_host_visible |= memory_type_bit;
+    }
+    if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
+      device_info_.memory_types_host_coherent |= memory_type_bit;
+    }
+    if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+      device_info_.memory_types_host_cached |= memory_type_bit;
+    }
+  }
+
+  // Queue families.
+
+  uint32_t queue_family_count;
+  ifn_.vkGetPhysicalDeviceQueueFamilyProperties(physical_device_,
+                                                &queue_family_count, nullptr);
+  std::vector<VkQueueFamilyProperties> queue_families_properties(
+      queue_family_count);
+  ifn_.vkGetPhysicalDeviceQueueFamilyProperties(
+      physical_device_, &queue_family_count, queue_families_properties.data());
+  queue_families_properties.resize(queue_family_count);
+
+  queue_families_.clear();
+  queue_families_.resize(queue_family_count);
+
+  queue_family_graphics_compute_ = UINT32_MAX;
+  queue_family_sparse_binding_ = UINT32_MAX;
+  if (device_info_.sparseBinding) {
+    // Prefer a queue family that supports both graphics/compute and sparse
+    // binding because in Xenia sparse binding is done serially with graphics
+    // work.
+    for (uint32_t queue_family_index = 0;
+         queue_family_index < queue_family_count; ++queue_family_index) {
+      VkQueueFlags queue_flags =
+          queue_families_properties[queue_family_index].queueFlags;
+      bool is_graphics_compute =
+          (queue_flags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) ==
+          (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT);
+      bool is_sparse_binding = (queue_flags & VK_QUEUE_SPARSE_BINDING_BIT) ==
+                               VK_QUEUE_SPARSE_BINDING_BIT;
+      if (is_graphics_compute && is_sparse_binding) {
+        queue_family_graphics_compute_ = queue_family_index;
+        queue_family_sparse_binding_ = queue_family_index;
+        break;
+      }
+      // If both can't be done in one family, prefer the lowest-index queue
+      // family that can do either.
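+      // Illustrative example: if family 0 is graphics+compute only and
+      // family 2 is the only family with sparse binding, the break above is
+      // never taken, and 0 and 2 are recorded separately here.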
+      if (is_graphics_compute &&
+          queue_family_graphics_compute_ == UINT32_MAX) {
+        queue_family_graphics_compute_ = queue_family_index;
+      }
+      if (is_sparse_binding && queue_family_sparse_binding_ == UINT32_MAX) {
+        queue_family_sparse_binding_ = queue_family_index;
+      }
+    }
+  } else {
+    for (uint32_t queue_family_index = 0;
+         queue_family_index < queue_family_count; ++queue_family_index) {
+      if ((queue_families_properties[queue_family_index].queueFlags &
+           (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) ==
+          (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) {
+        queue_family_graphics_compute_ = queue_family_index;
+        break;
+      }
+    }
+  }
+
+  if (queue_family_graphics_compute_ == UINT32_MAX) {
+    XELOGVK("Vulkan device '{}' doesn't have a graphics and compute queue",
+            properties.deviceName);
+    return;
+  }
+  queue_families_[queue_family_graphics_compute_].queue_count = std::max(
+      uint32_t(1),
+      queue_families_[queue_family_graphics_compute_].queue_count);
+
+  if (device_info_.sparseBinding &&
+      queue_family_sparse_binding_ == UINT32_MAX) {
+    XELOGVK(
+        "Vulkan device '{}' reports that it supports sparse binding, but "
+        "doesn't have a queue that can perform sparse binding operations, "
+        "disabling sparse binding",
+        properties.deviceName);
+    device_info_.sparseBinding = false;
+    enabled_features.sparseBinding = false;
+  }
+  if (!enabled_features.sparseBinding) {
+    device_info_.sparseResidencyBuffer = false;
+    enabled_features.sparseResidencyBuffer = false;
+  }
+  if (queue_family_sparse_binding_ != UINT32_MAX) {
+    queue_families_[queue_family_sparse_binding_].queue_count = std::max(
+        uint32_t(1),
+        queue_families_[queue_family_sparse_binding_].queue_count);
+  }
+
+  // Request queues of all families potentially supporting presentation, as
+  // which ones will actually be used depends on the surface object.
+  bool any_queue_potentially_supports_present = false;
+  if (instance_extensions_.khr_surface) {
+    for (uint32_t queue_family_index = 0;
+         queue_family_index < queue_family_count; ++queue_family_index) {
+#if XE_PLATFORM_WIN32
+      if (instance_extensions_.khr_win32_surface &&
+          !ifn_.vkGetPhysicalDeviceWin32PresentationSupportKHR(
+              physical_device_, queue_family_index)) {
+        continue;
+      }
+#endif
+      QueueFamily& queue_family = queue_families_[queue_family_index];
+      // Requesting an additional queue in each family where possible, so that
+      // asynchronous presentation can potentially be done within a single
+      // queue family too.
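+      // The increment below is clamped to the family's queueCount, so a
+      // family exposing only a single queue is simply shared between graphics
+      // work and presentation.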
+      queue_family.queue_count =
+          std::min(queue_families_properties[queue_family_index].queueCount,
+                   queue_family.queue_count + uint32_t(1));
+      queue_family.potentially_supports_present = true;
+      any_queue_potentially_supports_present = true;
+    }
+  }
+  if (!any_queue_potentially_supports_present && is_surface_required_) {
+    XELOGVK(
+        "Vulkan device '{}' doesn't have any queues supporting presentation",
+        properties.deviceName);
+    return;
+  }
+
+  std::vector<VkDeviceQueueCreateInfo> queue_create_infos;
+  queue_create_infos.reserve(queue_families_.size());
+  uint32_t used_queue_count = 0;
+  uint32_t max_queue_count_per_family = 0;
+  for (uint32_t queue_family_index = 0;
+       queue_family_index < queue_family_count; ++queue_family_index) {
+    QueueFamily& queue_family = queue_families_[queue_family_index];
+    queue_family.queue_first_index = used_queue_count;
+    if (!queue_family.queue_count) {
+      continue;
+    }
+    VkDeviceQueueCreateInfo& queue_create_info =
+        queue_create_infos.emplace_back();
+    queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queue_create_info.pNext = nullptr;
+    queue_create_info.flags = 0;
+    queue_create_info.queueFamilyIndex = queue_family_index;
+    queue_create_info.queueCount = queue_family.queue_count;
+    // pQueuePriorities will be set later based on max_queue_count_per_family.
+    max_queue_count_per_family =
+        std::max(max_queue_count_per_family, queue_family.queue_count);
+    used_queue_count += queue_family.queue_count;
+  }
+  std::vector<float> queue_priorities(max_queue_count_per_family, 1.0f);
+  for (VkDeviceQueueCreateInfo& queue_create_info : queue_create_infos) {
+    queue_create_info.pQueuePriorities = queue_priorities.data();
+  }
+
+  // Create the device.
+
+  device_create_info.queueCreateInfoCount =
+      static_cast<uint32_t>(queue_create_infos.size());
+  device_create_info.pQueueCreateInfos = queue_create_infos.data();
+  device_create_info.enabledExtensionCount =
+      static_cast<uint32_t>(enabled_extensions.size());
+  device_create_info.ppEnabledExtensionNames = enabled_extensions.data();
+  device_create_info.pEnabledFeatures = &enabled_features;
+  VkResult create_device_result = ifn_.vkCreateDevice(
+      physical_device_, &device_create_info, nullptr, &device_);
+  if (create_device_result != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan device for physical device '{}', result {}",
+        properties.deviceName, static_cast<int32_t>(create_device_result));
+    device_ = VK_NULL_HANDLE;
+    return;
+  }
+
+  // Device function pointers.
+
+  std::memset(&dfn_, 0, sizeof(dfn_));
+
+  bool functions_loaded = true;
+
+#define XE_UI_VULKAN_FUNCTION(name)                                         \
+  functions_loaded &=                                                       \
+      (dfn_.name = PFN_##name(ifn_.vkGetDeviceProcAddr(device_, #name))) != \
+      nullptr;
+
+  // Vulkan 1.0.
+#include "xenia/ui/vulkan/functions/device_1_0.inc"
+
+  // Promoted extensions when the API version they're promoted to is supported.
+#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
+  functions_loaded &=                                             \
+      (dfn_.core_name = PFN_##core_name(                          \
+           ifn_.vkGetDeviceProcAddr(device_, #core_name))) != nullptr;
+  if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
+#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
+#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
+  }
+  if (properties.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
+#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
+  }
+#undef XE_UI_VULKAN_FUNCTION_PROMOTED
+
+  // Non-promoted extensions, and promoted extensions on API versions lower
+  // than the ones they were promoted to.
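+  // For instance, the get_memory_requirements2 .inc file presumably lists
+  // pairs such as (vkGetBufferMemoryRequirements2KHR,
+  // vkGetBufferMemoryRequirements2): on Vulkan 1.1+ the core entry point was
+  // loaded above, while the redefinition below fills the same dfn_ member
+  // from the KHR-suffixed extension entry point instead.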
+#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
+  functions_loaded &=                                             \
+      (dfn_.core_name = PFN_##core_name(                          \
+           ifn_.vkGetDeviceProcAddr(device_, #extension_name))) != nullptr;
+  if (device_info_.ext_VK_KHR_swapchain) {
+#include "xenia/ui/vulkan/functions/device_khr_swapchain.inc"
+  }
+  if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, 1, 0)) {
+    if (device_info_.ext_1_1_VK_KHR_get_memory_requirements2) {
+#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
+    }
+    if (device_info_.ext_1_1_VK_KHR_bind_memory2) {
+#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
+    }
+  }
+  if (properties.apiVersion < VK_MAKE_API_VERSION(0, 1, 3, 0)) {
+    if (device_info_.ext_1_3_VK_KHR_maintenance4) {
+#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
+    }
+  }
+#undef XE_UI_VULKAN_FUNCTION_PROMOTED
+
+#undef XE_UI_VULKAN_FUNCTION
+
+  if (!functions_loaded) {
+    XELOGE("Failed to get all Vulkan device function pointers for '{}'",
+           properties.deviceName);
+    ifn_.vkDestroyDevice(device_, nullptr);
+    device_ = VK_NULL_HANDLE;
+    return;
+  }
+
+  // Queues.
+
+  queues_.reset();
+  queues_ = std::make_unique<Queue[]>(used_queue_count);
+  uint32_t queue_index = 0;
+  for (uint32_t queue_family_index = 0;
+       queue_family_index < queue_family_count; ++queue_family_index) {
+    const QueueFamily& queue_family = queue_families_[queue_family_index];
+    if (!queue_family.queue_count) {
+      continue;
+    }
+    assert_true(queue_index == queue_family.queue_first_index);
+    for (uint32_t family_queue_index = 0;
+         family_queue_index < queue_family.queue_count; ++family_queue_index) {
+      VkQueue queue;
+      dfn_.vkGetDeviceQueue(device_, queue_family_index, family_queue_index,
+                            &queue);
+      queues_[queue_index++].queue = queue;
+    }
+  }
+
+  XELOGVK("Created a Vulkan device for physical device '{}'",
+          properties.deviceName);
+}
+
 }  // namespace vulkan
 }  // namespace ui
 }  // namespace xe
diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h
index 2d499a614..12d1710b1 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.h
+++ b/src/xenia/ui/vulkan/vulkan_provider.h
@@ -57,6 +57,160 @@ namespace vulkan {
 
 class VulkanProvider : public GraphicsProvider {
  public:
+  struct DeviceInfo {
+    // "ext_1_X"-prefixed extension fields are set to true not only if the
+    // extension itself is actually exposed, but also if it was promoted to
+    // the device's API version. Therefore, merely the field being set to true
+    // doesn't imply that all the required features in the extension are
+    // supported - actual properties and features must be checked rather than
+    // the extension itself where they matter.
+
+    // Vulkan 1.0.
+
+    uint32_t memory_types_device_local;
+    uint32_t memory_types_host_visible;
+    uint32_t memory_types_host_coherent;
+    uint32_t memory_types_host_cached;
+
+    uint32_t apiVersion;
+    uint32_t maxImageDimension2D;
+    uint32_t maxImageDimension3D;
+    uint32_t maxImageDimensionCube;
+    uint32_t maxImageArrayLayers;
+    uint32_t maxStorageBufferRange;
+    uint32_t maxSamplerAllocationCount;
+    uint32_t maxPerStageDescriptorSamplers;
+    uint32_t maxPerStageDescriptorStorageBuffers;
+    uint32_t maxPerStageDescriptorSampledImages;
+    uint32_t maxPerStageResources;
+    uint32_t maxVertexOutputComponents;
+    uint32_t maxTessellationEvaluationOutputComponents;
+    uint32_t maxGeometryInputComponents;
+    uint32_t maxGeometryOutputComponents;
+    uint32_t maxGeometryTotalOutputComponents;
+    uint32_t maxFragmentInputComponents;
+    uint32_t maxFragmentCombinedOutputResources;
+    float maxSamplerAnisotropy;
+    uint32_t maxViewportDimensions[2];
+    float viewportBoundsRange[2];
+    VkDeviceSize minUniformBufferOffsetAlignment;
+    VkDeviceSize minStorageBufferOffsetAlignment;
+    uint32_t maxFramebufferWidth;
+    uint32_t maxFramebufferHeight;
+    VkSampleCountFlags framebufferColorSampleCounts;
+    VkSampleCountFlags framebufferDepthSampleCounts;
+    VkSampleCountFlags framebufferStencilSampleCounts;
+    VkSampleCountFlags framebufferNoAttachmentsSampleCounts;
+    VkSampleCountFlags sampledImageColorSampleCounts;
+    VkSampleCountFlags sampledImageIntegerSampleCounts;
+    VkSampleCountFlags sampledImageDepthSampleCounts;
+    VkSampleCountFlags sampledImageStencilSampleCounts;
+    bool standardSampleLocations;
+    VkDeviceSize optimalBufferCopyOffsetAlignment;
+    VkDeviceSize optimalBufferCopyRowPitchAlignment;
+    VkDeviceSize nonCoherentAtomSize;
+
+    bool fullDrawIndexUint32;
+    bool independentBlend;
+    bool geometryShader;
+    bool tessellationShader;
+    bool sampleRateShading;
+    bool depthClamp;
+    bool fillModeNonSolid;
+    bool samplerAnisotropy;
+    bool vertexPipelineStoresAndAtomics;
+    bool fragmentStoresAndAtomics;
+    bool shaderClipDistance;
+    bool shaderCullDistance;
+    bool sparseBinding;
+    bool sparseResidencyBuffer;
+
+    // VK_KHR_swapchain (#2).
+
+    bool ext_VK_KHR_swapchain;
+
+    // VK_KHR_sampler_mirror_clamp_to_edge (#15, Vulkan 1.2).
+
+    bool ext_1_2_VK_KHR_sampler_mirror_clamp_to_edge;
+
+    bool samplerMirrorClampToEdge;
+
+    // VK_KHR_dedicated_allocation (#128, Vulkan 1.1).
+
+    bool ext_1_1_VK_KHR_dedicated_allocation;
+
+    // VK_EXT_shader_stencil_export (#141).
+
+    bool ext_VK_EXT_shader_stencil_export;
+
+    // VK_KHR_get_memory_requirements2 (#147, Vulkan 1.1).
+
+    bool ext_1_1_VK_KHR_get_memory_requirements2;
+
+    // VK_KHR_image_format_list (#148, Vulkan 1.2).
+
+    bool ext_1_2_VK_KHR_image_format_list;
+
+    // VK_KHR_sampler_ycbcr_conversion (#157, Vulkan 1.1).
+
+    bool ext_1_1_VK_KHR_sampler_ycbcr_conversion;
+
+    // VK_KHR_bind_memory2 (#158, Vulkan 1.1).
+
+    bool ext_1_1_VK_KHR_bind_memory2;
+
+    // VK_KHR_portability_subset (#164).
+
+    bool ext_VK_KHR_portability_subset;
+
+    bool constantAlphaColorBlendFactors;
+    bool imageViewFormatReinterpretation;
+    bool imageViewFormatSwizzle;
+    bool pointPolygons;
+    bool separateStencilMaskRef;
+    bool shaderSampleRateInterpolationFunctions;
+    bool triangleFans;
+
+    // VK_KHR_shader_float_controls (#198, Vulkan 1.2).
+
+    bool ext_1_2_VK_KHR_shader_float_controls;
+
+    bool shaderSignedZeroInfNanPreserveFloat32;
+    bool shaderDenormFlushToZeroFloat32;
+    bool shaderRoundingModeRTEFloat32;
+
+    // VK_KHR_spirv_1_4 (#237, Vulkan 1.2).
+
+    bool ext_1_2_VK_KHR_spirv_1_4;
+
+    // VK_EXT_memory_budget (#238).
+
+    bool ext_VK_EXT_memory_budget;
+
+    // VK_EXT_fragment_shader_interlock (#252).
+
+    bool ext_VK_EXT_fragment_shader_interlock;
+
+    bool fragmentShaderSampleInterlock;
+    bool fragmentShaderPixelInterlock;
+
+    // VK_EXT_shader_demote_to_helper_invocation (#277, Vulkan 1.3).
+
+    bool ext_1_3_VK_EXT_shader_demote_to_helper_invocation;
+
+    bool shaderDemoteToHelperInvocation;
+
+    // VK_KHR_maintenance4 (#414, Vulkan 1.3).
+
+    bool ext_1_3_VK_KHR_maintenance4;
+
+    // VK_EXT_non_seamless_cube_map (#423).
+
+    bool ext_VK_EXT_non_seamless_cube_map;
+
+    bool nonSeamlessCubeMap;
+  };
+
   ~VulkanProvider();
 
   static std::unique_ptr<VulkanProvider> Create(bool is_surface_required);
@@ -106,7 +260,7 @@ class VulkanProvider : public GraphicsProvider {
   struct InstanceFunctions {
 #define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
 #define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
-  PFN_##extension_name extension_name;
+  PFN_##core_name core_name;
 #include "xenia/ui/vulkan/functions/instance_1_0.inc"
 #include "xenia/ui/vulkan/functions/instance_ext_debug_utils.inc"
 #include "xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc"
@@ -124,61 +278,9 @@ class VulkanProvider : public GraphicsProvider {
   const InstanceFunctions& ifn() const { return ifn_; }
 
   VkPhysicalDevice physical_device() const { return physical_device_; }
-  const VkPhysicalDeviceProperties& device_properties() const {
-    return device_properties_;
-  }
-  const VkPhysicalDeviceFeatures& device_features() const {
-    return device_features_;
-  }
-  struct DeviceExtensions {
-    bool ext_fragment_shader_interlock;
-    bool ext_memory_budget;
-    // Core since 1.3.0.
-    bool ext_shader_demote_to_helper_invocation;
-    bool ext_shader_stencil_export;
-    // Core since 1.1.0.
-    bool khr_bind_memory2;
-    // Core since 1.1.0.
-    bool khr_dedicated_allocation;
-    // Core since 1.1.0.
-    bool khr_get_memory_requirements2;
-    // Core since 1.2.0.
-    bool khr_image_format_list;
-    // Core since 1.3.0.
-    bool khr_maintenance4;
-    // Requires the VK_KHR_get_physical_device_properties2 instance extension.
-    bool khr_portability_subset;
-    // Core since 1.1.0.
-    bool khr_sampler_ycbcr_conversion;
-    // Core since 1.2.0.
-    bool khr_shader_float_controls;
-    // Core since 1.2.0.
-    bool khr_spirv_1_4;
-    bool khr_swapchain;
-  };
-  const DeviceExtensions& device_extensions() const {
-    return device_extensions_;
-  }
-  // Returns nullptr if the device is fully compliant with Vulkan 1.0.
- const VkPhysicalDevicePortabilitySubsetFeaturesKHR* - device_portability_subset_features() const { - if (!device_extensions_.khr_portability_subset) { - return nullptr; - } - return &device_portability_subset_features_; - } - uint32_t memory_types_device_local() const { - return memory_types_device_local_; - } - uint32_t memory_types_host_visible() const { - return memory_types_host_visible_; - } - uint32_t memory_types_host_coherent() const { - return memory_types_host_coherent_; - } - uint32_t memory_types_host_cached() const { - return memory_types_host_cached_; - } + + const DeviceInfo& device_info() const { return device_info_; } + struct QueueFamily { uint32_t queue_first_index = 0; uint32_t queue_count = 0; @@ -196,18 +298,6 @@ class VulkanProvider : public GraphicsProvider { uint32_t queue_family_sparse_binding() const { return queue_family_sparse_binding_; } - const VkPhysicalDeviceFloatControlsPropertiesKHR& - device_float_controls_properties() const { - return device_float_controls_properties_; - } - const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT& - device_fragment_shader_interlock_features() const { - return device_fragment_shader_interlock_features_; - } - const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT& - device_shader_demote_to_helper_invocation_features() const { - return device_shader_demote_to_helper_invocation_features_; - } struct Queue { VkQueue queue = VK_NULL_HANDLE; @@ -235,7 +325,7 @@ class VulkanProvider : public GraphicsProvider { struct DeviceFunctions { #define XE_UI_VULKAN_FUNCTION(name) PFN_##name name; #define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \ - PFN_##extension_name extension_name; + PFN_##core_name core_name; #include "xenia/ui/vulkan/functions/device_1_0.inc" #include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" #include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" @@ -261,10 +351,6 @@ class VulkanProvider : public GraphicsProvider { ifn_.vkSetDebugUtilsObjectNameEXT(device_, &name_info); } - bool IsSparseBindingSupported() const { - return queue_family_sparse_binding_ != UINT32_MAX; - } - // Samplers that may be useful for host needs. Only these samplers should be // used in host, non-emulation contexts, because the total number of samplers // is heavily limited (4000) on Nvidia GPUs - the rest of samplers are @@ -298,6 +384,12 @@ class VulkanProvider : public GraphicsProvider { const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data); + // For the current `physical_device_`, sets up the members obtained from the + // physical device info, and tries to create a device and get the needed + // queues. + // The call is successful if `device_` is not VK_NULL_HANDLE as a result. 
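+  // Illustrative (hypothetical) call pattern:
+  //   for (VkPhysicalDevice candidate : physical_devices) {
+  //     physical_device_ = candidate;
+  //     TryCreateDevice();
+  //     if (device_ != VK_NULL_HANDLE) break;
+  //   }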
+  void TryCreateDevice();
+
   bool is_surface_required_;
 
   RenderdocApi renderdoc_api_;
 
@@ -313,30 +405,21 @@ class VulkanProvider : public GraphicsProvider {
   InstanceExtensions instance_extensions_;
   VkInstance instance_ = VK_NULL_HANDLE;
   InstanceFunctions ifn_;
+
   VkDebugUtilsMessengerEXT debug_messenger_ = VK_NULL_HANDLE;
   bool debug_names_used_ = false;
 
   VkPhysicalDevice physical_device_ = VK_NULL_HANDLE;
-  VkPhysicalDeviceProperties device_properties_;
-  VkPhysicalDeviceFeatures device_features_;
-  DeviceExtensions device_extensions_;
-  VkPhysicalDevicePortabilitySubsetFeaturesKHR
-      device_portability_subset_features_;
-  uint32_t memory_types_device_local_;
-  uint32_t memory_types_host_visible_;
-  uint32_t memory_types_host_coherent_;
-  uint32_t memory_types_host_cached_;
+
+  DeviceInfo device_info_ = {};
+
   std::vector<QueueFamily> queue_families_;
   uint32_t queue_family_graphics_compute_;
   uint32_t queue_family_sparse_binding_;
-  VkPhysicalDeviceFloatControlsPropertiesKHR device_float_controls_properties_;
-  VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT
-      device_fragment_shader_interlock_features_;
-  VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT
-      device_shader_demote_to_helper_invocation_features_;
 
   VkDevice device_ = VK_NULL_HANDLE;
   DeviceFunctions dfn_ = {};
+
   // Queues contain a mutex, can't use std::vector.
   std::unique_ptr<Queue[]> queues_;
diff --git a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc
index 2e0a054c7..3f4c139f4 100644
--- a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc
+++ b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc
@@ -138,13 +138,13 @@ VulkanUploadBufferPool::CreatePageImplementation() {
   memory_allocate_info.pNext = nullptr;
   memory_allocate_info.allocationSize = allocation_size_;
   memory_allocate_info.memoryTypeIndex = memory_type_;
-  VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
-  if (provider_.device_extensions().khr_dedicated_allocation) {
+  VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
+  if (provider_.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
     memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
     memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
         &memory_dedicated_allocate_info);
     memory_dedicated_allocate_info.sType =
-        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
     memory_dedicated_allocate_info.pNext = nullptr;
     memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
     memory_dedicated_allocate_info.buffer = buffer;
diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc
index b4eb02c3f..d51edb2d6 100644
--- a/src/xenia/ui/vulkan/vulkan_util.cc
+++ b/src/xenia/ui/vulkan/vulkan_util.cc
@@ -27,8 +27,8 @@ void FlushMappedMemoryRange(const VulkanProvider& provider,
   assert_false(size != VK_WHOLE_SIZE && memory_size == VK_WHOLE_SIZE);
   assert_true(memory_size == VK_WHOLE_SIZE || offset <= memory_size);
   assert_true(memory_size == VK_WHOLE_SIZE || size <= memory_size - offset);
-  if (!size ||
-      (provider.memory_types_host_coherent() & (uint32_t(1) << memory_type))) {
+  if (!size || (provider.device_info().memory_types_host_coherent &
+                (uint32_t(1) << memory_type))) {
     return;
   }
   VkMappedMemoryRange range;
@@ -38,7 +38,7 @@
   range.offset = offset;
   range.size = size;
   VkDeviceSize non_coherent_atom_size =
-      provider.device_properties().limits.nonCoherentAtomSize;
+      provider.device_info().nonCoherentAtomSize;
   // On some Android implementations, nonCoherentAtomSize is 0, not 1.
   if (non_coherent_atom_size > 1) {
     range.offset = offset / non_coherent_atom_size * non_coherent_atom_size;
@@ -89,13 +89,13 @@ bool CreateDedicatedAllocationBuffer(
   memory_allocate_info.pNext = nullptr;
   memory_allocate_info.allocationSize = memory_requirements.size;
   memory_allocate_info.memoryTypeIndex = memory_type;
-  VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
-  if (provider.device_extensions().khr_dedicated_allocation) {
+  VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
+  if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
     memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
     memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
         &memory_dedicated_allocate_info);
     memory_dedicated_allocate_info.sType =
-        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
     memory_dedicated_allocate_info.pNext = nullptr;
     memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
     memory_dedicated_allocate_info.buffer = buffer;
@@ -154,13 +154,13 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider,
   memory_allocate_info.pNext = nullptr;
   memory_allocate_info.allocationSize = memory_requirements.size;
   memory_allocate_info.memoryTypeIndex = memory_type;
-  VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
-  if (provider.device_extensions().khr_dedicated_allocation) {
+  VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
+  if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
     memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
     memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
         &memory_dedicated_allocate_info);
     memory_dedicated_allocate_info.sType =
-        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
     memory_dedicated_allocate_info.pNext = nullptr;
     memory_dedicated_allocate_info.image = image;
     memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE;
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
index ca3eb60b1..8415a5ffd 100644
--- a/src/xenia/ui/vulkan/vulkan_util.h
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -50,7 +50,7 @@ enum class MemoryPurpose {
 inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider,
                                           VkDeviceSize size) {
   VkDeviceSize non_coherent_atom_size =
-      provider.device_properties().limits.nonCoherentAtomSize;
+      provider.device_info().nonCoherentAtomSize;
   // On some Android implementations, nonCoherentAtomSize is 0, not 1.
   if (non_coherent_atom_size > 1) {
     size = xe::round_up(size, non_coherent_atom_size, false);
@@ -61,8 +61,8 @@ inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider,
 inline uint32_t ChooseHostMemoryType(const VulkanProvider& provider,
                                      uint32_t supported_types,
                                      bool is_readback) {
-  supported_types &= provider.memory_types_host_visible();
-  uint32_t host_cached = provider.memory_types_host_cached();
+  supported_types &= provider.device_info().memory_types_host_visible;
+  uint32_t host_cached = provider.device_info().memory_types_host_cached;
   uint32_t memory_type;
   // For upload, uncached is preferred so writes do not pollute the CPU cache.
   // For readback, cached is preferred so multiple CPU reads are fast.
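+  // Illustrative use (hypothetical call site), picking an upload memory type
+  // for a buffer given its VkMemoryRequirements reqs:
+  //   uint32_t memory_type = ChooseHostMemoryType(
+  //       provider, reqs.memoryTypeBits, /* is_readback */ false);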
@@ -107,12 +107,12 @@ void FlushMappedMemoryRange(const VulkanProvider& provider, VkDeviceSize size = VK_WHOLE_SIZE); inline VkExtent2D GetMax2DFramebufferExtent(const VulkanProvider& provider) { - const VkPhysicalDeviceLimits& limits = provider.device_properties().limits; + const VulkanProvider::DeviceInfo& device_info = provider.device_info(); VkExtent2D max_extent; - max_extent.width = - std::min(limits.maxFramebufferWidth, limits.maxImageDimension2D); - max_extent.height = - std::min(limits.maxFramebufferHeight, limits.maxImageDimension2D); + max_extent.width = std::min(device_info.maxFramebufferWidth, + device_info.maxImageDimension2D); + max_extent.height = std::min(device_info.maxFramebufferHeight, + device_info.maxImageDimension2D); return max_extent; } From a90f83d44c47abfae91e2598a07a0eee0355fa68 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 5 May 2024 15:15:48 +0300 Subject: [PATCH 4/4] [Vulkan] Non-seamless cube map filtering --- src/xenia/gpu/gpu_flags.cc | 7 +++++++ src/xenia/gpu/gpu_flags.h | 2 ++ src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 9 ++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/gpu_flags.cc b/src/xenia/gpu/gpu_flags.cc index 1510eeec0..1b9bd065d 100644 --- a/src/xenia/gpu/gpu_flags.cc +++ b/src/xenia/gpu/gpu_flags.cc @@ -28,6 +28,13 @@ DEFINE_bool( "the real reason why they're invalid is found.", "GPU"); +DEFINE_bool( + non_seamless_cube_map, true, + "Disable filtering between cube map faces near edges where possible " + "(Vulkan with VK_EXT_non_seamless_cube_map) to reproduce the Direct3D 9 " + "behavior.", + "GPU"); + // Extremely bright screen borders in 4D5307E6. // Reading between texels with half-pixel offset in 58410954. DEFINE_bool( diff --git a/src/xenia/gpu/gpu_flags.h b/src/xenia/gpu/gpu_flags.h index 5ae64b76e..ecdc73ca7 100644 --- a/src/xenia/gpu/gpu_flags.h +++ b/src/xenia/gpu/gpu_flags.h @@ -20,6 +20,8 @@ DECLARE_bool(vsync); DECLARE_bool(gpu_allow_invalid_fetch_constants); +DECLARE_bool(non_seamless_cube_map); + DECLARE_bool(half_pixel_offset); DECLARE_int32(query_occlusion_fake_sample_count); diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index e056c606c..bff490b9d 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -17,6 +17,7 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/texture_util.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" @@ -760,9 +761,11 @@ VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters, // GetSamplerParameters. VkSamplerCreateInfo sampler_create_info = {}; sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - // TODO(Triang3l): VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT if - // VK_EXT_non_seamless_cube_map and the nonSeamlessCubeMap feature are - // supported. + if (provider.device_info().nonSeamlessCubeMap && + cvars::non_seamless_cube_map) { + sampler_create_info.flags |= + VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT; + } sampler_create_info.magFilter = parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; sampler_create_info.minFilter =