From fad11e353261f923eb97a4ca324e0f574d3906b5 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Sat, 13 Nov 2021 19:06:00 -0800 Subject: [PATCH 01/12] MoltenVK: Add patch to turn on framebuffer fetch in SPIRV-Cross --- Externals/MoltenVK/CMakeLists.txt | 2 ++ Externals/MoltenVK/patch.sh | 13 ++++++++++ ...ter-Enable-use_framebuffer_fetch_sub.patch | 24 +++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100755 Externals/MoltenVK/patch.sh create mode 100644 Externals/MoltenVK/patches/0001-SPIRVToMSLConverter-Enable-use_framebuffer_fetch_sub.patch diff --git a/Externals/MoltenVK/CMakeLists.txt b/Externals/MoltenVK/CMakeLists.txt index 5202e95970..fdc53313bf 100644 --- a/Externals/MoltenVK/CMakeLists.txt +++ b/Externals/MoltenVK/CMakeLists.txt @@ -8,6 +8,8 @@ ExternalProject_Add(MoltenVK CONFIGURE_COMMAND ${CMAKE_CURRENT_LIST_DIR}/configure.sh ${MOLTENVK_VERSION} + PATCH_COMMAND ${CMAKE_CURRENT_LIST_DIR}/patch.sh ${CMAKE_SOURCE_DIR}/Externals/MoltenVK/patches/ ${MOLTENVK_VERSION} + BUILD_COMMAND make -C <SOURCE_DIR> macos BUILD_IN_SOURCE ON BUILD_BYPRODUCTS <SOURCE_DIR>/Package/Release/MoltenVK/dylib/macOS/libMoltenVK.dylib diff --git a/Externals/MoltenVK/patch.sh b/Externals/MoltenVK/patch.sh new file mode 100755 index 0000000000..2a40d7eda9 --- /dev/null +++ b/Externals/MoltenVK/patch.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Applies all patches in the "patches" folder to the cloned MoltenVK git repository. +# +# Usage: patch.sh <patches folder> <MoltenVK version> +# + +set -e + +# Reset the git repository first to ensure that it's in the base state. 
+git reset --hard $2 + +git apply $1/*.patch diff --git a/Externals/MoltenVK/patches/0001-SPIRVToMSLConverter-Enable-use_framebuffer_fetch_sub.patch b/Externals/MoltenVK/patches/0001-SPIRVToMSLConverter-Enable-use_framebuffer_fetch_sub.patch new file mode 100644 index 0000000000..4cd0c3d8f7 --- /dev/null +++ b/Externals/MoltenVK/patches/0001-SPIRVToMSLConverter-Enable-use_framebuffer_fetch_sub.patch @@ -0,0 +1,24 @@ +From 4ca33b7a9b149c6fbcc1c88ce08fc49f21294f6d Mon Sep 17 00:00:00 2001 +From: OatmealDome +Date: Sat, 31 Jul 2021 19:18:35 -0400 +Subject: [PATCH] SPIRVToMSLConverter: Enable use_framebuffer_fetch_subpasses + +--- + .../MoltenVKShaderConverter/SPIRVToMSLConverter.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp +index 17c79394..97e98004 100644 +--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp ++++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp +@@ -92,6 +92,7 @@ MVK_PUBLIC_SYMBOL SPIRVToMSLConversionOptions::SPIRVToMSLConversionOptions() { + #endif + + mslOptions.pad_fragment_output_components = true; ++ mslOptions.use_framebuffer_fetch_subpasses = true; + } + + MVK_PUBLIC_SYMBOL bool mvk::MSLShaderInput::matches(const mvk::MSLShaderInput& other) const { +-- +2.30.1 (Apple Git-130) + From 8e72136eebb95daea111d1f7b8e5a348194464bb Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 22:31:22 -0400 Subject: [PATCH 02/12] VulkanContext: Set Apple GPUs as supporting framebuffer fetch --- Source/Core/VideoBackends/Vulkan/VulkanContext.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index a571c6c38d..12c6082fab 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ 
b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -285,7 +285,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsBPTCTextures = false; // Dependent on features. config->backend_info.bSupportsLogicOp = false; // Dependent on features. config->backend_info.bSupportsLargePoints = false; // Dependent on features. - config->backend_info.bSupportsFramebufferFetch = false; // No support. + config->backend_info.bSupportsFramebufferFetch = false; // Dependent on OS and features. config->backend_info.bSupportsCoarseDerivatives = true; // Assumed support. config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support. } @@ -339,6 +339,15 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD properties.limits.pointSizeRange[0] <= 1.0f && properties.limits.pointSizeRange[1] >= 16; + std::string device_name = properties.deviceName; + u32 vendor_id = properties.vendorID; + + // Only Apple family GPUs support framebuffer fetch. + if (vendor_id == 0x106B || device_name.find("Apple") != std::string::npos) + { + config->backend_info.bSupportsFramebufferFetch = true; + } + // Our usage of primitive restart appears to be broken on AMD's binary drivers. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. 
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) From 04ec02c06bccdcc575765da70378e50b0b671f2c Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:39:34 -0400 Subject: [PATCH 03/12] ConstantManager: Add logic ops to PixelShaderConstants --- Source/Core/VideoCommon/ConstantManager.h | 4 ++++ Source/Core/VideoCommon/PixelShaderGen.cpp | 2 ++ Source/Core/VideoCommon/PixelShaderManager.cpp | 10 ++++++++++ 3 files changed, 16 insertions(+) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 7144342503..5335af963a 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -15,6 +15,7 @@ using int4 = std::array<s32, 4>; enum class DstBlendFactor : u32; enum class SrcBlendFactor : u32; enum class ZTexOp : u32; +enum class LogicOp : u32; struct PixelShaderConstants { @@ -54,6 +55,9 @@ struct PixelShaderConstants DstBlendFactor blend_dst_factor_alpha; u32 blend_subtract; u32 blend_subtract_alpha; + // For shader_framebuffer_fetch logic ops: + u32 logic_op_enable; // bool + LogicOp logic_op_mode; }; struct VertexShaderConstants diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6261f57812..b1362d73f3 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -423,6 +423,8 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, "\tuint blend_dst_factor_alpha;\n" "\tbool blend_subtract;\n" "\tbool blend_subtract_alpha;\n" + "\tbool logic_op_enable;\n" + "\tuint logic_op_mode;\n" "}};\n\n"); out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n" "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n" diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index f63722c9c1..675bcceca5 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ 
b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -510,6 +510,16 @@ void PixelShaderManager::SetBlendModeChanged() constants.blend_subtract_alpha = state.subtractAlpha; dirty = true; } + if (constants.logic_op_enable != state.logicopenable) + { + constants.logic_op_enable = state.logicopenable; + dirty = true; + } + if (constants.logic_op_mode != state.logicmode) + { + constants.logic_op_mode = state.logicmode; + dirty = true; + } s_bDestAlphaDirty = true; } From c12b9b013b4219fc712380284e11e6f53c65513a Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:40:10 -0400 Subject: [PATCH 04/12] PixelShaderGen: Add logic ops to pixel_shader_uid_data --- Source/Core/VideoCommon/GXPipelineTypes.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 +++ Source/Core/VideoCommon/PixelShaderGen.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h index 231bddefc1..ca3fb89701 100644 --- a/Source/Core/VideoCommon/GXPipelineTypes.h +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -19,7 +19,7 @@ namespace VideoCommon // As pipelines encompass both shader UIDs and render states, changes to either of these should // also increment the pipeline UID version. Incrementing the UID version will cause all UID // caches to be invalidated. 
-constexpr u32 GX_PIPELINE_UID_VERSION = 3; // Last changed in PR 9532 +constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 10215 struct GXPipelineUid { diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b1362d73f3..ca585f1c23 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -331,6 +331,9 @@ PixelShaderUid GetPixelShaderUid() uid_data->blend_subtract_alpha = state.subtractAlpha; } + uid_data->logic_op_enable = state.logicopenable; + uid_data->logic_op_mode = u32(state.logicmode.Value()); + return out; } diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index adf996044c..6a7a638ac3 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -59,6 +59,8 @@ struct pixel_shader_uid_data DstBlendFactor blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend + u32 logic_op_enable : 1; // Only used with shader_framebuffer_fetch logic ops + u32 logic_op_mode : 4; // Only used with shader_framebuffer_fetch logic ops u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; From 40eb071562a56acda985cc733b8ac985300c5f22 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Fri, 30 Jul 2021 04:09:22 -0400 Subject: [PATCH 05/12] ShaderCompiler: Add new helper define for input attachment binding --- Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp index a06cab771e..8ec105ceeb 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp @@ -50,6 +50,7 @@ static const char 
SHADER_HEADER[] = R"( #define SAMPLER_BINDING(x) layout(set = 1, binding = x) #define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8)) #define SSBO_BINDING(x) layout(set = 2, binding = x) + #define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z) #define VARYING_LOCATION(x) layout(location = x) #define FORCE_EARLY_Z layout(early_fragment_tests) in From f87f704f43a18c8ff6eeeebc44ece07f02985c98 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:40:59 -0400 Subject: [PATCH 06/12] ShaderCompiler: Add helpers for Metal framebuffer fetch --- Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp index 8ec105ceeb..1fb61331bb 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp @@ -54,6 +54,9 @@ static const char SHADER_HEADER[] = R"( #define VARYING_LOCATION(x) layout(location = x) #define FORCE_EARLY_Z layout(early_fragment_tests) in + // Metal framebuffer fetch helpers. 
+ #define FB_FETCH_VALUE subpassLoad(in_ocol0) + // hlsl to glsl function translation #define API_VULKAN 1 #define float2 vec2 From 426c68b5a00a8de18975739de2bdb6575428669b Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:41:26 -0400 Subject: [PATCH 07/12] ShaderCache: Don't turn on logic ops approximation if framebuffer fetch is supported --- Source/Core/VideoCommon/ShaderCache.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 8b1196cabc..9b2ae56f0b 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -585,7 +585,9 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( config.blending_state = blending_state; config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState(); - if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp) + // We can use framebuffer fetch to emulate logic ops in the fragment shader. 
+ if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp && + !g_ActiveConfig.backend_info.bSupportsFramebufferFetch) { WARN_LOG_FMT(VIDEO, "Approximating logic op with blending, this will produce incorrect rendering."); From e0837cb847ddbac93c26a0b7941844ae7cabfec7 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:41:37 -0400 Subject: [PATCH 08/12] PixelShaderGen: Add shader logic ops support on Metal --- Source/Core/VideoCommon/PixelShaderGen.cpp | 81 +++++++++++++++++++++- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ca585f1c23..e3e5e55d28 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -842,6 +842,7 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type, bool per_pixel_depth, bool use_dual_source); static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); +static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data); static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, bool use_dual_source); static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data); @@ -930,10 +931,48 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos uid_data->useDstAlpha); const bool use_shader_blend = !use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch); + const bool use_shader_logic_op = +#ifdef __APPLE__ + !host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch; +#else + false; +#endif if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { + bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op; + +#ifdef __APPLE__ + // 
Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) + // if we want to use it. + if (api_type == APIType::Vulkan) + { + if (use_dual_source) + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); + } + else if (use_shader_blend) + { + // Metal doesn't support a single unified variable for both input and output, so we declare + // the output separately. The input will be defined later below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + + if (use_framebuffer_fetch) + { + // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. + out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); + } + } + else if (use_dual_source) +#else if (use_dual_source) +#endif { if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { @@ -1009,11 +1048,16 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("void main()\n{{\n"); out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); - if (use_shader_blend) + + if (use_framebuffer_fetch) { // Store off a copy of the initial fb value for blending - out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" - "\tfloat4 ocol0;\n" + out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"); + } + + if (use_shader_blend) + { + out.Write("\tfloat4 ocol0;\n" "\tfloat4 ocol1;\n"); } } @@ -1264,6 +1308,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos WriteFog(out, uid_data); + if (use_shader_logic_op) + WriteLogicOp(out, uid_data); + // Write the color and alpha values to the framebuffer // If using shader blend, we still use the separate alpha WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend); @@ -1883,6 +1930,34 @@ static void WriteFog(ShaderCode& out, const 
pixel_shader_uid_data* uid_data) out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } +static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data) +{ + if (uid_data->logic_op_enable) + { + static constexpr std::array logic_op_mode{ + "int4(0, 0, 0, 0)", // CLEAR + "prev & fb_value", // AND + "prev & ~fb_value", // AND_REVERSE + "prev", // COPY + "~prev & fb_value", // AND_INVERTED + "fb_value", // NOOP + "prev ^ fb_value", // XOR + "prev | fb_value", // OR + "~(prev | fb_value)", // NOR + "~(prev ^ fb_value)", // EQUIV + "~fb_value", // INVERT + "prev | ~fb_value", // OR_REVERSE + "~prev", // COPY_INVERTED + "~prev | fb_value", // OR_INVERTED + "~(prev & fb_value)", // NAND + "int4(255, 255, 255, 255)", // SET + }; + + out.Write("\tint4 fb_value = iround(initial_ocol0 * 255.0);\n"); + out.Write("\tprev = {};\n", logic_op_mode[uid_data->logic_op_mode]); + } +} + static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, bool use_dual_source) { From a77ae14d94767fe5398795837e39361471674390 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Mon, 2 Aug 2021 23:41:46 -0400 Subject: [PATCH 09/12] UberShaderPixel: Add shader logic ops support on Metal --- Source/Core/VideoCommon/UberShaderPixel.cpp | 82 ++++++++++++++++++++- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 36fc6addc1..62979e3fee 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -55,6 +55,13 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, const bool stereo = host_config.stereo; const bool use_dual_source = host_config.backend_dual_source_blend; const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; + const bool use_shader_logic_op = +#ifdef __APPLE__ + 
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch; +#else + false; +#endif + const bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op; const bool early_depth = uid_data->early_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0; const bool bounding_box = host_config.bounding_box; @@ -71,7 +78,37 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // Shader inputs/outputs in GLSL (HLSL is in main). if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { +#ifdef __APPLE__ + // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) + // if we want to use it. + if (api_type == APIType::Vulkan) + { + if (use_dual_source) + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); + } + else if (use_shader_blend) + { + // Metal doesn't support a single unified variable for both input and output, so we declare + // the output separately. The input will be defined later below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + + if (use_framebuffer_fetch) + { + // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. 
+ out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); + } + } + else if (use_dual_source) +#else if (use_dual_source) +#endif { if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { @@ -679,11 +716,16 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("void main()\n{{\n"); out.Write(" float4 rawpos = gl_FragCoord;\n"); - if (use_shader_blend) + + if (use_framebuffer_fetch) { // Store off a copy of the initial fb value for blending - out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n" - " float4 ocol0;\n" + out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n"); + } + + if (use_shader_blend) + { + out.Write(" float4 ocol0;\n" " float4 ocol1;\n"); } } @@ -1243,6 +1285,40 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " }}\n" "\n"); + if (use_shader_logic_op) + { + static constexpr std::array logic_op_mode{ + "int4(0, 0, 0, 0)", // CLEAR + "TevResult & fb_value", // AND + "TevResult & ~fb_value", // AND_REVERSE + "TevResult", // COPY + "~TevResult & fb_value", // AND_INVERTED + "fb_value", // NOOP + "TevResult ^ fb_value", // XOR + "TevResult | fb_value", // OR + "~(TevResult | fb_value)", // NOR + "~(TevResult ^ fb_value)", // EQUIV + "~fb_value", // INVERT + "TevResult | ~fb_value", // OR_REVERSE + "~TevResult", // COPY_INVERTED + "~TevResult | fb_value", // OR_INVERTED + "~(TevResult & fb_value)", // NAND + "int4(255, 255, 255, 255)", // SET + }; + + out.Write(" // Logic Ops\n" + " if (logic_op_enable) {{\n" + " int4 fb_value = iround(initial_ocol0 * 255.0);" + " switch (logic_op_mode) {{\n"); + for (size_t i = 0; i < logic_op_mode.size(); i++) + { + out.Write(" case {}u: TevResult = {}; break;\n", i, logic_op_mode[i]); + } + + out.Write(" }}\n" + " }}\n"); + } + // D3D requires that the shader outputs be uint when writing to a uint render target for logic op. 
if (api_type == APIType::D3D && uid_data->uint_output) { From 18b2f6953dfdde2bedf6122a361d307e329bae75 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Sun, 14 Nov 2021 00:25:46 -0800 Subject: [PATCH 10/12] PixelShaderGen: Add shader logic ops support on OpenGL ES To do this, I had to decouple framebuffer fetch from shader blending. We need to be able to access framebuffer fetch input when using shader logic ops. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 64 ++++++++++------------ 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index e3e5e55d28..270e3650f5 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -932,11 +932,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos const bool use_shader_blend = !use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch); const bool use_shader_logic_op = -#ifdef __APPLE__ !host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch; -#else - false; -#endif if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { @@ -969,40 +965,24 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); } } - else if (use_dual_source) -#else - if (use_dual_source) + else #endif { - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) + bool has_broken_decoration = + DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); + + out.Write("{} {} vec4 {};\n", + has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", + use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + use_shader_blend ? 
"real_ocol0" : "ocol0"); + + if (use_dual_source) { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); + out.Write("{} out vec4 ocol1;\n", has_broken_decoration ? + "FRAGMENT_OUTPUT_LOCATION(1)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)"); } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); - } - } - else if (use_shader_blend) - { - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so pull out the "real" output value - // and use a temporary for calculations, then set the output value once at the end of the - // shader - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) - { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); - } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); - } - } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); } if (uid_data->per_pixel_depth) @@ -1051,12 +1031,24 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (use_framebuffer_fetch) { - // Store off a copy of the initial fb value for blending - out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"); + // Store off a copy of the initial framebuffer value. + // + // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the + // framebuffer), we read from real_ocol0 or ocol0, depending if shader blending is enabled. + out.Write("#ifdef FB_FETCH_VALUE\n" + "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" + "#else\n" + "\tfloat4 initial_ocol0 = {};\n" + "#endif\n", + use_shader_blend ? 
"real_ocol0" : "ocol0"); } if (use_shader_blend) { + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader if we are using shader blending. out.Write("\tfloat4 ocol0;\n" "\tfloat4 ocol1;\n"); } From 74a979db097fb540aef175505f622f00137cda7c Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Sun, 14 Nov 2021 00:30:01 -0800 Subject: [PATCH 11/12] UberShaderPixel: Add shader logic ops support on OpenGL ES --- Source/Core/VideoCommon/UberShaderPixel.cpp | 64 +++++++++------------ 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 62979e3fee..97bab4385b 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -56,11 +56,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, const bool use_dual_source = host_config.backend_dual_source_blend; const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; const bool use_shader_logic_op = -#ifdef __APPLE__ !host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch; -#else - false; -#endif const bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op; const bool early_depth = uid_data->early_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0; @@ -105,40 +101,24 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); } } - else if (use_dual_source) -#else - if (use_dual_source) + else #endif { - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) + bool has_broken_decoration = 
+ DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); + + out.Write("{} {} vec4 {};\n", + has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", + use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + use_shader_blend ? "real_ocol0" : "ocol0"); + + if (use_dual_source) { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); + out.Write("{} out vec4 ocol1;\n", has_broken_decoration ? + "FRAGMENT_OUTPUT_LOCATION(1)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)"); } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); - } - } - else if (use_shader_blend) - { - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so pull out the "real" output value - // and use a temporary for calculations, then set the output value once at the end of the - // shader - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) - { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); - } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); - } - } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); } if (per_pixel_depth) @@ -719,12 +699,24 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, if (use_framebuffer_fetch) { - // Store off a copy of the initial fb value for blending - out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n"); + // Store off a copy of the initial framebuffer value. + // + // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the + // framebuffer), we read from real_ocol0 or ocol0, depending if shader blending is enabled. 
+ out.Write("#ifdef FB_FETCH_VALUE\n" + " float4 initial_ocol0 = FB_FETCH_VALUE;\n" + "#else\n" + " float4 initial_ocol0 = {};\n" + "#endif\n", + use_shader_blend ? "real_ocol0" : "ocol0"); } if (use_shader_blend) { + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader if we are using shader blending. out.Write(" float4 ocol0;\n" " float4 ocol1;\n"); } From 1c421444aed7673c30429f8b92e8ac3492cfb5b4 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Sun, 14 Nov 2021 00:37:07 -0800 Subject: [PATCH 12/12] ProgramShaderCache: Don't define FB_FETCH_VALUE for GL_EXT_shader_framebuffer_fetch We will automatically choose between real_ocol0 and ocol0 in the fragment shader. --- Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 492d4b956c..1413813cda 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -692,7 +692,6 @@ void ProgramShaderCache::CreateHeader() { case EsFbFetchType::FbFetchExt: framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n" - "#define FB_FETCH_VALUE real_ocol0\n" "#define FRAGMENT_INOUT inout"; break; case EsFbFetchType::FbFetchArm: