Changes - SailfishOS Open Build Service

Changes of Revision 43

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

[-] [+]	Changed	_service:tar_git:mesa.spec
@@ -16,7 +16,7 @@ Name: mesa Summary: Mesa graphics libraries -Version: 21.3.0+git2 +Version: 21.3.9+git1 Release: 0 Group: System/Libraries License: MIT @@ -168,12 +168,12 @@ %setup -q -n %{name}-%{version}/mesa %build -%meson -Ddri-drivers=%{?with_intel:i915,i965} \ +%meson i-Ddri-drivers=%{?with_intel:i915,i965} \ -Dosmesa=false \ -Ddri3=false \ -Dllvm=false \ -Dshared-llvm=false \ - -Dgallium-drivers=swrast,nouveau,virgl%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro}%{?with_lima:,lima}%{?with_panfrost:,panfrost} \ + -Dgallium-drivers=swrast,nouveau,virgl%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro}%{?with_lima:,lima}%{?with_panfrost:,panfrost}\ -Dvulkan-drivers= \ -Dplatforms=wayland \ -Dglx=disabled \
[-] [+]	Changed	_service ^
@@ -2,7 +2,7 @@ <service name="tar_git"> <param name="url">https://github.com/sailfish-on-dontbeevil/mesa</param> <param name="branch">master</param> - <param name="revision">21.3.0+git2</param> + <param name="revision">21.3.9+git1</param> <param name="token"/> <param name="debian">N</param> <param name="dumb">N</param>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/.gitmodules ^
@@ -1,3 +1,3 @@ [submodule "mesa"] path = mesa - url = https://github.com/sailfishos-mirror/mesa.git + url = https://github.com/sailfish-on-dontbeevil/mesa-1
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.gitlab-ci.yml ^
@@ -102,7 +102,7 @@ - .ci-run-policy script: - apk --no-cache add graphviz doxygen - - pip3 install "sphinx<4.0" breathe mako sphinx_rtd_theme + - pip3 install sphinx breathe mako sphinx_rtd_theme - docs/doxygen-wrapper.py --out-dir=docs/doxygen_xml - sphinx-build -W -b html docs public
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.gitlab-ci/all-skips.txt ^
@@ -6,9 +6,6 @@ # reliable to be run in parallel with other tests due to CPU-side timing. dEQP-GLES[0-9].functional.flush_finish. -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4575 -dEQP-VK.wsi.display.get_display_plane_capabilities - # piglit: WGL is Windows-only wgl@.*
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.pick_status.json ^
@@ -1,5 +1,36851 @@ [ { + "sha": "fc381fa1e341bc9cf71f53a915dc47926a3f621f", + "description": "tu: Actually expose VK_EXT_texel_buffer_alignment", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "main_sha": null, + "because_sha": "3d04c435766a1dc4946a2c5276e0116a5fbb67cd" + }, + { + "sha": "f18429340e05a6559db9f9127595ab07fe807a28", + "description": "lavapipe: Lift fence check into dedicated function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "4f6c7a6025628fe14e06ac864986ab95bdd25457", + "description": "radv: Don't hash ycbcr sampler base object.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null + }, + { + "sha": "012bfde7f3412018f75e335ee9d42c90c7ba3d3e", + "description": "panvk: Hook up emulated secondary command buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "18fced0226dfba4bdfbaf9139aafb97dcea9b85c", + "description": "panvk: Refcount the descriptor set and pipeline layouts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "df92f56d8d0b89092f0984baf165761a4daf287c", + "description": "vulkan/runtime: Add emulated secondary command buffer support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "25542f12d764f7fd6d5ade4da760d09d0d93cf43", + "description": "vulkan/cmd_queue: Fix the allocation scope", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "1437ee749b8a9fee05019582f570b167c0711116", + "description": "vulkan/cmd_queue: Track allocation errors in vk_cmd_queue", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + 
}, + { + "sha": "6cb95877b57d79037e43fff22e855e600bcf4c80", + "description": "vulkan/cmd_queue: Auto-generate more vk_cmd_enqueue_unless_primary_Cmd*", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "3cffffc4415937556be5f94f3edbf23100c199c3", + "description": "vulkan/cmd_queue: Generate enqueue_if_not_primary entrypoints", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "8f29c833da0201271d532f9ea3badbe81a9d077b", + "description": "vulkan/cmd_queue: Add a vk_cmd_queue_execute() helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "e0910f5ef83a5c68099c0179468a7ee2a970cf47", + "description": "Revert \"features: fix some vk extension listings\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "main_sha": null, + "because_sha": "a3e9388953f60d22c188d0e40bb4187be9048800" + }, + { + "sha": "68fe847a2653b89992ed0b5e1f64e54bafc07d22", + "description": "lavapipe: Drop GetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "dc8fdab71efdc3e516f2a8abdfdc2dec05fce448", + "description": "lavapipe: Use VK_OUTARRAY for GetPhysicalDeviceQueueFamilyProperties[2]", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "main_sha": null, + "because_sha": "b38879f8c5f57b7f1802e433e33181bdf5e72aef" + }, + { + "sha": "91cb714dc12dd1a669177aec20151ce1504d4aac", + "description": "panvk: Drop GetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "19f56e3fc47ad4753396ea0831878ad94add85ab", + "description": "v3dv: Drop GetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "2a779f98dc6741ba87def3a54999e2311e5ab489", + "description": "turnip: Drop tu_legacy.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "205bf5d9cbab97bc29b319685d69947b5ccb8bb6", + "description": "radv: Drop GetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "8d7cbe026e344cfb009b0fc88352c8c10388de14", + "description": "anv: Drop GetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "25664c6194850280d375d58cec75e8af8346c25d", + "description": "vulkan: Add a 2 wrapper for vkGetPhysicalDeviceQueueFamilyProperties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "a3e9388953f60d22c188d0e40bb4187be9048800", + "description": "features: fix some vk extension listings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "main_sha": null, + "because_sha": null + }, + { + "sha": "cdaa3a899cf08858274a79d4c806f26881bead31", + "description": "anv: Use layerCount for clears and transitions in BeginRendering", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "main_sha": null, + "because_sha": "3501a3f9ed92831ed039f0d54bf295af41ed0195" + },
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/VERSION ^
@@ -1 +1 @@ -21.3.0 +21.3.9
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/android/mesa3d_cross.mk ^
@@ -88,11 +88,12 @@ -Ddri-search-path=/vendor/$(MESA3D_LIB_DIR)/dri \ -Dplatforms=android \ -Dplatform-sdk-version=$(PLATFORM_SDK_VERSION) \ - -Ddri-drivers= \ + -Ddri-drivers=$(subst $(space),$(comma),$(BOARD_MESA3D_CLASSIC_DRIVERS)) \ -Dgallium-drivers=$(subst $(space),$(comma),$(BOARD_MESA3D_GALLIUM_DRIVERS)) \ -Dvulkan-drivers=$(subst $(space),$(comma),$(subst radeon,amd,$(BOARD_MESA3D_VULKAN_DRIVERS))) \ -Dgbm=enabled \ -Degl=enabled \ + -Dcpp_rtti=false \ MESON_BUILD := PATH=/usr/bin:/bin:/sbin:$$PATH ninja -C $(MESON_OUT_DIR)/build @@ -128,7 +129,6 @@ $(MESON_GEN_FILES_TARGET): PRIVATE_IMPORTED_INCLUDES := $(imported_includes) $(MESON_GEN_FILES_TARGET): PRIVATE_LDFLAGS := $(my_ldflags) $(MESON_GEN_FILES_TARGET): PRIVATE_LDLIBS := $(my_ldlibs) -$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_LDFLAGS := $(my_target_global_ldflags) $(MESON_GEN_FILES_TARGET): PRIVATE_TIDY_CHECKS := $(my_tidy_checks) $(MESON_GEN_FILES_TARGET): PRIVATE_TIDY_FLAGS := $(my_tidy_flags) $(MESON_GEN_FILES_TARGET): PRIVATE_ARFLAGS := $(my_arflags) @@ -139,6 +139,11 @@ $(MESON_GEN_FILES_TARGET): PRIVATE_ARM_CFLAGS := $(normal_objects_cflags) +$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CFLAGS := $(my_target_global_cflags) +$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CONLYFLAGS := $(my_target_global_conlyflags) +$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CPPFLAGS := $(my_target_global_cppflags) +$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_LDFLAGS := $(my_target_global_ldflags) + $(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_LIBCRT_BUILTINS := $(my_target_libcrt_builtins) $(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_LIBATOMIC := $(my_target_libatomic) $(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_CRTBEGIN_SO_O := $(my_target_crtbegin_so_o) @@ -284,13 +289,17 @@ $(foreach driver,$(BOARD_MESA3D_VULKAN_DRIVERS), $(eval $(call vulkan_target,$(driver)))) -$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: MESA3D_GALLIUM_DRI_DIR:=$(MESA3D_GALLIUM_DRI_DIR) 
-$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: $(MESON_OUT_DIR)/install/.install.timestamp - # Create Symlinks +$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.targets.timestamp: MESA3D_GALLIUM_DRI_DIR:=$(MESA3D_GALLIUM_DRI_DIR) +$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.targets.timestamp: $(MESON_OUT_DIR)/install/.install.timestamp mkdir -p $(dir $@) + # Create Symlinks for gallium and kmsro drivers ls -1 $(MESA3D_GALLIUM_DRI_DIR)/ \| PATH=/usr/bin:$$PATH xargs -I{} ln -s -f libgallium_dri.so $(dir $@)/{} + # Remove unwanted Symlinks created for classic dri drivers + $(foreach d,$(BOARD_MESA3D_CLASSIC_DRIVERS), rm $(dir $@)/$(d)_dri.so;) + # Copy classic dri drivers + $(foreach d,$(BOARD_MESA3D_CLASSIC_DRIVERS), cp $(MESA3D_GALLIUM_DRI_DIR)/$(d)_dri.so $(dir $@)/$(d)_dri.so;) touch $@ -$($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN): $(TARGET_OUT_VENDOR)/$(MESA3D_LIB_DIR)/dri/.symlinks.timestamp +$($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN): $(TARGET_OUT_VENDOR)/$(MESA3D_LIB_DIR)/dri/.targets.timestamp echo "Build $@" touch $@
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/drivers/zink.rst ^
@@ -129,11 +129,17 @@ OpenGL 4.1 ^^^^^^^^^^ -For OpenGL 4.1 support, the following additional ``VkPhysicalDeviceLimits`` -are required: +For OpenGL 4.1 support, the following additional requirements need to be +supported: + +* ``VkPhysicalDeviceFeatures``: -* ``maxImageDimension2D`` ≥ 16384 -* ``maxViewports`` ≥ 16 + * ``multiViewport`` + +* ``VkPhysicalDeviceLimits`` + + * ``maxImageDimension2D`` ≥ 16384 + * ``maxViewports`` ≥ 16 OpenGL 4.2 ^^^^^^^^^^ @@ -177,7 +183,6 @@ * ``VkPhysicalDeviceFeatures``: * ``robustBufferAccess`` - * ``multiViewport`` * Formats requiring ``VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT``:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/features.txt ^
@@ -331,7 +331,7 @@ GL_EXT_texture_norm16 DONE (freedreno, i965, r600, radeonsi, nvc0i, softpipe, zink) GL_EXT_texture_sRGB_R8 DONE (all drivers that support GLES 3.0+) GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+, panfrost) - GL_KHR_texture_compression_astc_hdr DONE (i965/bxt, panfrost) + GL_KHR_texture_compression_astc_hdr DONE (core only) GL_KHR_texture_compression_astc_sliced_3d DONE (i965/gen9+, r600, radeonsi, panfrost, softpipe, swr, zink, lima) GL_OES_depth_texture_cube_map DONE (all drivers that support GLSL 1.30+) GL_OES_EGL_image DONE (all drivers)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.0.rst ^
@@ -21,7 +21,7 @@ :: - TBD. + a2753c09deef0ba14d35ae8a2ceff3fe5cd13698928c7bb62c2ec8736eb09ce1 mesa-21.3.0.tar.xz New features
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.1.rst ^
@@ -0,0 +1,132 @@ +Mesa 21.3.1 Release Notes / 2021-12-01 +====================================== + +Mesa 21.3.1 is a bug fix release which fixes bugs found since the 21.3.0 release. + +Mesa 21.3.1 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.1 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- GPU Crash in Yuzu 6600xt 5.15 +- [spirv-fuzz] lower_trivial_continues_block: Assertion \`!first_instr \|\| instr_is_continue(first_instr)' failed. 
+- [RADV] Crash in Metro Exodus in Caspain chapter and Sam's Story +- NIR validation failed after nir_copy_prop +- lima: Corrupted Android-12 UI on Allwinner A64 +- RADV/ACO: Rendering glitches in Forza Horizon 5 windshields +- dEQP-GLES31.imulextended compiling fp64 glsl 4.00 shader + + +Changes +------- + +Connor Abbott (3): + +- ir3/ra: Consider reg file size when swapping killed sources +- ir3/lower_pcopy: Fix shr.b illegal copy lowering +- ir3/lower_pcopy: Fix bug with "illegal" copies and swaps + +Eric Engestrom (2): + +- docs: update sha256sum for 21.3.0 +- .pick_status.json: Update to 1ba231fb75be5bffd806cbd09ac285d1f8f15e3d + +Erico Nunes (1): + +- ci: temporarily disable lima CI + +Iago Toral Quiroga (3): + +- broadcom/compiler: don't allow RF writes from signals after thrend +- broadcom/compiler: fix scoreboard locking checks +- broadcom/compiler: don't move ldvary earlier if current instruction has ldunif + +Ian Romanick (1): + +- glsl/nir: Don't build soft float64 when it cannot be used + +Iván Briano (1): + +- intel/nir: also allow unknown format for getting the size of a storage image + +Kenneth Graunke (3): + +- iris: Make a helper function for cross-batch dependency flushing +- iris: Check for cross-batch flushing whenever a buffer is newly written. 
+- iris: Tidy code in iris_use_pinned_bo a bit + +Lionel Landwerlin (3): + +- anv: don't try to close fd = -1 +- intel/fs: fix shader call lowering pass +- util/u_trace: refcount payloads + +Mauro Rossi (1): + +- android: define cpp_rtti=false because libLLVM is built w/o RTTI (v2) + +Mike Blumenkrantz (6): + +- zink: block suballocator caching for swapchain/dmabuf images +- zink: set suballocator bo size to aligned allocation size +- zink: stop using VK_IMAGE_LAYOUT_PREINITIALIZED for dmabuf +- zink: always set matching resource export type for dmabuf creation +- zink: fix memory availability reporting +- zink: fail context creation more gracefully + +Mykhailo Skorokhodov (1): + +- nir: Fix read depth for predecessors + +Qiang Yu (1): + +- glx/dri3: fix glXQueryContext does not return GLX_RENDER_TYPE value + +Rhys Perry (4): + +- aco/spill: use spills_entry instead of spills_exit to kill linear VGPRs +- spirv: run nir_copy_prop before nir_rematerialize_derefs_in_use_blocks_impl +- nir/dce: fix DCE of loops with a halt or return instruction in the pre-header +- aco: don't create DPP instructions with SGPR operands + +Roman Stratiienko (1): + +- android.mk: Add missing variables to the make target + +Samuel Pitoiset (4): + +- radv: disable HTILE for D32S8 format and mipmaps on GFX10 +- radv: fix emitting VBO when vertex input dynamic state is used +- radv: add a workaround to fix a segfault with Metro Exodus (Linux native) +- radv: fix resetting the entire vertex input dynamic state + +Thomas H.P. Andersen (1): + +- svga: fix bitwise/logical and mixup + +Vasily Khoruzhick (1): + +- lima: disasm: use last argument as a filename
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.2.rst ^
@@ -0,0 +1,126 @@ +Mesa 21.3.2 Release Notes / 2021-12-17 +====================================== + +Mesa 21.3.2 is a bug fix release which fixes bugs found since the 21.3.1 release. + +Mesa 21.3.2 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.2 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- Flickering and blackscreen on Mpv and Clapper (and also low performance on glxgears) +- DXVK SIGBUS with Turnip on Poco F1 at loading to open world. +- RADV: IsHelperInvocationEXT query is not considered volatile in ACO +- [GraphicsFuzz] dEQP-VK.graphicsfuzz.stable-binarysearch-tree-nested-if-and-conditional +- [bisected] Mesa 21.3.x breaks GBM with NVIDIA closed source driver 495.44 +- [DG2] dEQP-GL[45\|ES31].functional.shaders.builtin_functions.pack_unpack.packhalf2x16_compute fail + + +Changes +------- + +Alyssa Rosenzweig (1): + +- panfrost: Don't shadow Mesa's fui() + +Caio Oliveira (1): + +- nir: Initialize nir_register::divergent + +Danylo Piliaiev (1): + +- turnip: Fix operator precedence in address calculation macros for queries + +Dave Airlie (1): + +- crocus: cleanup bo exports for external objects + +Eric Engestrom (1): + +- .pick_status.json: Update to a65285f54be6d756a8a558f638c18bb4f075222c + +Francisco Jerez (1): + +- intel/fs/xehp: Teach SWSB pass about the exec pipeline of FS_OPCODE_PACK_HALF_2x16_SPLIT. 
+ +Ian Romanick (2): + +- intel/compiler: Don't predicate a WHILE if there is a CONT +- intel/stub: Silence "initialized field overwritten" warning + +Jakob Bornecrantz (1): + +- vulkan-device-select: Don't leak drmDevicePtr + +James Jones (1): + +- gbm: Don't pass default usage flags on ABIs < 1 + +Jason Ekstrand (3): + +- crocus: wm_prog_key::key_alpha_test uses GL enums +- anv: Stop doing too much per-sample shading +- radeonsi/nir: Check for VARYING_SLOT_PRIMITIVE_ID not SYSTEM_VALUE + +Lionel Landwerlin (4): + +- nir/opt_deref: don't try to cast empty structures +- intel/nir: preserve access value when duping intrinsic +- nir/lower_io: include the variable access in the lowered intrinsic +- vulkan: fix missing handling of WSI memory signal + +Mauro Rossi (1): + +- android: add support for classic dri-drivers (v2) + +Michel Zou (1): + +- meson: correctly detect linker arguments + +Nanley Chery (1): + +- iris: Free the local cache bucket in bufmgr_destroy + +Pierre-Eric Pelloux-Prayer (1): + +- radeonsi: fix fast clear / depth decompression corruption + +Rhys Perry (1): + +- radv: have the null winsys set more fields + +Roman Stratiienko (2): + +- v3dv: Fix dEQP-VK.info#instance_extensions test +- v3dv: Fix V3DV_HAS_SURFACE preprocessor condition + +Tapani Pälli (1): + +- anv: allow VK_IMAGE_LAYOUT_UNDEFINED as final layout + +Timur Kristóf (1): + +- aco: Clean up and fix quad group instructions with WQM.
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.3.rst ^
@@ -0,0 +1,108 @@ +Mesa 21.3.3 Release Notes / 2021-12-29 +====================================== + +Mesa 21.3.3 is a bug fix release which fixes bugs found since the 21.3.2 release. + +Mesa 21.3.3 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.3 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- Assassin's Creed Syndicate crashes with Mesa 21.3.0+ ACO +- [21.3 regression] swr: Build failure with MSVC +- anv: dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store fails + + +Changes +------- + +Alyssa Rosenzweig (2): + +- pan/indirect_draw: Don't upload garbage UBO +- panfrost: Fix Secondary Shader field + +Bas Nieuwenhuizen (2): + +- radv: Use correct buffer size for query pool result copies. +- radv: Skip wait timeline ioctl with 0 handles. + +Daniel Schürmann (2): + +- aco/optimizer: fix fneg modifier propagation on VOP3P +- aco/ra: fix get_reg_for_operand() in case of stride mismatches + +Emma Anholt (7): + +- r300: Fix mis-optimization turning -1 - x into 1 - x. +- r300: Move the instruction filter for r500_transform_IF() to the top. +- r300: Ensure that immediates have matching negate flags too. +- r300: Also consider ALU condition modifiers for loop DCE. +- r300: Disable loop unrolling on r500. +- r300/vs: Allocate temps we see a use as a source, too. +- r300/vs: Fix flow control processing just after an endloop. 
+ +Eric Engestrom (2): + +- .pick_status.json: Update to 4942e108909bbe0f53ec5fd52a3c7ae14c60abe6 +- .pick_status.json: Mark d49d092259829ad9e33d0d9fc8eef9759d9fe56e as denominated + +Francisco Jerez (1): + +- intel/fs: Add physical fall-through CFG edge for unconditional BREAK instruction. + +Jason Ekstrand (1): + +- vulkan/log: Don't assert on non-client-visible objects + +Jesse Natalie (1): + +- microsoft/compiler: Implement inot + +Liviu Prodea (1): + +- swr: Fix MSVC build + +Rob Clark (2): + +- freedreno/ir3: Handle instr->address when cloning +- freedreno/computerator: Fix @buf header + +Samuel Pitoiset (1): + +- radv: re-apply "Do not access set layout during vkCmdBindDescriptorSets." + +Tapani Pälli (1): + +- glsl: fix invariant qualifer usage and matching rule for GLSL 4.20 + +Timur Kristóf (2): + +- aco/optimizer_postRA: Fix combining DPP into VALU. +- aco/optimizer_postRA: Fix applying VCC to branches. + +Vinson Lee (1): + +- panfrost: Avoid double unlock.
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.4.rst ^
@@ -0,0 +1,181 @@ +Mesa 21.3.4 Release Notes / 2022-01-12 +====================================== + +Mesa 21.3.4 is a bug fix release which fixes bugs found since the 21.3.3 release. + +Mesa 21.3.4 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.4 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- i965: gen5 exposes EXT_texture_integer incorrectly +- [radeonsi, regression, bisected]: Rendering issues with Factorio +- mesa >= 21.x (with llvmpipe) will break the game JVGS +- Panfrost - RK3399 - FACTORIO - glitches everywhere +- Baldurs Gate 3 (Patch 6) - ribbon-like artifacts on textures +- Game Starsector crashes under certain circumstances with mesa 21.3.+ +- FreeSpace models incorrectly rendered on Polaris cards, causing system freeze +- Incomplete evaluation of nested DEFINE macros +- [r300g, bisected] piglit glsl-fs-discard-04 fails +- Panfrost G52 Firefox terrible glitches on youtube playback + + +Changes +------- + +Alyssa Rosenzweig (3): + +- pan/bi: Fix load_const of 1-bit booleans +- gallium/util: Add pixel->blocks box helper +- lima,panfrost: Correct pixel vs block mismatches + +Bas Nieuwenhuizen (1): + +- radv: Set optimal copy alignment to 1. 
+ +Boris Brezillon (1): + +- microsoft/compiler: Fix dxil_nir_create_bare_samplers() + +Connor Abbott (2): + +- ir3/ra: Fix logic bug in compress_regs_left +- ir3: Bump type mismatch penalty to 3 + +Daniel Schürmann (2): + +- aco: don't allow SDWA on VOP3P instructions +- aco: validate VOP3P opsel correctly + +Danylo Piliaiev (1): + +- tu: fix workaround for depth bounds test without depth test + +Dave Airlie (2): + +- intel/genxml/gen4-5: fix more Raster Operation in BLT to be a uint +- crocus: fail resource allocation properly. + +Emma Anholt (4): + +- freedreno/afuc: Disable the disassembler on 32-bit builds. +- i915g: Turn off FP16 in the vertex shaders. +- r300: Fix omod failing to increase the number of channels stored. +- nir_to_tgsi: Fix a bug in TXP detection after backend lowering. + +Eric Engestrom (3): + +- .pick_status.json: Update to 8a78706643ecad8a1f303cc9358873abc29978b4 +- .pick_status.json: Mark 00bea38242d97e7ace1954f1bc7d32cbf0ce3ee0 as denominated +- .pick_status.json: Mark 2a0253b9b5d21c7571555abe3a1d851468a18740 as denominated + +Filip Gawin (1): + +- r300: fix handling swizzle in transform_source_conflicts + +Henry Goffin (1): + +- intel/compiler/test: Fix build with GCC 7 + +Ian Romanick (1): + +- i965: Disable EXT_texture_integer on Gfx4 and Gfx5 + +Jason Ekstrand (1): + +- Revert "anv: Stop doing too much per-sample shading" + +Lionel Landwerlin (3): + +- anv: don't leave anv_batch fields undefined +- anv: limit compiler valid color outputs using NIR variables +- intel/dev: fixup chv workaround + +Lucas Stach (3): + +- etnaviv: initialize vertex attributes on context reset +- etnaviv: drm: fix size limit in etna_cmd_stream_realloc +- etnaviv: drm: properly handle reviving BOs via a lookup + +Michel Zou (1): + +- zink: fix -Warray-bounds warning + +Mike Blumenkrantz (4): + +- radv: fix xfb query copy param ordering +- zink: always unset vertex shader variant key data when changing last vertex stage +- zink: add extra synchronization for 
buffer descriptor binds +- zink: use device-local heap for sparse backing allocations + +Pavel Ondračka (1): + +- r300: Remove broken optimization in rc_transform_KILL + +Pierre-Eric Pelloux-Prayer (4): + +- radeonsi/gfx8: use the proper dcc clear size +- vbo/dlist: fix loopback crash +- vbo/dlist: add vertices to incomplete primitives +- radeonsi/gfx10: fix si_texture_get_offset for mipmapped tex + +Qiang Yu (1): + +- glapi: should not add alias function to static_data.py + +Rhys Perry (1): + +- aco: remove pack_half_2x16(a, 0) optimization + +Rohan Garg (1): + +- intel/fs: OpImageQueryLod does not support arrayed images as an operand + +Roman Stratiienko (1): + +- v3dv: Hotfix: Rename remaining V3DV_HAS_SURFACE->V3DV_USE_WSI_PLATFORM + +Samuel Pitoiset (1): + +- radv: add drirc radv_disable_htile_layers and enable it for F1 2021 + +Tapani Pälli (3): + +- iris: unref syncobjs and free r/w dependencies array for slab entries +- mesa: free idalloc storage for display lists +- mesa: free vbo_save_vertex_list store prims + +Timothy Arceri (1): + +- glsl/glcpp: make sure to expand new token after concatenation + +Yiwei Zhang (1): + +- venus: subtract appended header size in vn_CreatePipelineCache + +satmandu (1): + +- Fix compilation on armv7l with gcc 11.2.0
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.5.rst ^
@@ -0,0 +1,143 @@ +Mesa 21.3.5 Release Notes / 2022-01-26 +====================================== + +Mesa 21.3.5 is a bug fix release which fixes bugs found since the 21.3.4 release. + +Mesa 21.3.5 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.5 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- glGetActiveUniform failing with GL_INVALID_VALUE for no reason +- CopelliaSim crashes on adding vision sensor to a scene on Ubuntu 20+, runs fine on Ubuntu 18.04 +- Dirt Rally: Flickering glitches on certain foliage going from Mesa 21.2.5 to 21.3.0 +- FrontFacing input is broken on Intel/Vulkan +- llvmpipe: Unimplemented get_driver_uuid/get_device_uuid causes segfaults in e.g. Wine + + +Changes +------- + +Alejandro Piñeiro (1): + +- vulkan: return default string for undefined enum + +Alyssa Rosenzweig (3): + +- pan/bi: Schedule around blend shader register clobbering +- panfrost: Use u_reduced_prim for primitive checks +- panfrost: Only cull polygons + +Bas Nieuwenhuizen (1): + +- util/fossilize_db: Fix double free in error handling. + +Carsten Haitzler (1): + +- panfrost: Don't double-free when handling error for unsupported GPU + +Charles Baker (1): + +- zink: Output PackHalf2x16 to uint not float + +Emma Anholt (1): + +- softpipe: respect !independent_blend_enable for color masks. 
+ +Eric Engestrom (4): + +- .pick_status.json: Update to 06504fb9e2382e43b889fd6ca642bb785b544d4d +- .pick_status.json: Mark 1b88777e97f635612c560a2f00d349ea271581b1 as denominated +- .pick_status.json: Mark d1530a3f3b1625baa42e84cba9844b6eb4ac76ce as denominated +- .pick_status.json: Mark 58a843ab14e0eecf044a35154da72cdf7ab6f15a as denominated + +Ian Romanick (1): + +- intel/fs: Fix gl_FrontFacing optimization on Gfx12+ + +Kenneth Graunke (1): + +- iris: Fix and refactor check for clear color being fully zero + +Lepton Wu (1): + +- driconf: Fix unhandled tags in static conf + +Lionel Landwerlin (2): + +- intel/fs: disable VRS when omask is written +- anv: fix missing descriptor copy of bufferview/surfacestate content + +Mike Blumenkrantz (10): + +- zink: always set number of timestamp results to 1 for internal qbo +- zink: fix availability buffer sizing/copying for xfb queries +- zink: skip readback of qbos with no results +- Revert "zink: when performing an implicit reset, sync qbos" +- zink: use even more accurate stride values for query result copies +- aux/trace: copy over stream_output_target_offset method from context +- util/vbuf: fix buffer translation sizing +- zink: remove SpvMemorySemanticsMakeVisibleMask from nir_intrinsic_memory_barrier +- zink: check EXT_image_drm_format_modifier for dmabuf support +- zink: stop allocating such massive staging regions for buffer maps + +Pavel Ondračka (1): + +- r300: properly initialize new_vs in r300_draw_init_vertex_shader + +Pierre-Eric Pelloux-Prayer (2): + +- driconf: enable vs_position_always_invariant for Dirt Rally +- mesa: use less temporaries in build_lighting + +Qiang Yu (1): + +- nir: fix nir_tex_instr hash not count is_sparse field + +Rhys Perry (1): + +- nir/unsigned_upper_bound: don't follow 64-bit f2u32() + +Rob Clark (5): + +- mesa/st: Lowered ucp should still mark rast state dirty +- freedreno: Pass shader cache key instead of shader key +- freedreno: Add FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE +- 
freedreno/a6xx: Fix clip_mask +- freedreno/a5xx: Fix clip_mask + +Stefan Brüns (1): + +- llvmpipe: Add get_{driver,device}_uuid implementations + +Tapani Pälli (2): + +- mesa: refactor GetProgramiv to use program resource list +- mesa: move GetProgramInterfaceiv as a shader_query function + +Yiwei Zhang (1): + +- venus: VkExternalImageFormatProperties is optional
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.6.rst ^
@@ -0,0 +1,149 @@ +Mesa 21.3.6 Release Notes / 2022-02-09 +====================================== + +Mesa 21.3.6 is a bug fix release which fixes bugs found since the 21.3.5 release. + +Mesa 21.3.6 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.6 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- radv: CullDistance fail +- i965: Segmentation fault during glinfo context destruction, regression in 21.3.x +- Vulkan Wayland WSI returns empty surface formats +- [REGRESSION][BISECTED] iris: Qutebrowser/QtWebEngine sporadically flashes the window in white +- Flickering Intel Uhd 620 Graphics +- Broken Terraria & Glitches in Forza Horizon 4 + + +Changes +------- + +Bas Nieuwenhuizen (1): + +- Revert "nir/algebraic: distribute fmul(fadd(a, b), c) when b and c are constants" + +Caio Oliveira (1): + +- anv: Fix subgroupSupportedStages physical property + +Charles Baker (1): + +- Revert "zink: handle vertex buffer offset overflows" + +Connor Abbott (2): + +- ir3: Fix copy-paste mistakes in ir3_block_remove_physical_predecessor() +- ir3/cp: ir3: Prevent propagating shared regs out of loops harder + +Danylo Piliaiev (1): + +- ir3: opt_deref in opt loop to remove unnecessary tex casts + +Dave Airlie (1): + +- crocus: find correct relocation target for the bo. 
+ +Emma Anholt (1): + +- vulkan: Fix leak of error messages + +Eric Engestrom (3): + +- .pick_status.json: Update to cb781fc350108584116280fc597c695d2f476c68 +- .pick_status.json: Mark 15e77504461a30038a054c87cc53a694171c9cf4 as denominated +- .pick_status.json: Mark 960e72417f3e8885699cf384f690853e14ba44da as denominated + +Francisco Jerez (1): + +- intel/fs: Take into account region strides during SIMD lowering decision of SHUFFLE. + +Georg Lehmann (4): + +- vulkan/wsi/wayland: Fix add_wl_shm_format alpha/opaqueness. +- vulkan/wsi/wayland: Convert missing vulkan formats to shm formats. +- vulkan/wsi/wayland: Add modifiers for RGB formats. +- vulkan/wsi/wayland: Fix add_drm_format_modifier aplha/opaqueness. + +Jason Ekstrand (2): + +- anv/pass: Don't set first_subpass_layout for stencil-only attachments +- vulkan/wsi: Set MUTABLE_FORMAT_BIT in the prime path + +Kenneth Graunke (1): + +- i965: Avoid NULL drawbuffer in brw_flush_front + +Lionel Landwerlin (2): + +- intel/fs: don't set allow_sample_mask for CS intrinsics +- intel/nir: fix shader call lowering + +Manas Chaudhary (1): + +- panvk: Fix pointer corruption in panvk_add_wait_event_syncobjs + +Mike Blumenkrantz (15): + +- zink: never use SpvOpImageQuerySizeLod for texel buffers +- zink: reorder fbfetch flag-setting to avoid null deref +- zink: fix vertex buffer mask computation for null buffers +- zink: clamp tbo creation to maxTexelBufferElements +- zink: add vertex shader pipeline bit for generated barrier construction +- zink: fix waiting on current batch id +- zink: cast image atomic op params/results based on image type +- zink: use SpvScopeDevice over SpvScopeWorkgroup for atomic shader ops +- zink: disable PIPE_SHADER_CAP_FP16_CONST_BUFFERS +- llvmpipe: disable PIPE_SHADER_CAP_FP16_CONST_BUFFERS +- llvmpipe: ci updates +- zink: add VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT for query binds +- zink: fix PIPE_CAP_TGSI_BALLOT export conditional +- zink: reject invalid draws +- zink: min/max blit 
region in coverage functions + +Nanley Chery (3): + +- anv: Disable CCS_E for some 8/16bpp copies on TGL+ +- anv: Use ANV_FAST_CLEAR_DEFAULT_VALUE for CCS on TGL+ +- anv: Re-enable CCS_E on TGL+ + +Paulo Zanoni (1): + +- iris: implement inter-context busy-tracking + +Rhys Perry (3): + +- aco: fix neg(abs(mul(a, b))) if the mul is not VOP3 +- aco: don't encode src2 for v_writelane_b32_e64 +- radv: fix R_02881C_PA_CL_VS_OUT_CNTL with mixed cull/clip distances + +Samuel Pitoiset (1): + +- radv/winsys: fix missing buffer_make_resident() for the null winsys + +Yiwei Zhang (1): + +- tu: VkExternalImageFormatProperties is optional
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.7.rst ^
@@ -0,0 +1,148 @@ +Mesa 21.3.7 Release Notes / 2022-02-23 +====================================== + +Mesa 21.3.7 is a bug fix release which fixes bugs found since the 21.3.6 release. + +Mesa 21.3.7 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.7 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- lavapipe: dEQP-VK.spirv_assembly.instruction.compute.float16.arithmetic_3.step fails +- ANV: Bad output from TransformFeedback . Regression from Mesa 21. Something to do with VB+XFB -> VB+XFB dependency? + + +Changes +------- + +Alyssa Rosenzweig (4): + +- pan/bi: Avoid \FADD.v2f16 hazard in optimizer +- pan/bi: Avoid \FADD.v2f16 hazard in scheduler +- pan/bi: Lower swizzles on CSEL.i32/MUX.i32 +- panvk: Use more reliable assert for UBO pushing + +Bas Nieuwenhuizen (1): + +- radv: Fix preamble argument order. + +Connor Abbott (1): + +- ir3/spill: Fix simplify_phi_nodes with multiple loop nesting + +Dave Airlie (3): + +- lavapipe: fix sampler + sampler view leaks. +- lavapipe: reference gallium fences correctly. +- crocus: fix leak on gen4/5 stencil fallback blit path. + +Emma Anholt (1): + +- i915g: Initialize the rest of the "from_nir" temporary VS struct. 
+ +Eric Engestrom (2): + +- .pick_status.json: Update to dabba7d7263be6ffb6f3676465e92c65952fa824 +- .pick_status.json: Mark b07372312d7053f2ef5c858ceb1fbf9ade5e7c52 as denominated + +Ian Romanick (9): + +- gallivm/nir: Call nir_lower_bool_to_int32 after nir_opt_algebraic_late +- nir: All set-on-comparison opcodes can take all float types +- intel/fs: Don't optimize out 1.0x and -1.0x +- spriv: Produce correct result for GLSLstd450Step with NaN +- spirv: Produce correct result for GLSLstd450Modf with Inf +- spirv: Produce correct result for GLSLstd450Tanh with NaN +- nir: Properly handle various exceptional values in frexp +- nir: Produce correct results for atan with NaN +- nir: Add missing dependency on nir_opcodes.py + +Jason Ekstrand (1): + +- anv: Call vk_command_buffer_finish if create fails + +Jonathan Gray (1): + +- dri: avoid NULL deref of DrawBuffer on flush + +Lionel Landwerlin (2): + +- nir: fix lower_memcpy +- anv/genxml/intel/fs: fix binding shader record entry + +Marcin Ślusarz (1): + +- anv: don't set color state when input state was requested + +Marek Olšák (1): + +- ac/surface: add more elements to meta equations because HTILE can use them + +Mike Blumenkrantz (4): + +- lavapipe: use util_pack_color_union() for generating clear colors +- aux/draw: fix llvm tcs lane vec generation +- zink: always set VkPipelineMultisampleStateCreateInfo::pSampleMask +- zink: always invalidate streamout counter buffer if not resuming + +Nanley Chery (1): + +- iris: Don't fast clear with the view format + +Pavel Ondračka (1): + +- r300: fix transformation of abs modifiers with negate + +Qiang Yu (3): + +- radeonsi: workaround Specviewperf13 Catia hang on GFX9 +- radeonsi: fix depth stencil multi sample texture blit +- glx: fix pbuffer refcount init + +Samuel Pitoiset (1): + +- radv/winsys: fix initializing debug/perftest options if multiple instances + +Tapani Pälli (5): + +- intel/genxml: add PIPE_CONTROL field for L3 read only cache invalidation +- anv: invalidate 
L3 read only cache when VF cache is invalidated +- iris: invalidate L3 read only cache when VF cache is invalidated +- iris: fix a leak on surface states +- mesa/st: always use DXT5 when transcoding ASTC format + +Thierry Reding (2): + +- tegra: Use private reference count for sampler views +- tegra: Use private reference count for resources + +Timur Kristóf (1): + +- radv: Disable IB2 on compute queues. + +Yiwei Zhang (1): + +- venus: properly destroy deferred ahb image before real image creation
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.8.rst ^
@@ -0,0 +1,183 @@ +Mesa 21.3.8 Release Notes / 2022-03-18 +====================================== + +Mesa 21.3.8 is a bug fix release which fixes bugs found since the 21.3.7 release. + +Mesa 21.3.8 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.8 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- freedreno: deqp cts fails +- radeonsi dEQP-GLES3.functional.buffer.map.write.explicit_flush.* flake crashes +- Square Artifacts Dragons Dogma +- radv: When using VS+PS only, primitive ID is mixed up after NGG culling +- Redraw freeze after upgrade to Xwayland 21.1.3 +- anv doesn't always resolve aux buffers with private bindings on transition to external queue +- [ANV] Rendering corruption in DOOM Eternal + + +Changes +------- + +Adam Jackson (1): + +- meson: Add "amber" option for automatic LTS build configuration + +Alyssa Rosenzweig (6): + +- panfrost: Fix FD resource_get_handle +- panfrost: Handle NULL sampler views +- panfrost: Handle NULL samplers +- panfrost: Flush resources when shadowing +- panfrost: Push twice as many uniforms +- panfrost: Fix set_sampler_views for big GL + +Connor Abbott (4): + +- ir3: Don't always set bindless_tex with readonly images +- ir3/nir: Fix 1d array readonly images +- ir3/ra: Sanitize parallel copy flags better +- util/bitset: Fix off-by-one in __bitset_set_range + +Danylo Piliaiev (1): + +- turnip: Use LATE_Z when there might 
be depth/stencil feedback loop + +Dave Airlie (5): + +- draw/so: don't use pre clip pos if we have a tes either. +- crocus: change the line width workaround for gfx4/5 +- gallivm/nir: extract a valid texture index according to exec_mask. +- zink: workaround depth texture mode alpha. +- lavapipe: remove broken workaround for zink depth texturing. + +Eric Engestrom (14): + +- .pick_status.json: Update to 2106c3bab6bdea736c468fb1866fd0f372cc0baa +- .pick_status.json: Mark 7ec0e2b89351e6e56cb112e00e6c68c6bbc6faea as denominated +- .pick_status.json: Mark 0136545d169adb75e4f9f6b4de38eef0817c1241 as denominated +- .pick_status.json: Mark 62b8daa889daefb2f191a63f370541bf2b807e88 as denominated +- .pick_status.json: Mark 698ae34844b7199b8acc3b4d74a9cad3b903bdef as denominated +- .pick_status.json: Mark 03a80490a47b0b616566c6f56581560694976b1a as denominated +- .pick_status.json: Mark e1964e1dde7bf44ceeaf3fa8b3869e791af4a369 as denominated +- .pick_status.json: Mark 3ef093f697ad9027ba514c7a4a6a10b7bd95bd47 as denominated +- .pick_status.json: Mark 2d1b506acfe55165511a2bb83acb013353e531ab as denominated +- .pick_status.json: Mark 204ea77b0674fb611155bd3ba2e6169cc8646b3f as denominated +- .pick_status.json: Mark a5c7d34fdf8403b0115d5eead7ca67027e93efc7 as denominated +- .pick_status.json: Mark 432700fc61a33e0c040d47d9b7bd8cfe970d35cc as denominated +- .pick_status.json: Mark 4ed7329236a576b6b6f615787bb722b960f32c6b as denominated +- .pick_status.json: Mark 3f7da0c58447979976eb2928625b1f93154f6c57 as denominated + +Erik Faye-Lund (2): + +- docs: remove incorrect drivers from extension +- docs: fixup zink gl 4.3 requirements + +Icecream95 (6): + +- panfrost: Set PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION +- pan/bi: Check dependencies of both destinations of instructions +- panfrost: Set dirty state in set_shader_buffers +- panfrost: Re-emit descriptors after resource shadowing +- pan/bi: Make disassembler build reproducibly +- panfrost: Fix ubo_mask calculation + +Jason 
Ekstrand (2): + +- anv: Don't assume depth/stencil attachments have depth +- lavapipe: Reset the free_cmd_buffers list in TrimCommandPool + +Jonathan Gray (6): + +- util: unbreak non-linux mips64 build +- util: fix util_cpu_detect_once() build on OpenBSD +- util/u_atomic: fix build on clang archs without 64-bit atomics +- util: fix build with clang 10 on mips64 +- util: use correct type in sysctl argument +- radv: use MAJOR_IN_SYSMACROS for sysmacros.h include + +Lionel Landwerlin (3): + +- anv: fix conditional render for vkCmdDrawIndirectByteCountEXT +- anv: fix fast clear type value with external images +- intel/fs: fix total_scratch computation + +Marek Olšák (2): + +- amd: add a workaround for an SQ perf counter bug +- radeonsi: fix an assertion failure with register shadowing + +Mike Blumenkrantz (16): + +- gallivm: avoid division by zero when computing cube face +- zink: always update shader variants when rebinding a gfx program +- zink: use a fence for pipeline cache update jobs +- zink: wait on program cache fences before destroying programs +- zink: fix descriptor cache pointer array allocation +- zink: mark fbfetch push sets as non-cached +- zink: stop leaking descriptor sets +- zink: invalidate non-punted recycled descriptor sets that are not valid +- zink: fix 64bit float shader ops +- llvmpipe: fix debug print iterating in set_framebuffer_state +- llvmpipe: clamp surface clear geometry +- lavapipe: update multisample state after blend state +- aux/trace: rzalloc the context struct +- zink: lower dmod on AMD hardware +- lavapipe: skip format checks for EXTENDED_USAGE +- lavapipe: run nir_opt_copy_prop_vars during optimization loop + +Paulo Zanoni (1): + +- iris: fix register spilling on compute shaders on XeHP + +Pierre-Eric Pelloux-Prayer (3): + +- radeonsi: change rounding mode to round to even +- util/slab: add slab_zalloc +- gallium/tc: zero alloc transfers + +Rhys Perry (2): + +- anv: Enable nir_opt_access +- radv: include adjust_frag_coord_z in 
key + +Rob Clark (1): + +- mesa: Fix discard_framebuffer for fbo vs winsys + +Samuel Pitoiset (2): + +- radv,drirc: move RADV workarounds to 00-radv-defaults.conf +- radv: disable DCC for Fable Anniversary, Dragons Dogma, GTA IV and more + +Timur Kristóf (1): + +- ac/nir/ngg: Fix mixed up primitive ID after culling. + +Xiaohui Gu (1): + +- iris: Mark a dirty update when vs_needs_sgvs_element value changed
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.9.rst ^
@@ -0,0 +1,119 @@ +Mesa 21.3.9 Release Notes / 2022-06-08 +====================================== + +Mesa 21.3.9 is a bug fix release which fixes bugs found since the 21.3.8 release. + +Mesa 21.3.9 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 21.3.9 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- [regression][bisected] MSVC: Build failure in libmesa_util when targeting x86 32-bit +- A crash in radeonsi driver + + +Changes +------- + +Alyssa Rosenzweig (2): + +- panfrost: Emulate GL_CLAMP on Bifrost +- pan/bi: Handle non-2D arrays + +Dave Airlie (2): + +- crocus: don't map scanout buffers as write-back +- intel/perf: use a function to do common allocations + +Emma Anholt (2): + +- vulkan: Make sure we've loaded our connectors when querying plane props. +- intel/perf: Move some static blocks of C code out of the python script. + +Eric Engestrom (3): + +- .pick_status.json: Update to 05d687723530ed3c5c9f7d0addb3b047138613a3 +- .pick_status.json: Update to fc381fa1e341bc9cf71f53a915dc47926a3f621f +- Revert "glx: Fix build errors with --enable-mangling (v2)" + +Erik Faye-Lund (1): + +- Revert "ci: downgrade sphinx to v3.x" + +Georg Lehmann (1): + +- radv: Don't hash ycbcr sampler base object. 
+ +Ian Romanick (1): + +- intel/fs: Force destination types on DP4A instructions + +Icecream95 (2): + +- panfrost: Don't initialise the trampolines array +- panfrost: Optimise recalculation of max sampler view + +Jason Ekstrand (1): + +- anv: Allow MSAA resolve with different numbers of planes + +Jonathan Gray (1): + +- util: fix msvc build + +Lionel Landwerlin (4): + +- anv: fix variable shadowing +- anv: zero-out anv_batch_bo +- anv: emit timestamp & availability using the same part of CS +- anv: flush tile cache with query copy command + +Matt Turner (8): + +- intel/perf: Don't print leading space from desc_units() +- intel/perf: Deduplicate perf counters +- intel/perf: Use a function to initialize perf counters +- intel/perf: Use slimmer intel_perf_query_counter_data struct +- intel/perf: Store indices to strings rather than pointers +- intel/perf: Mark intel_perf_counter_* enums as PACKED +- intel/perf: Fix mistake in description string +- intel/perf: Destination array calculation into function + +Mike Blumenkrantz (9): + +- llvmpipe: fix occlusion queries with early depth test +- anv: fix xfb usage with rasterizer discard +- anv: fix CmdSetColorWriteEnableEXT for maximum rts +- anv: fix some dynamic rasterization discard cases in pipeline construction +- lavapipe: always clone shader nir for shader states +- gallivm: fix oob image detection for cube/1dArray/2dArray/cubeArray +- zink: flag sample locations for re-set on batch flush +- zink: force-add usage when adding last-ref tracking +- zink: only update usage on buffer rebind if rebinds occurred + +Pierre-Eric Pelloux-Prayer (1): + +- radeonsi: don't clear framebuffer.state before dcc decomp
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/meson.build ^
@@ -39,6 +39,13 @@ error('`mirror` is the only build directory layout supported') endif +amber = get_option('amber') +if amber + package_version_suffix=' Amber' +else + package_version_suffix='' +endif + # Arguments for the preprocessor, put these in a separate array from the C and # C++ (cpp in meson terminology) arguments since they need to be added to the # default arguments for both C and C++. @@ -46,13 +53,18 @@ '-D__STDC_CONSTANT_MACROS', '-D__STDC_FORMAT_MACROS', '-D__STDC_LIMIT_MACROS', - '-DPACKAGE_VERSION="@0@"'.format(meson.project_version()), + '-DPACKAGE_VERSION="@0@@1@"'.format(meson.project_version(), package_version_suffix), '-DPACKAGE_BUGREPORT="https://gitlab.freedesktop.org/mesa/mesa/-/issues"', ] c_args = [] cpp_args = [] with_moltenvk_dir = get_option('moltenvk-dir') + +if amber + pre_args += '-DAMBER' +endif + with_vulkan_icd_dir = get_option('vulkan-icd-dir') with_tests = get_option('build-tests') with_aco_tests = get_option('build-aco-tests') @@ -199,7 +211,9 @@ gallium_drivers = get_option('gallium-drivers') if gallium_drivers.contains('auto') - if system_has_kms_drm + if amber + gallium_drivers = [] + elif system_has_kms_drm # TODO: PPC, Sparc if ['x86', 'x86_64'].contains(host_machine.cpu_family()) gallium_drivers = [ @@ -262,7 +276,9 @@ _vulkan_drivers = get_option('vulkan-drivers') if _vulkan_drivers.contains('auto') if system_has_kms_drm - if host_machine.cpu_family().startswith('x86') + if amber + _vulkan_drivers = [] + elif host_machine.cpu_family().startswith('x86') _vulkan_drivers = ['amd', 'intel', 'swrast'] elif ['arm', 'aarch64'].contains(host_machine.cpu_family()) _vulkan_drivers = ['swrast'] @@ -531,8 +547,22 @@ endif endif -with_glvnd = get_option('glvnd') +_glvnd = get_option('glvnd') +if _glvnd == 'true' or _glvnd == 'enabled' or (amber and _glvnd == 'auto') + with_glvnd = true +else + with_glvnd = false +endif + glvnd_vendor_name = get_option('glvnd-vendor-name') +if glvnd_vendor_name == 'auto' + if amber + 
glvnd_vendor_name = 'amber' + else + glvnd_vendor_name = 'mesa' + endif +endif + if with_glvnd if with_platform_windows error('glvnd cannot be used on Windows') @@ -1211,7 +1241,7 @@ endif else add_project_link_arguments( - cc.get_supported_arguments( + cc.get_supported_link_arguments( '-Wl,--nxcompat', '-Wl,--dynamicbase', '-static-libgcc', @@ -1220,7 +1250,7 @@ language : ['c'], ) add_project_link_arguments( - cpp.get_supported_arguments( + cpp.get_supported_link_arguments( '-Wl,--nxcompat', '-Wl,--dynamicbase', '-static-libgcc',
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/meson_options.txt ^
@@ -302,8 +302,9 @@ ) option( 'glvnd', - type : 'boolean', - value : false, + type : 'combo', + value : 'auto', + choices : ['auto', 'true', 'false', 'enabled', 'disabled'], description : 'Enable GLVND support.' ) option( @@ -321,7 +322,7 @@ option( 'glvnd-vendor-name', type : 'string', - value : 'mesa', + value : 'auto', description : 'Vendor name string to use for glvnd libraries' ) option( @@ -457,14 +458,16 @@ ) option( 'prefer-iris', - type : 'boolean', - value : true, + type : 'combo', + value : 'auto', + choices : ['auto', 'true', 'false'], description : 'Prefer new Intel iris driver over older i965 driver' ) option( 'prefer-crocus', - type : 'boolean', - value : false, + type : 'combo', + value : 'auto', + choices : ['auto', 'true', 'false'], description : 'Prefer new crocus driver over older i965 driver for gen4-7' ) option('egl-lib-suffix', @@ -530,3 +533,9 @@ value : false, description : 'Build gallium VMware/svga driver with mksGuestStats instrumentation.' ) +option( + 'amber', + type : 'boolean', + value : false, + description : 'Configure LTS build to coexist with Mesa >= 22.0' +)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.c ^
@@ -1080,6 +1080,8 @@ } info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS; + info->never_stop_sq_perf_counters = info->chip_class == GFX10 || + info->chip_class == GFX10_3; info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104; if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) { @@ -1168,6 +1170,7 @@ fprintf(f, " has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug); fprintf(f, " has_32bit_predication = %i\n", info->has_32bit_predication); fprintf(f, " has_3d_cube_border_color_mipmap = %i\n", info->has_3d_cube_border_color_mipmap); + fprintf(f, " never_stop_sq_perf_counters = %i\n", info->never_stop_sq_perf_counters); fprintf(f, "Display features:\n"); fprintf(f, " use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.h ^
@@ -83,6 +83,7 @@ bool has_cs_regalloc_hang_bug; bool has_32bit_predication; bool has_3d_cube_border_color_mipmap; + bool never_stop_sq_perf_counters; /* Display features. */ /* There are 2 display DCC codepaths, because display expects unaligned DCC. */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_nir_lower_ngg.c ^
@@ -45,6 +45,7 @@ nir_variable prim_exp_arg_var; nir_variable es_accepted_var; nir_variable gs_accepted_var; + nir_variable gs_vtx_indices_vars[3]; struct u_vector saved_uniforms; @@ -317,11 +318,16 @@ return arg; } -static nir_ssa_def * -ngg_input_primitive_vertex_index(nir_builder b, unsigned vertex) +static void +ngg_nogs_init_vertex_indices_vars(nir_builder b, nir_function_impl impl, lower_ngg_nogs_state st) { - return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u), - nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u)); + for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v) { + st->gs_vtx_indices_vars[v] = nir_local_variable_create(impl, glsl_uint_type(), "gs_vtx_addr"); + + nir_ssa_def vtx = nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = v / 2u), + nir_imm_int(b, (v & 1u) 16u), nir_imm_int(b, 16u)); + nir_store_var(b, st->gs_vtx_indices_vars[v], vtx, 0x1); + } } static nir_ssa_def * @@ -333,13 +339,8 @@ } else { nir_ssa_def vtx_idx[3] = {0}; - vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0); - vtx_idx[1] = st->num_vertices_per_primitives >= 2 - ? ngg_input_primitive_vertex_index(b, 1) - : nir_imm_zero(b, 1, 32); - vtx_idx[2] = st->num_vertices_per_primitives >= 3 - ? ngg_input_primitive_vertex_index(b, 2) - : nir_imm_zero(b, 1, 32); + for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v) + vtx_idx[v] = nir_load_var(b, st->gs_vtx_indices_vars[v]); return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL, st->use_edgeflags); } @@ -358,12 +359,20 @@ arg = emit_ngg_nogs_prim_exp_arg(b, st); if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) { - / Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. 
/ - nir_ssa_def prim_id = nir_build_load_primitive_id(b); - nir_ssa_def provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx); - nir_ssa_def addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u); + nir_ssa_def prim_valid = nir_ieq_imm(b, nir_ushr_imm(b, arg, 31), 0); + nir_if if_prim_valid = nir_push_if(b, prim_valid); + { + /* Copy Primitive IDs from GS threads to the LDS address + * corresponding to the ES thread of the provoking vertex. + * It will be exported as a per-vertex attribute. + / + nir_ssa_def prim_id = nir_build_load_primitive_id(b); + nir_ssa_def provoking_vtx_idx = nir_load_var(b, st->gs_vtx_indices_vars[st->provoking_vtx_idx]); + nir_ssa_def addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u); - nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u); + nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u); + } + nir_pop_if(b, if_prim_valid); } nir_build_export_primitive_amd(b, arg); @@ -747,6 +756,7 @@ nir_ssa_def vtx_addr = nir_load_var(b, gs_vtxaddr_vars[v]); nir_ssa_def exporter_vtx_idx = nir_build_load_shared(b, 1, 8, vtx_addr, .base = lds_es_exporter_tid, .align_mul = 1u); exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx); + nir_store_var(b, nogs_state->gs_vtx_indices_vars[v], exporter_vtx_indices[v], 0x1); } nir_ssa_def prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL, nogs_state->use_edgeflags); @@ -1142,7 +1152,7 @@ / Load vertex indices from input VGPRs / nir_ssa_def vtx_idx[3] = {0}; for (unsigned vertex = 0; vertex < 3; ++vertex) - vtx_idx[vertex] = ngg_input_primitive_vertex_index(b, vertex); + vtx_idx[vertex] = nir_load_var(b, nogs_state->gs_vtx_indices_vars[vertex]); nir_ssa_def vtx_addr[3] = {0}; nir_ssa_def pos[3][4] = {0}; @@ -1320,6 +1330,8 @@ nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body)); b->cursor = nir_before_cf_list(&impl->body); + ngg_nogs_init_vertex_indices_vars(b, impl, &state); 
+ if (!can_cull) { /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space / nir_if if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_surface.h ^
@@ -223,7 +223,7 @@ * The gfx10 HTILE equation is chip-specific, it requires 64KB_Z_X, and it varies with: * - number of samples */ - uint16_t gfx10_bits[60]; + uint16_t gfx10_bits[64]; } u; };
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_assembler.cpp ^
@@ -625,6 +625,10 @@ encoding = 0; if (instr->opcode == aco_opcode::v_interp_mov_f32) { encoding = 0x3 & instr->operands[0].constantValue(); + } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) { + encoding \|= instr->operands[0].physReg() << 0; + encoding \|= instr->operands[1].physReg() << 9; + /* Encoding src2 works fine with hardware but breaks some disassemblers. */ } else { for (unsigned i = 0; i < instr->operands.size(); i++) encoding \|= instr->operands[i].physReg() << (i * 9);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_instruction_selection.cpp ^
@@ -3189,12 +3189,8 @@ case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break; case nir_op_pack_half_2x16_split: { if (dst.regClass() == v1) { - nir_const_value* val = nir_src_as_const_value(instr->src[1].src); - if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) { - /* upper bits zero on GFX6-GFX9 / - bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0])); - } else if (!ctx->block->fp_mode.care_about_round16_64 \|\| - ctx->block->fp_mode.round16_64 == fp_round_tz) { + if (!ctx->block->fp_mode.care_about_round16_64 \|\| + ctx->block->fp_mode.round16_64 == fp_round_tz) { if (ctx->program->chip_class == GFX8 \|\| ctx->program->chip_class == GFX9) emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst); else @@ -8459,146 +8455,106 @@ } break; } - case nir_intrinsic_quad_broadcast: { - Temp src = get_ssa_temp(ctx, instr->src[0].ssa); - if (!nir_dest_is_divergent(instr->dest)) { - emit_uniform_subgroup(ctx, instr, src); - } else { - Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); - unsigned lane = nir_src_as_const_value(instr->src[1])->u32; - uint32_t dpp_ctrl = dpp_quad_perm(lane, lane, lane, lane); - - if (instr->dest.ssa.bit_size != 1) - src = as_vgpr(ctx, src); - - if (instr->dest.ssa.bit_size == 1) { - assert(src.regClass() == bld.lm); - assert(dst.regClass() == bld.lm); - uint32_t half_mask = 0x11111111u << lane; - Temp mask_tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), - Operand::c32(half_mask), Operand::c32(half_mask)); - Temp tmp = bld.tmp(bld.lm); - bld.sop1(Builder::s_wqm, Definition(tmp), - bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, - bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, - Operand(exec, bld.lm)))); - emit_wqm(bld, tmp, dst); - } else if (instr->dest.ssa.bit_size == 8) { - Temp tmp = bld.tmp(v1); - if (ctx->program->chip_class >= GFX8) - emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, 
bld.def(v1), src, dpp_ctrl), tmp); - else - emit_wqm(bld, - bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) \| dpp_ctrl), - tmp); - bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp); - } else if (instr->dest.ssa.bit_size == 16) { - Temp tmp = bld.tmp(v1); - if (ctx->program->chip_class >= GFX8) - emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp); - else - emit_wqm(bld, - bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) \| dpp_ctrl), - tmp); - bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp); - } else if (instr->dest.ssa.bit_size == 32) { - if (ctx->program->chip_class >= GFX8) - emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst); - else - emit_wqm(bld, - bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) \| dpp_ctrl), - dst); - } else if (instr->dest.ssa.bit_size == 64) { - Temp lo = bld.tmp(v1), hi = bld.tmp(v1); - bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); - if (ctx->program->chip_class >= GFX8) { - lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl)); - hi = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl)); - } else { - lo = emit_wqm( - bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) \| dpp_ctrl)); - hi = emit_wqm( - bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) \| dpp_ctrl)); - } - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); - emit_split_vector(ctx, dst, 2); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - } - break; - } + case nir_intrinsic_quad_broadcast: case nir_intrinsic_quad_swap_horizontal: case nir_intrinsic_quad_swap_vertical: case nir_intrinsic_quad_swap_diagonal: case nir_intrinsic_quad_swizzle_amd: { Temp src = get_ssa_temp(ctx, instr->src[0].ssa); + if (!nir_dest_is_divergent(instr->dest)) { 
emit_uniform_subgroup(ctx, instr, src); break; } + + / Quad broadcast lane. / + unsigned lane = 0; + / Use VALU for the bool instructions that don't have a SALU-only special case. / + bool bool_use_valu = instr->dest.ssa.bit_size == 1; + uint16_t dpp_ctrl = 0; + switch (instr->intrinsic) { case nir_intrinsic_quad_swap_horizontal: dpp_ctrl = dpp_quad_perm(1, 0, 3, 2); break; case nir_intrinsic_quad_swap_vertical: dpp_ctrl = dpp_quad_perm(2, 3, 0, 1); break; case nir_intrinsic_quad_swap_diagonal: dpp_ctrl = dpp_quad_perm(3, 2, 1, 0); break; case nir_intrinsic_quad_swizzle_amd: dpp_ctrl = nir_intrinsic_swizzle_mask(instr); break; + case nir_intrinsic_quad_broadcast: + lane = nir_src_as_const_value(instr->src[1])->u32; + dpp_ctrl = dpp_quad_perm(lane, lane, lane, lane); + bool_use_valu = false; + break; default: break; } - if (ctx->program->chip_class < GFX8) - dpp_ctrl \|= (1 << 15); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); + Temp tmp(dst); - if (instr->dest.ssa.bit_size != 1) - src = as_vgpr(ctx, src); - - if (instr->dest.ssa.bit_size == 1) { - assert(src.regClass() == bld.lm); + / Setup source. / + if (bool_use_valu) src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), Operand::c32(-1), src); + else if (instr->dest.ssa.bit_size != 1) + src = as_vgpr(ctx, src); + + / Setup temporary destination. / + if (bool_use_valu) + tmp = bld.tmp(v1); + else if (ctx->program->stage == fragment_fs) + tmp = bld.tmp(dst.regClass()); + + if (instr->dest.ssa.bit_size == 1 && instr->intrinsic == nir_intrinsic_quad_broadcast) { + / Special case for quad broadcast using SALU only. */ + assert(src.regClass() == bld.lm && tmp.regClass() == bld.lm); + + uint32_t half_mask = 0x11111111u << lane; + Operand mask_tmp = bld.lm.bytes() == 4 + ? 
Operand::c32(half_mask) + : bld.pseudo(aco_opcode::p_create_vector, bld.def(bld.lm), + Operand::c32(half_mask), Operand::c32(half_mask)); + + src = + bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)); + src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src); + bld.sop1(Builder::s_wqm, Definition(tmp), src); + } else if (instr->dest.ssa.bit_size <= 32 \|\| bool_use_valu) { + unsigned excess_bytes = bool_use_valu ? 0 : 4 - instr->dest.ssa.bit_size / 8; + Definition def = excess_bytes ? bld.def(v1) : Definition(tmp); + if (ctx->program->chip_class >= GFX8) - src = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl); - else - src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl); - Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src); - emit_wqm(bld, tmp, dst); - } else if (instr->dest.ssa.bit_size == 8) { - Temp tmp = bld.tmp(v1); - if (ctx->program->chip_class >= GFX8) - emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp); - else - emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp); - bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp); - } else if (instr->dest.ssa.bit_size == 16) { - Temp tmp = bld.tmp(v1); - if (ctx->program->chip_class >= GFX8) - emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp); - else - emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp); - bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp); - } else if (instr->dest.ssa.bit_size == 32) { - Temp tmp; - if (ctx->program->chip_class >= GFX8) - tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl); + bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl); else - tmp = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl); - emit_wqm(bld, tmp, dst); + 
bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) \| dpp_ctrl); + + if (excess_bytes) + bld.pseudo(aco_opcode::p_split_vector, Definition(tmp), + bld.def(RegClass::get(tmp.type(), excess_bytes)), def.getTemp()); } else if (instr->dest.ssa.bit_size == 64) { Temp lo = bld.tmp(v1), hi = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); + if (ctx->program->chip_class >= GFX8) { - lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_ir.cpp ^
@@ -187,7 +187,7 @@ if (!instr->isVALU()) return false; - if (chip < GFX8 \|\| instr->isDPP()) + if (chip < GFX8 \|\| instr->isDPP() \|\| instr->isVOP3P()) return false; if (instr->isSDWA()) @@ -318,6 +318,8 @@ return false; if (instr->format == Format::VOP3) return false; + if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr)) + return false; } /* there are more cases but those all take 64-bit inputs */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer.cpp ^
@@ -3135,10 +3135,12 @@ * if 0 - pick selection from fneg->lo * if 1 - pick selection from fneg->hi */ - bool opsel_lo = vop3p->opsel_lo & (1 << i); - bool opsel_hi = vop3p->opsel_hi & (1 << i); - vop3p->neg_lo[i] ^= true ^ (opsel_lo ? fneg->neg_hi[0] : fneg->neg_lo[0]); - vop3p->neg_hi[i] ^= true ^ (opsel_hi ? fneg->neg_hi[0] : fneg->neg_lo[0]); + bool opsel_lo = (vop3p->opsel_lo >> i) & 1; + bool opsel_hi = (vop3p->opsel_hi >> i) & 1; + bool neg_lo = true ^ fneg->neg_lo[0] ^ fneg->neg_lo[1]; + bool neg_hi = true ^ fneg->neg_hi[0] ^ fneg->neg_hi[1]; + vop3p->neg_lo[i] ^= opsel_lo ? neg_hi : neg_lo; + vop3p->neg_hi[i] ^= opsel_hi ? neg_hi : neg_lo; vop3p->opsel_lo ^= ((opsel_lo ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i; vop3p->opsel_hi ^= ((opsel_hi ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i; @@ -3324,12 +3326,16 @@ VOP3_instruction& new_mul = instr->vop3(); if (mul_instr->isVOP3()) { VOP3_instruction& mul = mul_instr->vop3(); - new_mul.neg[0] = mul.neg[0] && !is_abs; - new_mul.neg[1] = mul.neg[1] && !is_abs; - new_mul.abs[0] = mul.abs[0] \|\| is_abs; - new_mul.abs[1] = mul.abs[1] \|\| is_abs; + new_mul.neg[0] = mul.neg[0]; + new_mul.neg[1] = mul.neg[1]; + new_mul.abs[0] = mul.abs[0]; + new_mul.abs[1] = mul.abs[1]; new_mul.omod = mul.omod; } + if (is_abs) { + new_mul.neg[0] = new_mul.neg[1] = false; + new_mul.abs[0] = new_mul.abs[1] = true; + } new_mul.neg[0] ^= true; new_mul.clamp = false;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer_postRA.cpp ^
@@ -50,18 +50,6 @@ Idx const_or_undef{UINT32_MAX, 2}; Idx written_by_multiple_instrs{UINT32_MAX, 3}; -bool -is_instr_after(Idx second, Idx first) -{ - if (first == not_written_in_block && second != not_written_in_block) - return true; - - if (!first.found() \|\| !second.found()) - return false; - - return second.block > first.block \|\| (second.block == first.block && second.instr > first.instr); -} - struct pr_opt_ctx { Program* program; Block* current_block; @@ -151,6 +139,44 @@ return instr_idx; } +bool +is_clobbered_since(pr_opt_ctx& ctx, PhysReg reg, RegClass rc, const Idx& idx) +{ + /* If we didn't find an instruction, assume that the register is clobbered. */ + if (!idx.found()) + return true; + + /* TODO: We currently can't keep track of subdword registers. */ + if (rc.is_subdword()) + return true; + + unsigned begin_reg = reg.reg(); + unsigned end_reg = begin_reg + rc.size(); + unsigned current_block_idx = ctx.current_block->index; + + for (unsigned r = begin_reg; r < end_reg; ++r) { + Idx& i = ctx.instr_idx_by_regs[current_block_idx][r]; + if (i == clobbered \|\| i == written_by_multiple_instrs) + return true; + else if (i == not_written_in_block) + continue; + + assert(i.found()); + + if (i.block > idx.block \|\| (i.block == idx.block && i.instr > idx.instr)) + return true; + } + + return false; +} + +template <typename T> +bool +is_clobbered_since(pr_opt_ctx& ctx, const T& t, const Idx& idx) +{ + return is_clobbered_since(ctx, t.physReg(), t.regClass(), idx); +} + void try_apply_branch_vcc(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr) { @@ -177,16 +203,19 @@ Idx op0_instr_idx = last_writer_idx(ctx, instr->operands[0]); Idx last_vcc_wr_idx = last_writer_idx(ctx, vcc, ctx.program->lane_mask); - Idx last_exec_wr_idx = last_writer_idx(ctx, exec, ctx.program->lane_mask); /* We need to make sure: + * - the instructions that wrote the operand register and VCC are both found * - the operand register used by the branch, and VCC were both written in the current block 
- * - VCC was NOT written after the operand register - * - EXEC is sane and was NOT written after the operand register + * - EXEC hasn't been clobbered since the last VCC write + * - VCC hasn't been clobbered since the operand register was written + * (ie. the last VCC writer precedes the op0 writer) */ if (!op0_instr_idx.found() \|\| !last_vcc_wr_idx.found() \|\| - !is_instr_after(last_vcc_wr_idx, last_exec_wr_idx) \|\| - !is_instr_after(op0_instr_idx, last_vcc_wr_idx)) + op0_instr_idx.block != ctx.current_block->index \|\| + last_vcc_wr_idx.block != ctx.current_block->index \|\| + is_clobbered_since(ctx, exec, ctx.program->lane_mask, last_vcc_wr_idx) \|\| + is_clobbered_since(ctx, vcc, ctx.program->lane_mask, op0_instr_idx)) return; Instruction* op0_instr = ctx.get(op0_instr_idx); @@ -346,6 +375,17 @@ void try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr) { + /* We are looking for the following pattern: + * + * v_mov_dpp vA, vB, ... ; move instruction with DPP + * v_xxx vC, vA, ... ; current instr that uses the result from the move + * + * If possible, the above is optimized into: + * + * v_xxx_dpp vC, vB, ... ; current instr modified to use DPP directly + * + */ + if (!instr->isVALU() \|\| instr->isDPP() \|\| !can_use_DPP(instr, false)) return; @@ -365,8 +405,8 @@ (!mov->definitions[0].tempId() \|\| ctx.uses[mov->definitions[0].tempId()] > 1)) continue; - Idx mov_src_idx = last_writer_idx(ctx, mov->operands[0]); - if (is_instr_after(mov_src_idx, op_instr_idx)) + /* Don't propagate DPP if the source register is overwritten since the move. */ + if (is_clobbered_since(ctx, mov->operands[0], op_instr_idx)) continue; if (i && !can_swap_operands(instr, &instr->opcode))
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_print_asm.cpp ^
@@ -152,12 +152,6 @@ disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size, size_t pos, char* outline, unsigned outline_size) { - /* mask out src2 on v_writelane_b32 */ - if (((chip == GFX8 \|\| chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) \|\| - (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7610000)) { - binary[pos + 1] = binary[pos + 1] & 0xF803FFFF; - } - size_t l = LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t), pos * 4, outline, outline_size);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_register_allocation.cpp ^
@@ -1898,10 +1898,9 @@ dst = operand.physReg(); } else { + /* clear the operand in case it's only a stride mismatch */ + register_file.clear(src, operand.regClass()); dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index); - update_renames( - ctx, register_file, parallelcopy, instr, - instr->opcode != aco_opcode::p_create_vector ? rename_not_killed_ops : (UpdateRenames)0); } Operand pc_op = operand;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_spill.cpp ^
@@ -1614,7 +1614,7 @@ continue; bool can_destroy = true; - for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[block.linear_preds[0]]) { + for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block.index]) { if (ctx.interferences[pair.second].first.type() == RegType::sgpr && slots[pair.second] / ctx.wave_size == i) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_validate.cpp ^
@@ -235,6 +235,16 @@ if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed()) check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get()); + } else if (instr->isVOP3P()) { + VOP3P_instruction& vop3p = instr->vop3p(); + for (unsigned i = 0; i < instr->operands.size(); i++) { + if (instr->operands[i].hasRegClass() && + instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()) + check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0, + "Unexpected opsel for subdword operand", instr.get()); + } + check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition", + instr.get()); } /* check for undefs */ @@ -720,6 +730,9 @@ if (instr->isSDWA()) return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 && byte % instr->sdwa().sel[index].size() == 0; + if (instr->isVOP3P()) + return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) && + ((instr->vop3p().opsel_hi >> index) & 1) == (byte >> 1); if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1)) return true;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/tests/test_optimizer.cpp ^
@@ -954,13 +954,14 @@ END_TEST BEGIN_TEST(optimizer.dpp) - //>> v1: %a, v1: %b, s2: %c = p_startpgm - if (!setup_cs("v1 v1 s2", GFX10_3)) + //>> v1: %a, v1: %b, s2: %c, s1: %d = p_startpgm + if (!setup_cs("v1 v1 s2 s1", GFX10_3)) return; Operand a(inputs[0]); Operand b(inputs[1]); Operand c(inputs[2]); + Operand d(inputs[3]); /* basic optimization */ //! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1 @@ -1028,6 +1029,21 @@ Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c); writeout(8, res8); + /* sgprs */ + //! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res9 = v_add_f32 %tmp9, %d + //! p_unit_test 9, %res9 + Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d); + writeout(9, res9); + + //! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res10 = v_add_f32 %d, %tmp10 + //! p_unit_test 10, %res10 + Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), d, tmp10); + writeout(10, res10); + finish_opt_test(); END_TEST
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/00-radv-defaults.conf ^
@@ -0,0 +1,148 @@ +<?xml version="1.0" standalone="yes"?> +<!-- + +============================================ +Application bugs worked around in this file: +============================================ + +--> + +<!DOCTYPE driconf [ + <!ELEMENT driconf (device+)> + <!ELEMENT device (application \| engine)+> + <!ATTLIST device driver CDATA #IMPLIED + device CDATA #IMPLIED> + <!ELEMENT application (option+)> + <!ATTLIST application name CDATA #REQUIRED + executable CDATA #IMPLIED + executable_regexp CDATA #IMPLIED + sha1 CDATA #IMPLIED + application_name_match CDATA #IMPLIED + application_versions CDATA #IMPLIED> + <!ELEMENT engine (option+)> + + <!-- engine_name_match: A regexp matching the engine name --> + <!-- engine_versions: A version in range format + (version 1 to 4 : "1:4") --> + + <!ATTLIST engine engine_name_match CDATA #REQUIRED + engine_versions CDATA #IMPLIED> + + <!ELEMENT option EMPTY> + <!ATTLIST option name CDATA #REQUIRED + value CDATA #REQUIRED> +]> + +<driconf> + <device driver="radv"> + <!-- Engine workarounds --> + <engine engine_name_match="vkd3d"> + <option name="radv_zero_vram" value="true" /> + </engine> + + <engine engine_name_match="Quantic Dream Engine"> + <option name="radv_zero_vram" value="true" /> + <option name="radv_lower_discard_to_demote" value="true" /> + <option name="radv_disable_tc_compat_htile_general" value="true" /> + </engine> + + <!-- Game workarounds --> + <application name="Shadow Of The Tomb Raider (Native)" application_name_match="ShadowOfTheTomb"> + <option name="radv_report_llvm9_version_string" value="true" /> + <option name="radv_invariant_geom" value="true" /> + </application> + + <application name="Shadow Of The Tomb Raider (DX11/DX12)" application_name_match="SOTTR.exe"> + <option name="radv_invariant_geom" value="true" /> + <option name="radv_split_fma" value="true" /> + </application> + + <application name="RAGE 2" executable="RAGE2.exe"> + <option name="radv_enable_mrt_output_nan_fixup" value="true" /> + 
</application> + + <application name="Path of Exile (64-bit, Steam)" executable="PathOfExile_x64Steam.exe"> + <option name="radv_no_dynamic_bounds" value="true" /> + <option name="radv_absolute_depth_bias" value="true" /> + </application> + <application name="Path of Exile (32-bit, Steam)" executable="PathOfExileSteam.exe"> + <option name="radv_no_dynamic_bounds" value="true" /> + <option name="radv_absolute_depth_bias" value="true" /> + </application> + <application name="Path of Exile (64-bit)" executable="PathOfExile_x64.exe"> + <option name="radv_no_dynamic_bounds" value="true" /> + <option name="radv_absolute_depth_bias" value="true" /> + </application> + <application name="Path of Exile (32-bit)" executable="PathOfExile.exe"> + <option name="radv_no_dynamic_bounds" value="true" /> + <option name="radv_absolute_depth_bias" value="true" /> + </application> + + <application name="The Surge 2" application_name_match="Fledge"> + <option name="radv_disable_shrink_image_store" value="true" /> + <option name="radv_zero_vram" value="true" /> + </application> + + <application name="World War Z (and World War Z: Aftermath)" application_name_match="WWZ\|wwz"> + <option name="radv_override_uniform_offset_alignment" value="16" /> + <option name="radv_disable_shrink_image_store" value="true" /> + <option name="radv_invariant_geom" value="true" /> + </application> + + <application name="DOOM VFR" application_name_match="DOOM_VFR"> + <option name="radv_no_dynamic_bounds" value="true" /> + </application> + + <application name="DOOM Eternal" application_name_match="DOOMEternal"> + <option name="radv_zero_vram" value="true" /> + </application> + + <application name="No Man's Sky" application_name_match="No Man's Sky"> + <option name="radv_lower_discard_to_demote" value="true" /> + </application> + + <application name="Monster Hunter World" application_name_match="MonsterHunterWorld.exe"> + <option name="radv_invariant_geom" value="true" /> + </application> + + <application 
name="DOOM (2016)" application_name_match="DOOM$"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="Wolfenstein II" application_name_match="Wolfenstein II The New Colossus"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="RDR2" application_name_match="Red Dead Redemption 2"> + <option name="radv_report_apu_as_dgpu" value="true" /> + </application> + + <application name="Resident Evil Village" application_name_match="re8.exe"> + <option name="radv_invariant_geom" value="true" /> + </application> + + <application name="F1 2021" application_name_match="F1_2021_dx12.exe"> + <option name="radv_disable_htile_layers" value="true" /> + </application> + + <application name="Fable Anniversary" application_name_match="Fable Anniversary.exe"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="Dragon's Dogma Dark Ariser" application_name_match="DDDA.exe"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="Grand Theft Auto IV" application_name_match="GTAIV.exe"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="Star Wars: The Force Unleashed II" application_name_match="SWTFU2.exe"> + <option name="radv_disable_dcc" value="true" /> + </application> + + <application name="Starcraft 2" application_name_match="SC2_x64.exe"> + <option name="radv_disable_dcc" value="true" /> + </application> + </device> +</driconf>
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/layers/radv_metro_exodus.c ^
@@ -0,0 +1,38 @@ +/* + * Copyright © 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" + +VKAPI_ATTR VkResult VKAPI_CALL +metro_exodus_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue) +{ + RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore); + + /* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/5119. */ + if (semaphore == NULL) { + fprintf(stderr, "RADV: Ignoring vkGetSemaphoreCounterValue() with NULL semaphore (game bug)!\n"); + return VK_SUCCESS; + } + + return radv_GetSemaphoreCounterValue(_device, _semaphore, pValue); +}
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/meson.build ^
@@ -25,12 +25,13 @@ command : [ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'radv', - '--device-prefix', 'sqtt', + '--device-prefix', 'sqtt', '--device-prefix', 'metro_exodus', ], depend_files : vk_entrypoints_gen_depend_files, ) libradv_files = files( + 'layers/radv_metro_exodus.c', 'layers/radv_sqtt_layer.c', 'winsys/null/radv_null_bo.c', 'winsys/null/radv_null_bo.h', @@ -86,6 +87,10 @@ 'vk_format.h', ) +files_drirc = files('00-radv-defaults.conf') + +install_data(files_drirc, install_dir : join_paths(get_option('datadir'), 'drirc.d')) + if not with_platform_windows libradv_files += files( 'winsys/amdgpu/radv_amdgpu_bo.c',
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_cmd_buffer.c ^
@@ -2968,7 +2968,7 @@ } static void -radv_emit_vertex_state(struct radv_cmd_buffer cmd_buffer, bool pipeline_is_dirty) +radv_emit_vertex_input(struct radv_cmd_buffer cmd_buffer, bool pipeline_is_dirty) { struct radv_pipeline pipeline = cmd_buffer->state.pipeline; struct radv_shader_variant vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX); @@ -3059,8 +3059,8 @@ if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE) radv_emit_color_write_enable(cmd_buffer); - if (states & RADV_CMD_DIRTY_VERTEX_STATE) - radv_emit_vertex_state(cmd_buffer, pipeline_is_dirty); + if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT) + radv_emit_vertex_input(cmd_buffer, pipeline_is_dirty); cmd_buffer->state.dirty &= ~states; } @@ -4497,7 +4497,8 @@ return; } - cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_VERTEX_STATE; + cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_VERTEX_BUFFER \| + RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; } static uint32_t @@ -4564,7 +4565,6 @@ radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); assert(set); - assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); if (!cmd_buffer->device->use_global_bo_list) { for (unsigned j = 0; j < set->header.buffer_count; ++j) @@ -4602,7 +4602,7 @@ radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); } - for (unsigned j = 0; j < set->header.layout->dynamic_offset_count; ++j, ++dyn_idx) { + for (unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) { unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; uint32_t dst = descriptors_state->dynamic_buffers + idx 4; assert(dyn_idx < dynamicOffsetCount); @@ -4628,7 +4628,7 @@ } } - cmd_buffer->push_constant_stages \|= set->header.layout->dynamic_shader_stages; + cmd_buffer->push_constant_stages \|= layout->set[set_idx].dynamic_offset_stages; } } } @@ -5466,14 +5466,7 @@ cmd_buffer->state.vbo_misaligned_mask = 0; - state->attribute_mask = 0; - state->misaligned_mask = 0; - 
state->possibly_misaligned_mask = 0; - state->instance_rate_inputs = 0; - state->nontrivial_divisors = 0; - state->post_shuffle = 0; - state->alpha_adjust_lo = 0; - state->alpha_adjust_hi = 0; + memset(state, 0, sizeof(state)); enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class; for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) { @@ -5529,7 +5522,8 @@ state->post_shuffle \|= 1u << loc; } - cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_VERTEX_STATE; + cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_VERTEX_BUFFER \| + RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; } void @@ -5560,6 +5554,11 @@ allow_ib2 = false; } + if (secondary->queue_family_index == RADV_QUEUE_COMPUTE) { + / IB2 packets are not supported on compute queues according to PAL. / + allow_ib2 = false; + } + primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); primary->scratch_waves_wanted = @@ -6326,8 +6325,9 @@ / Index, vertex and streamout buffers don't change context regs, and * pipeline is already handled. */ - used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER \| RADV_CMD_DIRTY_VERTEX_STATE \| - RADV_CMD_DIRTY_STREAMOUT_BUFFER \| RADV_CMD_DIRTY_PIPELINE); + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER \| RADV_CMD_DIRTY_VERTEX_BUFFER \| + RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT \| RADV_CMD_DIRTY_STREAMOUT_BUFFER \| + RADV_CMD_DIRTY_PIPELINE); if (cmd_buffer->state.dirty & used_states) return true;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_descriptor_set.c ^
@@ -129,8 +129,8 @@ /* Store block of offsets first, followed by the conversion descriptors (padded to the struct * alignment) / size += num_bindings sizeof(uint32_t); - size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion)); - size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion); + size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion_state)); + size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion_state); } set_layout = @@ -145,7 +145,7 @@ /* We just allocate all the samplers at the end of the struct / uint32_t samplers = (uint32_t )&set_layout->binding[num_bindings]; - struct radv_sampler_ycbcr_conversion ycbcr_samplers = NULL; + struct radv_sampler_ycbcr_conversion_state ycbcr_samplers = NULL; uint32_t ycbcr_sampler_offsets = NULL; if (ycbcr_sampler_count > 0) { @@ -155,8 +155,8 @@ uintptr_t first_ycbcr_sampler_offset = (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings; first_ycbcr_sampler_offset = - ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion)); - ycbcr_samplers = (struct radv_sampler_ycbcr_conversion )first_ycbcr_sampler_offset; + ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion_state)); + ycbcr_samplers = (struct radv_sampler_ycbcr_conversion_state )first_ycbcr_sampler_offset; } else set_layout->ycbcr_sampler_offsets_offset = 0; @@ -198,7 +198,7 @@ if (conversion) { has_ycbcr_sampler = true; max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors, - vk_format_get_plane_count(conversion->format)); + vk_format_get_plane_count(conversion->state.format)); } } } @@ -311,7 +311,7 @@ for (uint32_t i = 0; i < binding->descriptorCount; i++) { if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler) ycbcr_samplers[i] = - radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler; + radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler->state; else 
ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED; } @@ -495,11 +495,16 @@ layout->set[set].layout = set_layout; layout->set[set].dynamic_offset_start = dynamic_offset_count; + layout->set[set].dynamic_offset_count = 0; + layout->set[set].dynamic_offset_stages = 0; for (uint32_t b = 0; b < set_layout->binding_count; b++) { - dynamic_offset_count += set_layout->binding[b].array_size set_layout->binding[b].dynamic_offset_count; - dynamic_shader_stages \|= set_layout->dynamic_shader_stages; + layout->set[set].dynamic_offset_count += + set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; + layout->set[set].dynamic_offset_stages \|= set_layout->dynamic_shader_stages; } + dynamic_offset_count += layout->set[set].dynamic_offset_count; + dynamic_shader_stages \|= layout->set[set].dynamic_offset_stages; /* Hash the entire set layout except for the vk_object_base. The * rest of the set layout is carefully constructed to not have @@ -1517,13 +1522,13 @@ vk_object_base_init(&device->vk, &conversion->base, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION); - conversion->format = pCreateInfo->format; - conversion->ycbcr_model = pCreateInfo->ycbcrModel; - conversion->ycbcr_range = pCreateInfo->ycbcrRange; - conversion->components = pCreateInfo->components; - conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset; - conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset; - conversion->chroma_filter = pCreateInfo->chromaFilter; + conversion->state.format = pCreateInfo->format; + conversion->state.ycbcr_model = pCreateInfo->ycbcrModel; + conversion->state.ycbcr_range = pCreateInfo->ycbcrRange; + conversion->state.components = pCreateInfo->components; + conversion->state.chroma_offsets[0] = pCreateInfo->xChromaOffset; + conversion->state.chroma_offsets[1] = pCreateInfo->yChromaOffset; + conversion->state.chroma_filter = pCreateInfo->chromaFilter; *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion); return VK_SUCCESS;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_descriptor_set.h ^
@@ -89,7 +89,9 @@ struct { struct radv_descriptor_set_layout layout; uint32_t size; - uint32_t dynamic_offset_start; + uint16_t dynamic_offset_start; + uint16_t dynamic_offset_count; + VkShaderStageFlags dynamic_offset_stages; } set[MAX_SETS]; uint32_t num_sets; @@ -114,7 +116,7 @@ return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0); } -static inline const struct radv_sampler_ycbcr_conversion +static inline const struct radv_sampler_ycbcr_conversion_state * radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout set, unsigned binding_index) { if (!set->ycbcr_sampler_offsets_offset) @@ -125,7 +127,7 @@ if (offsets[binding_index] == 0) return NULL; - return (const struct radv_sampler_ycbcr_conversion )((const char )set + + return (const struct radv_sampler_ycbcr_conversion_state )((const char )set + offsets[binding_index]); } #endif / RADV_DESCRIPTOR_SET_H */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_device.c ^
@@ -31,7 +31,11 @@ #ifdef __FreeBSD__ #include <sys/types.h> -#elif !defined(_WIN32) +#endif +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS #include <sys/sysmacros.h> #endif @@ -909,6 +913,7 @@ DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false) DRI_CONF_RADV_DISABLE_DCC(false) DRI_CONF_RADV_REPORT_APU_AS_DGPU(false) + DRI_CONF_RADV_DISABLE_HTILE_LAYERS(false) DRI_CONF_SECTION_END }; // clang-format on @@ -951,6 +956,9 @@ instance->report_apu_as_dgpu = driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu"); + + instance->disable_htile_layers = + driQueryOptionb(&instance->dri_options, "radv_disable_htile_layers"); } VkResult @@ -1787,8 +1795,8 @@ .lineWidthGranularity = (1.0 / 8.0), .strictLines = false, /* FINISHME / .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 128, - .optimalBufferCopyRowPitchAlignment = 128, + .optimalBufferCopyOffsetAlignment = 1, + .optimalBufferCopyRowPitchAlignment = 1, .nonCoherentAtomSize = 64, }; @@ -2965,7 +2973,15 @@ struct vk_device_dispatch_table dispatch_table; - if (radv_thread_trace_enabled()) { + if (physical_device->instance->vk.app_info.app_name && + !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) { + / Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it + * crashes sometimes. Workaround this game bug by enabling an internal layer. Remove this + * when the game is fixed. 
+ */ + vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false); + } else if (radv_thread_trace_enabled()) { vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true); vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false); } else { @@ -4683,7 +4699,7 @@ result = radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count, - &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs); + &initial_flush_preamble_cs, &initial_preamble_cs, &continue_preamble_cs); if (result != VK_SUCCESS) goto fail; @@ -4845,8 +4861,12 @@ points[syncobj_idx] = submission->wait_values[i]; ++syncobj_idx; } - bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, - true, timeout); + + bool success = true; + if (syncobj_idx > 0) { + success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, + true, timeout); + } free(points); return success ? VK_SUCCESS : VK_TIMEOUT;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_image.c ^
@@ -352,6 +352,11 @@ bool use_htile_for_mips = image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10; + /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */ + if (device->physical_device->rad_info.chip_class == GFX10 && + image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1) + return false; + /* Do not enable HTILE for very small images because it seems less performant but make sure it's * allowed with VRS attachments because we need HTILE. */ @@ -360,6 +365,9 @@ !device->attachment_vrs_enabled) return false; + if (device->instance->disable_htile_layers && image->info.array_size > 1) + return false; + return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c ^
@@ -33,7 +33,7 @@ nir_ssa_def image_size; nir_tex_instr origin_tex; nir_deref_instr tex_deref; - const struct radv_sampler_ycbcr_conversion conversion; + const struct radv_sampler_ycbcr_conversion_state conversion; bool unnormalized_coordinates; }; @@ -80,7 +80,7 @@ implicit_downsampled_coords(struct ycbcr_state state, nir_ssa_def old_coords) { nir_builder b = state->builder; - const struct radv_sampler_ycbcr_conversion conversion = state->conversion; + const struct radv_sampler_ycbcr_conversion_state conversion = state->conversion; nir_ssa_def image_size = NULL; nir_ssa_def comp[4] = { NULL, @@ -230,7 +230,7 @@ layout->set[var->data.descriptor_set].layout; const struct radv_descriptor_set_binding_layout binding = &set_layout->binding[var->data.binding]; - const struct radv_sampler_ycbcr_conversion ycbcr_samplers = + const struct radv_sampler_ycbcr_conversion_state ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, var->data.binding); if (!ycbcr_samplers) @@ -255,7 +255,7 @@ array_index = nir_src_as_uint(deref->arr.index); array_index = MIN2(array_index, binding->array_size - 1); } - const struct radv_sampler_ycbcr_conversion ycbcr_sampler = ycbcr_samplers + array_index; + const struct radv_sampler_ycbcr_conversion_state *ycbcr_sampler = ycbcr_samplers + array_index; if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED) return false;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline.c ^
@@ -2711,6 +2711,7 @@ key.invariant_geom = true; key.use_ngg = pipeline->device->physical_device->use_ngg; + key.adjust_frag_coord_z = pipeline->device->adjust_frag_coord_z; return key; } @@ -4448,7 +4449,7 @@ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) \| S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) \| S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) \| - cull_dist_mask << 8 \| clip_dist_mask); + total_mask << 8 \| clip_dist_mask); if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); @@ -4568,7 +4569,7 @@ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) \| S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) \| S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) \| - cull_dist_mask << 8 \| clip_dist_mask); + total_mask << 8 \| clip_dist_mask); radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, S_028A84_PRIMITIVEID_EN(es_enable_prim_id) \|
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_private.h ^
@@ -320,6 +320,7 @@ bool disable_shrink_image_store; bool absolute_depth_bias; bool report_apu_as_dgpu; + bool disable_htile_layers; }; VkResult radv_init_wsi(struct radv_physical_device *physical_device); @@ -1046,7 +1047,6 @@ RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32, RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33, RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34, - RADV_CMD_DIRTY_VERTEX_STATE = RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT, }; enum radv_cmd_flush_bits { @@ -2338,8 +2338,7 @@ VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask); -struct radv_sampler_ycbcr_conversion { - struct vk_object_base base; +struct radv_sampler_ycbcr_conversion_state { VkFormat format; VkSamplerYcbcrModelConversion ycbcr_model; VkSamplerYcbcrRange ycbcr_range; @@ -2348,6 +2347,12 @@ VkFilter chroma_filter; }; +struct radv_sampler_ycbcr_conversion { + struct vk_object_base base; + /* The state is hashed for the descriptor set layout. */ + struct radv_sampler_ycbcr_conversion_state state; +}; + struct radv_buffer_view { struct vk_object_base base; struct radeon_winsys_bo *bo;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_query.c ^
@@ -806,8 +806,8 @@ radv_query_shader(struct radv_cmd_buffer cmd_buffer, VkPipeline pipeline, struct radeon_winsys_bo src_bo, struct radeon_winsys_bo dst_bo, uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride, - uint32_t dst_stride, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask, - uint32_t avail_offset) + uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags, + uint32_t pipeline_stats_mask, uint32_t avail_offset) { struct radv_device device = cmd_buffer->device; struct radv_meta_saved_state saved_state; @@ -833,7 +833,7 @@ cmd_buffer->state.predicating = false; uint64_t src_buffer_size = MAX2(src_stride count, avail_offset + 4 * count - src_offset); - uint64_t dst_buffer_size = count == 1 ? src_stride : dst_stride * count; + uint64_t dst_buffer_size = dst_stride * (count - 1) + dst_size; radv_buffer_init(&src_buffer, device, src_bo, src_buffer_size, src_offset); radv_buffer_init(&dst_buffer, device, dst_bo, dst_buffer_size, dst_offset); @@ -1184,6 +1184,29 @@ } } +static size_t +radv_query_result_size(const struct radv_query_pool pool, VkQueryResultFlags flags) +{ + unsigned values = (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0; + switch (pool->type) { + case VK_QUERY_TYPE_TIMESTAMP: + case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: + case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: + case VK_QUERY_TYPE_OCCLUSION: + values += 1; + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + values += util_bitcount(pool->pipeline_stats_mask); + break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + values += 2; + break; + default: + unreachable("trying to get size of unhandled query type"); + } + return values ((flags & VK_QUERY_RESULT_64_BIT) ? 
8 : 4); +} + void radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, @@ -1195,6 +1218,7 @@ struct radeon_cmdbuf cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo); + size_t dst_size = radv_query_result_size(pool, flags); dest_va += dst_buffer->offset + dstOffset; if (!queryCount) @@ -1230,8 +1254,8 @@ } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo, firstQuery pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0, - 0); + dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, + flags, 0, 0); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1246,11 +1270,11 @@ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff); } } - radv_query_shader(cmd_buffer, - &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, - pool->pipeline_stats_mask, pool->availability_offset + 4 * firstQuery); + radv_query_shader( + cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, + pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, + pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask, + pool->availability_offset + 4 * firstQuery); break; case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: @@ -1272,8 +1296,8 @@ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0, - 0); + dst_buffer->offset + dstOffset, 
pool->stride, stride, dst_size, queryCount, + flags, 0, 0); break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1293,8 +1317,8 @@ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0, - 0); + dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, + flags, 0, 0); break; default: unreachable("trying to get results of unhandled query type");
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.c ^
@@ -1762,7 +1762,7 @@ options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug; options->enable_mrt_output_nan_fixup = module && !module->nir && options->key.ps.enable_mrt_output_nan_fixup; - options->adjust_frag_coord_z = device->adjust_frag_coord_z; + options->adjust_frag_coord_z = options->key.adjust_frag_coord_z; options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug; options->debug.func = radv_compiler_debug; options->debug.private_data = &debug_data;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.h ^
@@ -60,6 +60,7 @@ uint32_t optimisations_disabled : 1; uint32_t invariant_geom : 1; uint32_t use_ngg : 1; + uint32_t adjust_frag_coord_z : 1; struct { uint32_t instance_rate_inputs;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c ^
@@ -207,6 +207,20 @@ if (ws) { simple_mtx_unlock(&winsys_creation_mutex); amdgpu_device_deinitialize(dev); + + /* Check that options don't differ from the existing winsys. */ + if (((debug_flags & RADV_DEBUG_ALL_BOS) && !ws->debug_all_bos) || + ((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) || + ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || + (perftest_flags != ws->perftest)) { + fprintf(stderr, "amdgpu: Found options that differ from the existing winsys.\n"); + return NULL; + } + + /* RADV_DEBUG_ZERO_VRAM is the only option that is allowed to be set again. */ + if (debug_flags & RADV_DEBUG_ZERO_VRAM) + ws->zero_all_vram_allocs = true; + return &ws->base; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/null/radv_null_bo.c ^
@@ -65,6 +65,13 @@ { } +static VkResult +radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, + bool resident) +{ + return VK_SUCCESS; +} + static void radv_null_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo) { @@ -80,4 +87,5 @@ ws->base.buffer_destroy = radv_null_winsys_bo_destroy; ws->base.buffer_map = radv_null_winsys_bo_map; ws->base.buffer_unmap = radv_null_winsys_bo_unmap; + ws->base.buffer_make_resident = radv_null_winsys_bo_make_resident; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/null/radv_null_winsys.c ^
@@ -135,6 +135,19 @@ info->has_image_load_dcc_bug = info->family == CHIP_DIMGREY_CAVEFISH \|\| info->family == CHIP_VANGOGH; + + info->has_accelerated_dot_product = + info->family == CHIP_ARCTURUS \|\| info->family == CHIP_ALDEBARAN \|\| + info->family == CHIP_VEGA20 \|\| info->family >= CHIP_NAVI12; + + info->address32_hi = info->chip_class >= GFX9 ? 0xffff8000u : 0x0; + + info->has_rbplus = info->family == CHIP_STONEY \|\| info->chip_class >= GFX9; + info->rbplus_allowed = + info->has_rbplus && + (info->family == CHIP_STONEY \|\| info->family == CHIP_VEGA12 \|\| info->family == CHIP_RAVEN \|\| + info->family == CHIP_RAVEN2 \|\| info->family == CHIP_RENOIR \|\| info->chip_class >= GFX10_3); + } static void
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/broadcom/compiler/qpu_schedule.c ^
@@ -492,7 +492,8 @@ int last_thrsw_tick; int last_branch_tick; int last_setmsf_tick; - bool tlb_locked; + bool first_thrsw_emitted; + bool last_thrsw_emitted; bool fixup_ldvary; int ldvary_count; }; @@ -576,10 +577,26 @@ } static bool -pixel_scoreboard_too_soon(struct choose_scoreboard scoreboard, +scoreboard_is_locked(struct choose_scoreboard scoreboard, + bool lock_scoreboard_on_first_thrsw) +{ + if (lock_scoreboard_on_first_thrsw) { + return scoreboard->first_thrsw_emitted && + scoreboard->tick - scoreboard->last_thrsw_tick >= 3; + } + + return scoreboard->last_thrsw_emitted && + scoreboard->tick - scoreboard->last_thrsw_tick >= 3; +} + +static bool +pixel_scoreboard_too_soon(struct v3d_compile c, + struct choose_scoreboard scoreboard, const struct v3d_qpu_instr inst) { - return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst)); + return qpu_inst_is_tlb(inst) && + !scoreboard_is_locked(scoreboard, + c->lock_scoreboard_on_first_thrsw); } static bool @@ -1053,12 +1070,12 @@ if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst)) continue; - / "A scoreboard wait must not occur in the first two - * instructions of a fragment shader. This is either the - * explicit Wait for Scoreboard signal or an implicit wait - * with the first tile-buffer read or write instruction." + /* "Before doing a TLB access a scoreboard wait must have been + * done. This happens either on the first or last thread + * switch, depending on a setting (scb_wait_on_first_thrsw) in + * the shader state." / - if (pixel_scoreboard_too_soon(scoreboard, inst)) + if (pixel_scoreboard_too_soon(c, scoreboard, inst)) continue; / ldunif and ldvary both write r5, but ldunif does so a tick @@ -1131,12 +1148,10 @@ continue; } - /* Don't merge in something that will lock the TLB. - * Hopwefully what we have in inst will release some - * other instructions, allowing us to delay the - * TLB-locking instruction until later. 
+ /* Don't merge TLB instructions before we have acquired + * the scoreboard lock. / - if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst)) + if (pixel_scoreboard_too_soon(c, scoreboard, inst)) continue; / When we succesfully pair up an ldvary we then try @@ -1273,9 +1288,6 @@ if (inst->sig.ldvary) scoreboard->last_ldvary_tick = scoreboard->tick; - - if (qpu_inst_is_tlb(inst)) - scoreboard->tlb_locked = true; } static void @@ -1490,6 +1502,11 @@ return false; } + if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) && + !inst->sig_magic) { + return false; + } + if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF) return false; @@ -1747,6 +1764,8 @@ merge_inst = inst; } + scoreboard->first_thrsw_emitted = true; + /* If we're emitting the last THRSW (other than program end), then * signal that to the HW by emitting two THRSWs in a row. / @@ -1758,6 +1777,7 @@ struct qinst second_inst = (struct qinst )merge_inst->link.next; second_inst->qpu.sig.thrsw = true; + scoreboard->last_thrsw_emitted = true; } / Make sure the thread end executes within the program lifespan / @@ -1981,6 +2001,17 @@ if (alu_reads_register(inst, false, ldvary_magic, ldvary_index)) return false; + / The implicit ldvary destination may not be written to by a signal + * in the instruction following ldvary. Since we are planning to move + * ldvary to the previous instruction, this means we need to check if + * the current instruction has any other signal that could create this + * conflict. The only other signal that can write to the implicit + * ldvary destination that is compatible with ldvary in the same + * instruction is ldunif. + / + if (inst->sig.ldunif) + return false; + / The previous instruction can't write to the same destination as the * ldvary. */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/broadcom/vulkan/v3dv_device.c ^
@@ -75,23 +75,25 @@ return VK_SUCCESS; } -#define V3DV_HAS_SURFACE (VK_USE_PLATFORM_WIN32_KHR \|\| \ - VK_USE_PLATFORM_WAYLAND_KHR \|\| \ - VK_USE_PLATFORM_XCB_KHR \|\| \ - VK_USE_PLATFORM_XLIB_KHR \|\| \ - VK_USE_PLATFORM_DISPLAY_KHR) +#if defined(VK_USE_PLATFORM_WIN32_KHR) \|\| \ + defined(VK_USE_PLATFORM_WAYLAND_KHR) \|\| \ + defined(VK_USE_PLATFORM_XCB_KHR) \|\| \ + defined(VK_USE_PLATFORM_XLIB_KHR) \|\| \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) +#define V3DV_USE_WSI_PLATFORM +#endif static const struct vk_instance_extension_table instance_extensions = { .KHR_device_group_creation = true, #ifdef VK_USE_PLATFORM_DISPLAY_KHR .KHR_display = true, + .KHR_get_display_properties2 = true, #endif .KHR_external_fence_capabilities = true, .KHR_external_memory_capabilities = true, .KHR_external_semaphore_capabilities = true, - .KHR_get_display_properties2 = true, .KHR_get_physical_device_properties2 = true, -#ifdef V3DV_HAS_SURFACE +#ifdef V3DV_USE_WSI_PLATFORM .KHR_get_surface_capabilities2 = true, .KHR_surface = true, .KHR_surface_protected_capabilities = true, @@ -135,7 +137,7 @@ .KHR_sampler_mirror_clamp_to_edge = true, .KHR_storage_buffer_storage_class = true, .KHR_uniform_buffer_standard_layout = true, -#ifdef V3DV_HAS_SURFACE +#ifdef V3DV_USE_WSI_PLATFORM .KHR_swapchain = true, .KHR_incremental_present = true, #endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glcpp/glcpp-parse.y ^
@@ -1057,6 +1057,7 @@ token = linear_alloc_child(parser->linalloc, sizeof(token_t)); token->type = type; token->value.str = str; + token->expanding = false; return token; } @@ -1069,6 +1070,7 @@ token = linear_alloc_child(parser->linalloc, sizeof(token_t)); token->type = type; token->value.ival = ival; + token->expanding = false; return token; } @@ -1958,6 +1960,10 @@ struct hash_entry entry; macro_t macro; + /* If token is already being expanded return to avoid an infinite loop / + if (token->expanding) + return NULL; + / We only expand identifiers / if (token->type != IDENTIFIER) { return NULL; @@ -1988,14 +1994,15 @@ / Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). / if (_parser_active_list_contains (parser, identifier)) { - / We change the token type here from IDENTIFIER to OTHER to prevent any + /* We change the `expanding` bool to true to prevent any * future expansion of this unexpanded token. / char str; token_list_t expansion; token_t final; str = linear_strdup(parser->linalloc, token->value.str); - final = _token_create_str(parser, OTHER, str); + final = _token_create_str(parser, token->type, str); + final->expanding = true; expansion = _token_list_create(parser); _token_list_append(parser, expansion, final); return expansion;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glcpp/glcpp.h ^
@@ -103,6 +103,7 @@ } while (0) struct token { + bool expanding; int type; YYSTYPE value; YYLTYPE location;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glsl_parser.yy ^
@@ -2047,9 +2047,9 @@ * output from one shader stage will still match an input of a subsequent * stage without the input being declared as invariant." * - * On the desktop side, this text first appears in GLSL 4.30. + * On the desktop side, this text first appears in GLSL 4.20. */ - if (state->is_version(430, 300) && $$.flags.q.in) + if (state->is_version(420, 300) && $$.flags.q.in) _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); } \| interpolation_qualifier type_qualifier
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glsl_to_nir.cpp ^
@@ -39,6 +39,7 @@ #include "main/errors.h" #include "main/mtypes.h" #include "main/shaderobj.h" +#include "main/context.h" #include "util/u_math.h" /* @@ -2616,6 +2617,13 @@ glsl_float64_funcs_to_nir(struct gl_context *ctx, const nir_shader_compiler_options *options) { + /* It's not possible to use float64 on GLSL ES, so don't bother trying to + * build the support code. The support code depends on higher versions of + * desktop GLSL, so it will fail to compile (below) anyway. + */ + if (!_mesa_is_desktop_gl(ctx) || ctx->Const.GLSLVersion < 400) + return NULL; + /* We pretend it's a vertex shader. Ultimately, the stage shouldn't * matter because we're not optimizing anything here. */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/link_varyings.cpp ^
@@ -319,13 +319,13 @@ return; } - /* The GLSL 4.30 and GLSL ES 3.00 specifications say: + /* The GLSL 4.20 and GLSL ES 3.00 specifications say: * * "As only outputs need be declared with invariant, an output from * one shader stage will still match an input of a subsequent stage * without the input being declared as invariant." * - * while GLSL 4.20 says: + * while GLSL 4.10 says: * * "For variables leaving one shader and coming into another shader, * the invariant keyword has to be used in both shaders, or a link @@ -337,7 +337,7 @@ * and fragment shaders must match." */ if (input->data.explicit_invariant != output->data.explicit_invariant && - prog->data->Version < (prog->IsES ? 300 : 430)) { + prog->data->Version < (prog->IsES ? 300 : 420)) { linker_error(prog, "%s shader output `%s' %s invariant qualifier, " "but %s shader input %s invariant qualifier\n",
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/meson.build ^
@@ -20,6 +20,8 @@ nir_depends = files('nir_opcodes.py', 'nir_intrinsics.py') +nir_algebraic_depends = files('nir_opcodes.py', 'nir_algebraic.py') + nir_builder_opcodes_h = custom_target( 'nir_builder_opcodes.h', input : 'nir_builder_opcodes_h.py', @@ -62,7 +64,7 @@ output : 'nir_opt_algebraic.c', command : [prog_python, '@INPUT@'], capture : true, - depend_files : files('nir_algebraic.py'), + depend_files : nir_algebraic_depends, ) nir_intrinsics_h = custom_target( @@ -366,8 +368,6 @@ link_with : _libnir, ) -nir_algebraic_py = files('nir_algebraic.py') - if with_tests test( 'nir_builder',
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir.c ^
@@ -154,6 +154,7 @@ reg->num_components = 0; reg->bit_size = 32; reg->num_array_elems = 0; + reg->divergent = false; exec_list_push_tail(list, &reg->node);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir.h ^
@@ -5532,6 +5532,7 @@ bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); +bool nir_copy_prop_impl(nir_function_impl *impl); bool nir_copy_prop(nir_shader *shader); bool nir_opt_copy_prop_vars(nir_shader *shader);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_builtin_builder.c ^
@@ -223,7 +223,28 @@ tmp); /* sign fixup / - return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); + nir_ssa_def result = nir_fmul(b, tmp, nir_fsign(b, y_over_x)); + + /* The fmin and fmax above will filter out NaN values. This leads to + * non-NaN results for NaN inputs. Work around this by doing + * + * !isnan(y_over_x) ? ... : y_over_x; + / + if (b->exact \|\| + nir_is_float_control_signed_zero_inf_nan_preserve(b->shader->info.float_controls_execution_mode, bit_size)) { + const bool exact = b->exact; + + b->exact = true; + nir_ssa_def is_not_nan = nir_feq(b, y_over_x, y_over_x); + b->exact = exact; + + /* The extra 1.0y_over_x ensures that subnormal results are flushed to + zero. + / + result = nir_bcsel(b, is_not_nan, result, nir_fmul_imm(b, y_over_x, 1.0)); + } + + return result; } nir_ssa_def
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_deref.c ^
@@ -1052,6 +1052,10 @@ if (!glsl_type_is_struct(parent->type)) return false; + /* Empty struct */ + if (glsl_get_length(parent->type) < 1) + return false; + if (glsl_get_struct_field_offset(parent->type, 0) != 0) return false;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_from_ssa.c ^
@@ -935,9 +935,10 @@ static void place_phi_read(nir_builder b, nir_register reg, - nir_ssa_def def, nir_block block, unsigned depth) + nir_ssa_def def, nir_block block, struct set visited_blocks) { - if (block != def->parent_instr->block) { + / Search already visited blocks to avoid back edges in tree / + if (_mesa_set_search(visited_blocks, block) == NULL) { / Try to go up the single-successor tree / bool all_single_successors = true; set_foreach(block->predecessors, entry) { @@ -948,22 +949,16 @@ } } - if (all_single_successors && depth < 32) { + if (all_single_successors) { / All predecessors of this block have exactly one successor and it * is this block so they must eventually lead here without * intersecting each other. Place the reads in the predecessors * instead of this block. - * - * We only let this function recurse 32 times because it can recurse - * indefinitely in the presence of infinite loops. Because we're - * crawling a single-successor chain, it doesn't matter where we - * place it so it's ok to stop at an arbitrary distance. - * - * TODO: One day, we could detect back edges and avoid the recursion - * that way. 
/ + _mesa_set_add(visited_blocks, block); + set_foreach(block->predecessors, entry) { - place_phi_read(b, reg, def, (nir_block )entry->key, depth + 1); + place_phi_read(b, reg, def, (nir_block )entry->key, visited_blocks); } return; } @@ -992,6 +987,8 @@ { nir_builder b; nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node)); + struct set visited_blocks = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); bool progress = false; nir_foreach_instr_safe(instr, block) { @@ -1010,7 +1007,9 @@ nir_foreach_phi_src(src, phi) { assert(src->src.is_ssa); - place_phi_read(&b, reg, src->src.ssa, src->pred, 0); + _mesa_set_add(visited_blocks, src->src.ssa->parent_instr->block); + place_phi_read(&b, reg, src->src.ssa, src->pred, visited_blocks); + _mesa_set_clear(visited_blocks, NULL); } nir_instr_remove(&phi->instr); @@ -1018,6 +1017,8 @@ progress = true; } + _mesa_set_destroy(visited_blocks, NULL); + return progress; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_instr_set.c ^
@@ -272,6 +272,7 @@ hash = HASH(hash, instr->is_array); hash = HASH(hash, instr->is_shadow); hash = HASH(hash, instr->is_new_style_shadow); + hash = HASH(hash, instr->is_sparse); unsigned component = instr->component; hash = HASH(hash, component); for (unsigned i = 0; i < 4; ++i)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_frexp.c ^
@@ -35,7 +35,6 @@ nir_ssa_def abs_x = nir_fabs(b, x); nir_ssa_def zero = nir_imm_floatN_t(b, 0, x->bit_size); nir_ssa_def sign_mantissa_mask, exponent_value; - nir_ssa_def is_not_zero = nir_fneu(b, abs_x, zero); switch (x->bit_size) { case 16: @@ -89,18 +88,31 @@ 32 bits using nir_unpack_64_2x32_split_y. / nir_ssa_def upper_x = nir_unpack_64_2x32_split_y(b, x); - nir_ssa_def zero32 = nir_imm_int(b, 0); + / If x is ±0, ±Inf, or NaN, return x unmodified. / nir_ssa_def new_upper = - nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask), - nir_bcsel(b, is_not_zero, exponent_value, zero32)); + nir_bcsel(b, + nir_iand(b, + nir_flt(b, zero, abs_x), + nir_fisfinite(b, x)), + nir_ior(b, + nir_iand(b, upper_x, sign_mantissa_mask), + exponent_value), + upper_x); nir_ssa_def lower_x = nir_unpack_64_2x32_split_x(b, x); return nir_pack_64_2x32_split(b, lower_x, new_upper); } else { - return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), - nir_bcsel(b, is_not_zero, exponent_value, zero)); + / If x is ±0, ±Inf, or NaN, return x unmodified. */ + return nir_bcsel(b, + nir_iand(b, + nir_flt(b, zero, abs_x), + nir_fisfinite(b, x)), + nir_ior(b, + nir_iand(b, x, sign_mantissa_mask), + exponent_value), + x); } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_io.c ^
@@ -306,6 +306,9 @@ nir_intrinsic_set_range(load, state->type_size(var->type, var->data.bindless)); + if (nir_intrinsic_has_access(load)) + nir_intrinsic_set_access(load, var->data.access); + nir_intrinsic_set_dest_type(load, dest_type); if (load->intrinsic != nir_intrinsic_load_uniform) { @@ -412,6 +415,9 @@ nir_intrinsic_set_write_mask(store, write_mask); + if (nir_intrinsic_has_access(store)) + nir_intrinsic_set_access(store, var->data.access); + if (array_index) store->src[1] = nir_src_for_ssa(array_index);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_memcpy.c ^
@@ -111,11 +111,14 @@ uint64_t size = nir_src_as_uint(cpy->src[2]); uint64_t offset = 0; while (offset < size) { - uint64_t remaining = offset - size; - /* For our chunk size, we choose the largest power-of-two that - * divides size with a maximum of 16B (a vec4). + uint64_t remaining = size - offset; + /* Find the largest chunk size power-of-two (MSB in remaining) + * and limit our chunk to 16B (a vec4). It's important to do as + * many 16B chunks as possible first so that the index + * computation is correct for + * memcpy_(load|store)_deref_elem_imm. */ - unsigned copy_size = 1u << MIN2(ffsll(remaining) - 1, 4); + unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4); const struct glsl_type *copy_type = copy_type_for_byte_size(copy_size);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opcodes.py ^
@@ -830,10 +830,10 @@ # These comparisons for integer-less hardware return 1.0 and 0.0 for true # and false respectively -binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than +binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal -binop("seq", tfloat32, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal -binop("sne", tfloat32, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal +binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal +binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal # SPIRV shifts are undefined for shift-operands >= bitsize, # but SM5 shifts are defined to use only the least significant bits.
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_algebraic.py ^
@@ -445,8 +445,6 @@ # (a + #b) * #c => (a * #c) + (#b * #c) (('imul', ('iadd(is_used_once)', a, '#b'), '#c'), ('iadd', ('imul', a, c), ('imul', b, c))), - (('~fmul', ('fadd(is_used_once)', a, '#b'), '#c'), ('fadd', ('fmul', a, c), ('fmul', b, c)), - '!options->avoid_ternary_with_two_constants'), # ((a + #b) + c) * #d => ((a + c) * #d) + (#b * #d) (('imul', ('iadd(is_used_once)', ('iadd(is_used_once)', a, '#b'), c), '#d'),
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_copy_propagate.c ^
@@ -162,7 +162,7 @@ return progress; } -static bool +bool nir_copy_prop_impl(nir_function_impl *impl) { bool progress = false;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_dce.c ^
@@ -166,18 +166,21 @@ case nir_cf_node_loop: { nir_loop loop = nir_cf_node_as_loop(cf_node); + struct loop_state inner_state; + inner_state.preheader = nir_cf_node_as_block(nir_cf_node_prev(cf_node)); + inner_state.header_phis_changed = false; + / Fast path if the loop has no continues: we can remove instructions * as we mark the others live. / - if (nir_loop_first_block(loop)->predecessors->entries == 1) { + struct set predecessors = nir_loop_first_block(loop)->predecessors; + if (predecessors->entries == 1 && + _mesa_set_next_entry(predecessors, NULL)->key == inner_state.preheader) { progress \|= dce_cf_list(&loop->body, defs_live, parent_loop); break; } /* Mark instructions as live until there is no more progress. / - struct loop_state inner_state; - inner_state.preheader = nir_cf_node_as_block(nir_cf_node_prev(cf_node)); - inner_state.header_phis_changed = false; do { / dce_cf_list() resets inner_state.header_phis_changed itself, so * it doesn't have to be done here.
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_range_analysis.c ^
@@ -1465,7 +1465,6 @@ case nir_op_b32csel: case nir_op_ubfe: case nir_op_bfm: - case nir_op_f2u32: case nir_op_fmul: case nir_op_extract_u8: case nir_op_extract_i8: @@ -1476,6 +1475,7 @@ case nir_op_u2u8: case nir_op_u2u16: case nir_op_u2u32: + case nir_op_f2u32: if (nir_ssa_scalar_chase_alu_src(scalar, 0).def->bit_size > 32) { /* If src is >32 bits, return max */ return max;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/spirv/vtn_cfg.c ^
@@ -1387,6 +1387,8 @@ vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); + if (func->nir_func->impl->structured) + nir_copy_prop_impl(impl); nir_rematerialize_derefs_in_use_blocks_impl(impl); /*
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/spirv/vtn_glsl450.c ^
@@ -332,9 +332,22 @@ break; case GLSLstd450Modf: { + nir_ssa_def inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size); + nir_ssa_def sign_bit = + nir_imm_intN_t(&b->nb, (uint64_t)1 << (src[0]->bit_size - 1), + src[0]->bit_size); nir_ssa_def sign = nir_fsign(nb, src[0]); nir_ssa_def abs = nir_fabs(nb, src[0]); - dest->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + + /* NaN input should produce a NaN results, and ±Inf input should provide + * ±0 result. The fmul(sign(x), ffract(x)) calculation will already + * produce the expected NaN. To get ±0, directly compare for equality + * with Inf instead of using fisfinite (which is false for NaN). + / + dest->def = nir_bcsel(nb, + nir_ieq(nb, abs, inf), + nir_iand(nb, src[0], sign_bit), + nir_fmul(nb, sign, nir_ffract(nb, abs))); struct vtn_pointer i_ptr = vtn_value(b, w[6], vtn_value_type_pointer)->pointer; struct vtn_ssa_value whole = vtn_create_ssa_value(b, i_ptr->type->type); @@ -344,17 +357,45 @@ } case GLSLstd450ModfStruct: { + nir_ssa_def inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size); + nir_ssa_def sign_bit = + nir_imm_intN_t(&b->nb, (uint64_t)1 << (src[0]->bit_size - 1), + src[0]->bit_size); nir_ssa_def sign = nir_fsign(nb, src[0]); nir_ssa_def abs = nir_fabs(nb, src[0]); vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); - dest->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + + / See GLSLstd450Modf for explanation of the Inf and NaN handling. / + dest->elems[0]->def = nir_bcsel(nb, + nir_ieq(nb, abs, inf), + nir_iand(nb, src[0], sign_bit), + nir_fmul(nb, sign, nir_ffract(nb, abs))); dest->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); break; } - case GLSLstd450Step: - dest->def = nir_sge(nb, src[1], src[0]); + case GLSLstd450Step: { + / The SPIR-V Extended Instructions for GLSL spec says: + * + * Result is 0.0 if x < edge; otherwise result is 1.0. + * + * Here src[1] is x, and src[0] is edge. 
The direct implementation is + * + * bcsel(src[1] < src[0], 0.0, 1.0) + * + * This is effectively b2f(!(src1 < src0)). Previously this was + * implemented using sge(src1, src0), but that produces incorrect + * results for NaN. Instead, we use the identity b2f(!x) = 1 - b2f(x). + / + const bool exact = nb->exact; + nb->exact = true; + + nir_ssa_def cmp = nir_slt(nb, src[1], src[0]); + + nb->exact = exact; + dest->def = nir_fsub(nb, nir_imm_floatN_t(nb, 1.0f, cmp->bit_size), cmp); break; + } case GLSLstd450Length: dest->def = nir_fast_length(nb, src[0]); @@ -479,11 +520,35 @@ nir_ssa_def x = nir_fclamp(nb, src[0], nir_imm_floatN_t(nb, -clamped_x, bit_size), nir_imm_floatN_t(nb, clamped_x, bit_size)); - dest->def = - nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x), - nir_fexp(nb, nir_fneg(nb, x))), - nir_fadd(nb, nir_fexp(nb, x), - nir_fexp(nb, nir_fneg(nb, x)))); + + / The clamping will filter out NaN values causing an incorrect result. + * The comparison is carefully structured to get NaN result for NaN and + * get -0 for -0. + * + * result = abs(s) > 0.0 ? ... : s; + / + const bool exact = nb->exact; + + nb->exact = true; + nir_ssa_def is_regular = nir_flt(nb, + nir_imm_floatN_t(nb, 0, bit_size), + nir_fabs(nb, src[0])); + + /* The extra 1.0s ensures that subnormal inputs are flushed to zero + when that is selected by the shader. + / + nir_ssa_def flushed = nir_fmul(nb, + src[0], + nir_imm_floatN_t(nb, 1.0, bit_size)); + nb->exact = exact; + + dest->def = nir_bcsel(nb, + is_regular, + nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x), + nir_fexp(nb, nir_fneg(nb, x))), + nir_fadd(nb, nir_fexp(nb, x), + nir_fexp(nb, nir_fneg(nb, x)))), + flushed); break; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/egl/main/egldefines.h ^
@@ -46,7 +46,11 @@ #define _EGL_MAX_PBUFFER_WIDTH 4096 #define _EGL_MAX_PBUFFER_HEIGHT 4096 +#ifdef AMBER +#define _EGL_VENDOR_STRING "Mesa Project (Amber)" +#else #define _EGL_VENDOR_STRING "Mesa Project" +#endif #ifdef __cplusplus }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/egl/meson.build ^
@@ -165,7 +165,7 @@ files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c] files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c') glvnd_config = configuration_data() - glvnd_config.set('glvnd_vendor_name', get_option('glvnd-vendor-name')) + glvnd_config.set('glvnd_vendor_name', glvnd_vendor_name) configure_file( configuration: glvnd_config, input : 'main/50_mesa.json',
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/etnaviv/drm/etnaviv_bo.c ^
@@ -86,7 +86,11 @@ bo = etna_bo_ref(entry->data); /* don't break the bucket if this bo was found in one */ - list_delinit(&bo->list); + if (list_is_linked(&bo->list)) { + VG_BO_OBTAIN(bo); + etna_device_ref(bo->dev); + list_delinit(&bo->list); + } } return bo;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c ^
@@ -56,13 +56,13 @@ void *buffer; /* - * Increase the command buffer size by 1 kiB. Here we pick 1 kiB + * Increase the command buffer size by 4 kiB. Here we pick 4 kiB * increment to prevent it from growing too much too quickly. */ size = ALIGN(stream->size + n, 1024); /* Command buffer is too big for older kernel versions */ - if (size >= 32768) + if (size > 0x4000) goto error; buffer = realloc(stream->buffer, size * 4); @@ -75,7 +75,7 @@ return; error: - WARN_MSG("command buffer too long, forcing flush."); + DEBUG_MSG("command buffer too long, forcing flush."); etna_cmd_stream_force_flush(stream); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/afuc/meson.build ^
@@ -18,6 +18,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +if with_tests + diff = find_program('diff') +endif + afuc_parser = custom_target( 'parser.[ch]', input: 'parser.y', @@ -55,50 +59,7 @@ build_by_default : with_tools.contains('freedreno'), install: install_fd_decode_tools, ) - -disasm = executable( - 'afuc-disasm', - [ - 'disasm.c', - 'emu.c', - 'emu.h', - 'emu-ds.c', - 'emu-regs.c', - 'emu-ui.c', - 'util.c', - 'util.h', - ], - include_directories: [ - inc_freedreno, - inc_freedreno_rnn, - inc_include, - inc_src, - inc_util, - ], - link_with: [ - libfreedreno_rnn, - ], - dependencies: [ - ], - build_by_default : with_tools.contains('freedreno'), - install: install_fd_decode_tools, -) - if with_tests - diff = find_program('diff') - - disasm_fw = custom_target('afuc_test.asm', - output: 'afuc_test.asm', - command: [disasm, '-u', files('../.gitlab-ci/reference/afuc_test.fw'), '-g', '630'], - capture: true - ) - test('afuc-disasm', - diff, - args: ['-u', files('../.gitlab-ci/reference/afuc_test.asm'), disasm_fw], - suite: 'freedreno', - workdir: meson.source_root() - ) - asm_fw = custom_target('afuc_test.fw', output: 'afuc_test.fw', command: [asm, '-g', '6', files('../.gitlab-ci/traces/afuc_test.asm'), '@OUTPUT@'], @@ -110,3 +71,48 @@ workdir: meson.source_root() ) endif + +# Disasm requires mmaping >4GB +if cc.sizeof('size_t') > 4 + disasm = executable( + 'afuc-disasm', + [ + 'disasm.c', + 'emu.c', + 'emu.h', + 'emu-ds.c', + 'emu-regs.c', + 'emu-ui.c', + 'util.c', + 'util.h', + ], + include_directories: [ + inc_freedreno, + inc_freedreno_rnn, + inc_include, + inc_src, + inc_util, + ], + link_with: [ + libfreedreno_rnn, + ], + dependencies: [ + ], + build_by_default : with_tools.contains('freedreno'), + install: install_fd_decode_tools, + ) + + if with_tests + disasm_fw = custom_target('afuc_test.asm', + output: 'afuc_test.asm', + command: [disasm, '-u', files('../.gitlab-ci/reference/afuc_test.fw'), '-g', 
'630'], + capture: true + ) + test('afuc-disasm', + diff, + args: ['-u', files('../.gitlab-ci/reference/afuc_test.asm'), disasm_fw], + suite: 'freedreno', + workdir: meson.source_root() + ) + endif +endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a630-fails.txt ^
@@ -262,8 +262,6 @@ spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8I- swizzled- border color only,Fail spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8UI- swizzled- border color only,Fail -spec@arb_texture_view@rendering-layers-image,Fail -spec@arb_texture_view@rendering-layers-image@layers rendering of image1DArray,Fail spec@arb_timer_query@timestamp-get,Fail spec@arb_transform_feedback3@arb_transform_feedback3-ext_interleaved_two_bufs_vs,Fail spec@arb_transform_feedback3@gl_skipcomponents1-1,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ci/traces-freedreno.yml ^
@@ -45,9 +45,9 @@ # checksum: 4b707f385256b380c936186db8c251cb # 1 minute - device: freedreno-a530 - checksum: 130dbeac42683b46fed4b268c5aad984 + checksum: a71d62bb2c0fabeca41468628777b441 - device: freedreno-a630 - checksum: 139861e52f9425b4adb7c0b90b885f91 + checksum: 339dce29ae08569652438116829510c7 - path: xonotic/xonotic-keybench-high.trace expectations: # Skipped since it's long on a530. @@ -327,9 +327,9 @@ #- device: freedreno-a306 # checksum: 0c57ccc3989b75a940b28ea1cc09cb0d - device: freedreno-a530 - checksum: 4715d72a7958f2fd5a387c16b3a01579 + checksum: bc19f0f58935fdb348f401396e6845e1 - device: freedreno-a630 - checksum: 1e397c5c34c9c50350a8db1a060a6bbb + checksum: f546f840e916ab0f11f8df0e4eee584d - path: glmark2/shading:shading=blinn-phong-inf.trace expectations: - device: freedreno-a306 @@ -422,7 +422,7 @@ - path: gputest/gimark.trace expectations: - device: freedreno-a630 - checksum: dd8fb768033d09f6edc98b4cfff02c6f + checksum: e58167bd8eeb8952facbc00ff0449135 - path: gputest/pixmark-julia-fp32.trace expectations: - device: freedreno-a630 @@ -452,11 +452,11 @@ - path: gputest/plot3d.trace expectations: - device: freedreno-a306 - checksum: 302943895dbdd7730958fb0175f23b7f + checksum: f6ecd9b8afc692b0cdb459b9b30db8d4 - device: freedreno-a530 - checksum: 755aa5b521237ddf9fea3181d2ba2b75 + checksum: 4faafe5fab0d8ec6d7b549c94f663c92 - device: freedreno-a630 - checksum: 302aec1ced68e22182460b617b0f2aef + checksum: 0a6a16c394a413f02ec2ebcc3251e366 # Note: Requires GL4 for tess. 
- path: gputest/tessmark.trace expectations: @@ -473,9 +473,9 @@ - path: humus/AmbientAperture.trace expectations: - device: freedreno-a306 - checksum: 3d9243cbd0659cb58b16cade2be3f2c2 + checksum: 8d4c52f0af9c09710d358f24c73fae3c - device: freedreno-a530 - checksum: c55c1ba5683306980956b5f89563f343 + checksum: aab5c853e383e1cda56663d65f6925ad - device: freedreno-a630 checksum: 83fd7bce0fc1e1f30bd143b7d30ca890 - path: humus/CelShading.trace @@ -536,7 +536,7 @@ expectations: # a306/a630 would need higher GL version to run - device: freedreno-a630 - checksum: e93cf9682c9ca5ed6a6effe5b7fdd386 + checksum: 0e32ca8fc815a7250f38a07faeafb21b - path: pathfinder/canvas_text_v2.trace expectations: # a306/a630 would need higher GL version to run
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3.c ^
@@ -402,9 +402,9 @@ { for (unsigned i = 0; i < block->physical_predecessors_count; i++) { if (block->physical_predecessors[i] == pred) { - if (i < block->predecessors_count - 1) { + if (i < block->physical_predecessors_count - 1) { block->physical_predecessors[i] = - block->physical_predecessors[block->predecessors_count - 1]; + block->physical_predecessors[block->physical_predecessors_count - 1]; } block->physical_predecessors_count--; @@ -490,6 +490,11 @@ new_reg = reg; } + if (instr->address) { + assert(instr->srcs_count > 0); + new_instr->address = new_instr->srcs[instr->srcs_count - 1]; + } + return new_instr; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_compiler_nir.c ^
@@ -1130,10 +1130,10 @@ struct ir3_block b = ctx->block; struct tex_src_info info = {0}; nir_intrinsic_instr bindless_tex = ir3_bindless_resource(intr->src[0]); - ctx->so->bindless_tex = true; if (bindless_tex) { /* Bindless case / + ctx->so->bindless_tex = true; info.flags \|= IR3_INSTR_B; / Gather information required to determine which encoding to @@ -1235,11 +1235,20 @@ } info.flags \|= flags; - for (unsigned i = 0; i < ncoords; i++) - coords[i] = src0[i]; - - if (ncoords == 1) - coords[ncoords++] = create_immed(b, 0); + /* hw doesn't do 1d, so we treat it as 2d with height of 1, and patch up the + * y coord. Note that the array index must come after the fake y coord. + */ + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr); + if (dim == GLSL_SAMPLER_DIM_1D \|\| dim == GLSL_SAMPLER_DIM_BUF) { + coords[0] = src0[0]; + coords[1] = create_immed(b, 0); + for (unsigned i = 1; i < ncoords; i++) + coords[i + 1] = src0[i]; + ncoords++; + } else { + for (unsigned i = 0; i < ncoords; i++) + coords[i] = src0[i]; + } sam = emit_sam(ctx, OPC_ISAM, info, type, 0b1111, ir3_create_collect(b, coords, ncoords), NULL);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_cp.c ^
@@ -303,6 +303,22 @@ return valid_swap; } +/* Values that are uniform inside a loop can become divergent outside + * it if the loop has a divergent trip count. This means that we can't + * propagate a copy of a shared to non-shared register if it would + * make the shared reg's live range extend outside of its loop. Users + * outside the loop would see the value for the thread(s) that last + * exited the loop, rather than for their own thread. + / +static bool +is_valid_shared_copy(struct ir3_instruction dst_instr, + struct ir3_instruction src_instr, + struct ir3_register src_reg) +{ + return !(src_reg->flags & IR3_REG_SHARED) \|\| + dst_instr->block->loop_id == src_instr->block->loop_id; +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -316,22 +332,14 @@ { struct ir3_instruction src = ssa(reg); - / Values that are uniform inside a loop can become divergent outside - * it if the loop has a divergent trip count. This means that we can't - * propagate a copy of a shared to non-shared register if it would - * make the shared reg's live range extend outside of its loop. Users - * outside the loop would see the value for the thread(s) that last - * exited the loop, rather than for their own thread. - / - if ((src->dsts[0]->flags & IR3_REG_SHARED) && - src->block->loop_id != instr->block->loop_id) - return false; - if (is_eligible_mov(src, instr, true)) { / simple case, no immed/const/relativ, only mov's w/ ssa src: / struct ir3_register src_reg = src->srcs[0]; unsigned new_flags = reg->flags; + if (!is_valid_shared_copy(instr, src, src_reg)) + return false; + combine_flags(&new_flags, src); if (ir3_valid_flags(instr, n, new_flags)) { @@ -357,6 +365,9 @@ struct ir3_register *src_reg = src->srcs[0]; unsigned new_flags = reg->flags; + if (!is_valid_shared_copy(instr, src, src_reg)) + return false; + if (src_reg->flags & IR3_REG_ARRAY) return false;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_delay.c ^
@@ -98,7 +98,7 @@ */ bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) != (consumer->srcs[n]->flags & IR3_REG_HALF); - unsigned penalty = mismatched_half ? 2 : 0; + unsigned penalty = mismatched_half ? 3 : 0; if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1 + penalty;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_lower_parallelcopy.c ^
@@ -109,11 +109,18 @@ .flags = entry->flags & ~IR3_REG_HALF, }); + /* If src and dst are within the same full register, then swapping src + * with tmp above will also move dst to tmp. Account for that here. + / + unsigned dst = + (entry->src.reg & ~1u) == (entry->dst & ~1u) ? + tmp + (entry->dst & 1u) : entry->dst; + / Do the original swap with src replaced with tmp / do_swap(compiler, instr, &(struct copy_entry){ .src = {.reg = tmp + (entry->src.reg & 1)}, - .dst = entry->dst, + .dst = dst, .flags = entry->flags, }); @@ -192,9 +199,16 @@ .flags = entry->flags & ~IR3_REG_HALF, }); + / Similar to in do_swap(), account for src being swapped with tmp if + * src and dst are in the same register. + / + struct copy_src src = entry->src; + if (!src.flags && (src.reg & ~1u) == (entry->dst & ~1u)) + src.reg = tmp + (src.reg & 1u); + do_copy(compiler, instr, &(struct copy_entry){ - .src = entry->src, + .src = src, .dst = tmp + (entry->dst & 1), .flags = entry->flags, }); @@ -223,12 +237,12 @@ cov->cat1.src_type = TYPE_U32; ir3_instr_move_before(cov, instr); } else { - / shr.b dst, src, h(16) / + / shr.b dst, src, (16) / struct ir3_instruction shr = ir3_instr_create(instr->block, OPC_SHR_B, 1, 2); ir3_dst_create(shr, dst_num, entry->flags); ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF); - ir3_src_create(shr, 0, entry->flags \| IR3_REG_IMMED)->uim_val = 16; + ir3_src_create(shr, 0, IR3_REG_IMMED)->uim_val = 16; ir3_instr_move_before(shr, instr); } return;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_nir.c ^
@@ -210,6 +210,7 @@ progress |= OPT(s, nir_lower_phis_to_scalar, false); progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_deref); progress |= OPT(s, nir_opt_dce); progress |= OPT(s, nir_opt_cse); static int gcm = -1;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_parser.y ^
@@ -674,7 +674,7 @@ } buf_header_addr_reg: -| '(' T_CONSTANT ')' { + '(' T_CONSTANT ')' { assert(($2 & 0x1) == 0); /* half-reg not allowed */ unsigned reg = $2 >> 1; @@ -682,6 +682,7 @@ /* reserve space in immediates for the actual value to be plugged in later: */ add_const($2, 0, 0, 0, 0); } +| buf_header: T_A_BUF const_val { int idx = info->num_bufs++;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_ra.c ^
@@ -780,10 +780,12 @@ return false; } + unsigned conflicting_file_size = + reg_file_size(file, conflicting->interval.reg); unsigned avail_start, avail_end; bool evicted = false; BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict, - reg_file_size(file, conflicting->interval.reg)) { + conflicting_file_size) { unsigned size = avail_end - avail_start; /* non-half registers must be aligned / @@ -820,6 +822,10 @@ conflicting->physreg_end - conflicting->physreg_start) continue; + if (killed->physreg_end > conflicting_file_size \|\| + conflicting->physreg_end > reg_file_size(file, killed->interval.reg)) + continue; + / We can't swap the killed range if it partially/fully overlaps the * space we're trying to allocate or (in speculative mode) if it's * already been swapped and will overlap when we actually evict. @@ -962,9 +968,9 @@ assert(!interval->frozen); /* Killed sources don't count because they go at the end and can - * overlap the register we're trying to add. + * overlap the register we're trying to add, unless it's a source. 
/ - if (!interval->is_killed && !is_source) { + if (!interval->is_killed \|\| is_source) { removed_size += interval->physreg_end - interval->physreg_start; if (interval->interval.reg->flags & IR3_REG_HALF) { removed_half_size += interval->physreg_end - @@ -1322,7 +1328,8 @@ struct ra_parallel_copy entry = &ctx->parallel_copies[i]; struct ir3_register reg = ir3_dst_create(pcopy, INVALID_REG, - entry->interval->interval.reg->flags & ~IR3_REG_SSA); + entry->interval->interval.reg->flags & + (IR3_REG_HALF \| IR3_REG_ARRAY)); reg->size = entry->interval->interval.reg->size; reg->wrmask = entry->interval->interval.reg->wrmask; assign_reg(pcopy, reg, ra_interval_get_num(entry->interval)); @@ -1332,7 +1339,8 @@ struct ra_parallel_copy entry = &ctx->parallel_copies[i]; struct ir3_register reg = ir3_src_create(pcopy, INVALID_REG, - entry->interval->interval.reg->flags & ~IR3_REG_SSA); + entry->interval->interval.reg->flags & + (IR3_REG_HALF \| IR3_REG_ARRAY)); reg->size = entry->interval->interval.reg->size; reg->wrmask = entry->interval->interval.reg->wrmask; assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags)); @@ -1768,8 +1776,9 @@ pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i]; } - struct ir3_register dst_reg = - ir3_dst_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA); + unsigned flags = reg->flags & (IR3_REG_HALF \| IR3_REG_ARRAY); + + struct ir3_register dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags); dst_reg->wrmask = reg->wrmask; dst_reg->size = reg->size; assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags)); @@ -1778,8 +1787,7 @@ pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i]; } - struct ir3_register src_reg = - ir3_src_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA); + struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags); src_reg->wrmask = reg->wrmask; src_reg->size = reg->size; assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_spill.c ^
@@ -1783,15 +1783,31 @@ return true; } +static struct ir3_register * +simplify_phi_def(struct ir3_register def) +{ + if (def->instr->opc == OPC_META_PHI) { + struct ir3_instruction phi = def->instr; + + /* Note: this function is always called at least once after visiting the + * phi, so either there has been a simplified phi in the meantime, in + * which case we will set progress=true and visit the definition again, or + * phi->data already has the most up-to-date value. Therefore we don't + * have to recursively check phi->data. + / + if (phi->data) + return phi->data; + } + + return def; +} + static void simplify_phi_srcs(struct ir3_instruction instr) { foreach_src (src, instr) { - if (src->def && src->def->instr->opc == OPC_META_PHI) { - struct ir3_instruction phi = src->def->instr; - if (phi->data) - src->def = phi->data; - } + if (src->def) + src->def = simplify_phi_def(src->def); } } @@ -1821,6 +1837,10 @@ simplify_phi_srcs(instr); } + / Visit phi nodes in the sucessors to make sure that phi sources are + * always visited at least once after visiting the definition they + * point to. See note in simplify_phi_def() for why this is necessary. + / for (unsigned i = 0; i < 2; i++) { struct ir3_block succ = block->successors[i]; if (!succ) @@ -1828,11 +1848,13 @@ foreach_instr (instr, &succ->instr_list) { if (instr->opc != OPC_META_PHI) break; - if (instr->flags & IR3_INSTR_UNUSED) - continue; - - simplify_phi_srcs(instr); - progress \|= simplify_phi_node(instr); + if (instr->flags & IR3_INSTR_UNUSED) { + if (instr->data) + instr->data = simplify_phi_def(instr->data); + } else { + simplify_phi_srcs(instr); + progress \|= simplify_phi_node(instr); + } } } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/meson.build ^
@@ -27,7 +27,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) ir3_nir_imul_c = custom_target( @@ -39,7 +39,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) ir3_parser = custom_target(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_cmd_buffer.c ^
@@ -293,7 +293,7 @@ * setting the SINGLE_PRIM_MODE field to the same value that the blob does * for advanced_blend in sysmem mode if a feedback loop is detected. / - if (subpass->feedback) { + if (subpass->feedback_loop_color \|\| subpass->feedback_loop_ds) { tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) \| @@ -477,7 +477,6 @@ tu6_apply_depth_bounds_workaround(struct tu_device device, uint32_t *rb_depth_cntl) { - return; if (!device->physical_device->info->a6xx.depth_bounds_require_depth_test_quirk) return; @@ -3832,7 +3831,8 @@ bool depth_write = tu6_writes_depth(cmd, depth_test_enable); bool stencil_write = tu6_writes_stencil(cmd); - if (cmd->state.pipeline->lrz.fs_has_kill && + if ((cmd->state.pipeline->lrz.fs_has_kill \|\| + cmd->state.pipeline->subpass_feedback_loop_ds) && (depth_write \|\| stencil_write)) { zmode = cmd->state.lrz.valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_formats.c ^
@@ -484,7 +484,7 @@ const struct tu_physical_device physical_device, const VkPhysicalDeviceImageFormatInfo2 pImageFormatInfo, VkExternalMemoryHandleTypeFlagBits handleType, - VkExternalMemoryProperties external_properties) + VkExternalImageFormatProperties external_properties) { VkExternalMemoryFeatureFlagBits flags = 0; VkExternalMemoryHandleTypeFlags export_flags = 0; @@ -526,11 +526,14 @@ handleType); } - *external_properties = (VkExternalMemoryProperties) { - .externalMemoryFeatures = flags, - .exportFromImportedHandleTypes = export_flags, - .compatibleHandleTypes = compat_flags, - }; + if (external_properties) { + external_properties->externalMemoryProperties = + (VkExternalMemoryProperties) { + .externalMemoryFeatures = flags, + .exportFromImportedHandleTypes = export_flags, + .compatibleHandleTypes = compat_flags, + }; + } return VK_SUCCESS; } @@ -597,7 +600,7 @@ if (external_info && external_info->handleType != 0) { result = tu_get_external_image_format_properties( physical_device, base_info, external_info->handleType, - &external_props->externalMemoryProperties); + external_props); if (result != VK_SUCCESS) goto fail; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_pass.c ^
@@ -448,7 +448,7 @@ continue; for (unsigned k = 0; k < subpass->input_count; k++) { if (subpass->input_attachments[k].attachment == a) { - subpass->feedback = true; + subpass->feedback_loop_color = true; break; } } @@ -458,7 +458,7 @@ for (unsigned k = 0; k < subpass->input_count; k++) { if (subpass->input_attachments[k].attachment == subpass->depth_stencil_attachment.attachment) { - subpass->feedback = true; + subpass->feedback_loop_ds = true; break; } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_pipeline.c ^
@@ -273,6 +273,8 @@ VkFormat depth_attachment_format; uint32_t render_components; uint32_t multiview_mask; + + bool subpass_feedback_loop_ds; }; static bool @@ -3077,6 +3079,7 @@ return VK_ERROR_OUT_OF_HOST_MEMORY; (pipeline)->layout = builder->layout; + (pipeline)->subpass_feedback_loop_ds = builder->subpass_feedback_loop_ds; (pipeline)->executables_mem_ctx = ralloc_context(NULL); util_dynarray_init(&(pipeline)->executables, (pipeline)->executables_mem_ctx); @@ -3190,6 +3193,8 @@ const struct tu_subpass subpass = &pass->subpasses[create_info->subpass]; + builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds; + builder->multiview_mask = subpass->multiview_mask; builder->rasterizer_discard =
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_private.h ^
@@ -1262,6 +1262,8 @@ struct tu_lrz_pipeline lrz; + bool subpass_feedback_loop_ds; + void executables_mem_ctx; / tu_pipeline_executable / struct util_dynarray executables; @@ -1610,8 +1612,8 @@ uint32_t resolve_count; bool resolve_depth_stencil; - / True if there is any feedback loop at all. / - bool feedback; + bool feedback_loop_color; + bool feedback_loop_ds; / True if we must invalidate UCHE thanks to a feedback loop. */ bool feedback_invalidate;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_query.c ^
@@ -117,7 +117,7 @@ query_iova(struct occlusion_query_slot, pool, query, field) #define pipeline_stat_query_iova(pool, query, field) \ - pool->bo.iova + pool->stride * query + \ + pool->bo.iova + pool->stride * (query) + \ offsetof(struct pipeline_stat_query_slot, field) #define primitive_query_iova(pool, query, field, i) \ @@ -125,9 +125,9 @@ offsetof(struct primitive_slot_value, values[i]) #define perf_query_iova(pool, query, field, i) \ - pool->bo.iova + pool->stride * query + \ + pool->bo.iova + pool->stride * (query) + \ sizeof(struct query_slot) + \ - sizeof(struct perfcntr_query_slot) * i + \ + sizeof(struct perfcntr_query_slot) * (i) + \ offsetof(struct perfcntr_query_slot, field) #define query_available_iova(pool, query) \ @@ -135,11 +135,11 @@ #define query_result_iova(pool, query, type, i) \ pool->bo.iova + pool->stride * (query) + \ - sizeof(struct query_slot) + sizeof(type) * i + sizeof(struct query_slot) + sizeof(type) * (i) #define query_result_addr(pool, query, type, i) \ - pool->bo.map + pool->stride * query + \ - sizeof(struct query_slot) + sizeof(type) * i + pool->bo.map + pool->stride * (query) + \ + sizeof(struct query_slot) + sizeof(type) * (i) #define query_is_available(slot) slot->available
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/draw/draw_llvm.c ^
@@ -3515,8 +3515,9 @@ LLVMValueRef counter = LLVMGetParam(variant_coro, 6); LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length)); for (i = 0; i < vector_length; i++) { - LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), lp_build_const_int32(gallivm, i), ""); - invocvec = LLVMBuildInsertElement(builder, invocvec, idx, idx, ""); + LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i); + LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, ""); + invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, ""); } system_values.invocation_id = invocvec;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c ^
@@ -316,7 +316,7 @@ draw->rasterizer->clip_halfz, (draw->vs.edgeflag_output ? TRUE : FALSE) ); - draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL ); + draw_pt_so_emit_prepare( fpme->so_emit, (gs == NULL && tes == NULL)); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, out_prim,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/driver_trace/tr_context.c ^
@@ -2191,7 +2191,7 @@ if (!trace_enabled()) goto error1; - tr_ctx = ralloc(NULL, struct trace_context); + tr_ctx = rzalloc(NULL, struct trace_context); if (!tr_ctx) goto error1; @@ -2271,6 +2271,8 @@ TR_CTX_INIT(create_stream_output_target); TR_CTX_INIT(stream_output_target_destroy); TR_CTX_INIT(set_stream_output_targets); + /* this is lavapipe-only and can't be traced */ + tr_ctx->base.stream_output_target_offset = pipe->stream_output_target_offset; TR_CTX_INIT(resource_copy_region); TR_CTX_INIT(blit); TR_CTX_INIT(flush_resource);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h ^
@@ -132,8 +132,10 @@ return 1; case PIPE_SHADER_CAP_FP16: case PIPE_SHADER_CAP_FP16_DERIVATIVES: - case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: return lp_has_fp16(); + //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: + return 0; case PIPE_SHADER_CAP_INT64_ATOMICS: return 0; case PIPE_SHADER_CAP_INT16:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c ^
@@ -732,8 +732,7 @@ break; } case nir_op_fisfinite32: - result = lp_build_isfinite(get_flt_bld(bld_base, src_bit_size[0]), src[0]); - break; + unreachable("Should have been lowered in nir_opt_algebraic_late."); case nir_op_flog2: result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; @@ -2458,7 +2457,6 @@ NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options); } while (progress); - nir_lower_bool_to_int32(nir); do { progress = false; @@ -2469,4 +2467,9 @@ NIR_PASS_V(nir, nir_opt_cse); } } while (progress); + + if (nir_lower_bool_to_int32(nir)) { + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c ^
@@ -1438,6 +1438,7 @@ { struct lp_build_nir_soa_context bld = (struct lp_build_nir_soa_context )bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = bld_base->base.gallivm->builder; params->type = bld_base->base.type; params->context_ptr = bld->context_ptr; @@ -1491,10 +1492,25 @@ return; } - if (params->texture_index_offset) - params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder, - params->texture_index_offset, - lp_build_const_int32(bld_base->base.gallivm, 0), ""); + if (params->texture_index_offset) { + struct lp_build_loop_state loop_state; + LLVMValueRef exec_mask = mask_vec(bld_base); + LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, ""); + LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, ""); + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); + + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, if_cond); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset, + loop_state.counter, ""); + LLVMBuildStore(builder, value_ptr, res_store); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), + NULL, LLVMIntUGE); + LLVMValueRef idx_val = LLVMBuildLoad(builder, res_store, ""); + params->texture_index_offset = idx_val; + } params->type = bld_base->base.type; bld->sampler->emit_tex_sample(bld->sampler,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c ^
@@ -1726,7 +1726,10 @@ /* ima = +0.5 / abs(coord); / LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_div(coord_bld, posHalf, absCoord); + / avoid div by zero */ + LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero); + LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord); + LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero); return ima; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c ^
@@ -4660,7 +4660,7 @@ out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height); out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); } - if (dims >= 3) { + if (dims >= 3 \|\| layer_coord) { out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c ^
@@ -2106,7 +2106,7 @@ switch (instr->op) { case nir_texop_tex: if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) > - instr->coord_components + instr->is_shadow) + MAX2(instr->coord_components, 2) + instr->is_shadow) tex_opcode = TGSI_OPCODE_TXP; else tex_opcode = TGSI_OPCODE_TEX;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_box.h ^
@@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "util/u_math.h" +#include "util/format/u_format.h" static inline void u_box_1d(unsigned x, unsigned w, struct pipe_box *box) @@ -239,4 +240,22 @@ dst->depth = MAX2(src->depth >> l, 1); } +/* Converts a box specified in pixels to an equivalent box specified + * in blocks, where the boxes represent a region-of-interest of an image with + * the given format. This is trivial (a copy) for uncompressed formats. + */ +static inline void +u_box_pixels_to_blocks(struct pipe_box *blocks, + const struct pipe_box *pixels, enum pipe_format format) +{ + u_box_3d( + pixels->x / util_format_get_blockwidth(format), + pixels->y / util_format_get_blockheight(format), + pixels->z, + DIV_ROUND_UP(pixels->width, util_format_get_blockwidth(format)), + DIV_ROUND_UP(pixels->height, util_format_get_blockheight(format)), + pixels->depth, + blocks); +} + #endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_threaded_context.c ^
@@ -2148,11 +2148,9 @@ * only get resource_copy_region. */ if (usage & PIPE_MAP_DISCARD_RANGE) { - struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers); + struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); uint8_t *map; - ttrans->staging = NULL; - u_upload_alloc(tc->base.stream_uploader, 0, box->width + (box->x % tc->map_buffer_alignment), tc->map_buffer_alignment, &ttrans->b.offset,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_vbuf.c ^
@@ -491,9 +491,10 @@ * themselves, meaning that if stride < element_size, the mapped size will * be too small and conversion will overrun the map buffer * - * instead, add the size of the largest possible attribute to ensure the map is large enough + * instead, add the size of the largest possible attribute to the final attribute's offset + * in order to ensure the map is large enough */ - unsigned last_offset = offset + size - vb->stride; + unsigned last_offset = size - vb->stride; size = MAX2(size, last_offset + sizeof(double)*4); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_batch.c ^
@@ -264,21 +264,30 @@ crocus_batch_reset(batch); } -static struct drm_i915_gem_exec_object2 * -find_validation_entry(struct crocus_batch batch, struct crocus_bo bo) +static int +find_exec_index(struct crocus_batch batch, struct crocus_bo bo) { unsigned index = READ_ONCE(bo->index); if (index < batch->exec_count && batch->exec_bos[index] == bo) - return &batch->validation_list[index]; + return index; /* May have been shared between multiple active batches / for (index = 0; index < batch->exec_count; index++) { if (batch->exec_bos[index] == bo) - return &batch->validation_list[index]; + return index; } + return -1; +} + +static struct drm_i915_gem_exec_object2 +find_validation_entry(struct crocus_batch batch, struct crocus_bo bo) +{ + int index = find_exec_index(batch, bo); - return NULL; + if (index == -1) + return NULL; + return &batch->validation_list[index]; } static void @@ -410,7 +419,7 @@ (struct drm_i915_gem_relocation_entry) { .offset = offset, .delta = target_offset, - .target_handle = target->index, + .target_handle = find_exec_index(batch, target), .presumed_offset = entry->offset, };
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_blit.c ^
@@ -433,6 +433,7 @@ info->src.level, &info->src.box, NULL); + pipe_surface_release(ctx, &dst_view); } return; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_bufmgr.c ^
@@ -430,6 +430,9 @@ bo->index = -1; bo->kflags = 0; + if (flags & BO_ALLOC_SCANOUT) + bo->scanout = 1; + if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) { struct drm_i915_gem_caching arg = { .handle = bo->gem_handle, @@ -610,6 +613,16 @@ entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); _mesa_hash_table_remove(bufmgr->handle_table, entry); + + list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) { + struct drm_gem_close close = { .handle = export->gem_handle }; + intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close); + + list_del(&export->link); + free(export); + } + } else { + assert(list_is_empty(&bo->exports)); } /* Close this object / @@ -1001,6 +1014,9 @@ static bool can_map_cpu(struct crocus_bo bo, unsigned flags) { + if (bo->scanout) + return false; + if (bo->cache_coherent) return true;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_bufmgr.h ^
@@ -141,12 +141,18 @@ */ bool userptr; + /* + * Boolean of if this is used for scanout. + */ + bool scanout; + /* Pre-computed hash using _mesa_hash_pointer for cache tracking sets */ uint32_t hash; }; #define BO_ALLOC_ZEROED (1 << 0) #define BO_ALLOC_COHERENT (1 << 1) +#define BO_ALLOC_SCANOUT (1 << 2) /* * Allocate a buffer object.
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_resource.c ^
@@ -688,9 +688,10 @@ devinfo->ver < 6) return NULL; - UNUSED const bool isl_surf_created_successfully = + const bool isl_surf_created_successfully = crocus_resource_configure_main(screen, res, templ, modifier, 0); - assert(isl_surf_created_successfully); + if (!isl_surf_created_successfully) + return NULL; const char name = "miptree"; @@ -698,6 +699,10 @@ if (templ->usage == PIPE_USAGE_STAGING) flags \|= BO_ALLOC_COHERENT; + / Scanout buffers need to be WC. */ + if (templ->bind & PIPE_BIND_SCANOUT) + flags \|= BO_ALLOC_SCANOUT; + uint64_t aux_size = 0; uint32_t aux_preferred_alloc_flags;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_state.c ^
@@ -1983,9 +1983,9 @@ * "Grid Intersection Quantization" rules as specified by the * "Zero-Width (Cosmetic) Line Rasterization" section of the docs. / - line_width = 0.0f; + / hack around this for gfx4/5 fps counters in hud. / + line_width = GFX_VER < 6 ? 1.5f : 0.0f; } - return line_width; } @@ -4750,6 +4750,22 @@ key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; } +static inline GLenum +compare_func_to_gl(enum pipe_compare_func pipe_func) +{ + static const unsigned map[] = { + [PIPE_FUNC_NEVER] = GL_NEVER, + [PIPE_FUNC_LESS] = GL_LESS, + [PIPE_FUNC_EQUAL] = GL_EQUAL, + [PIPE_FUNC_LEQUAL] = GL_LEQUAL, + [PIPE_FUNC_GREATER] = GL_GREATER, + [PIPE_FUNC_NOTEQUAL] = GL_NOTEQUAL, + [PIPE_FUNC_GEQUAL] = GL_GEQUAL, + [PIPE_FUNC_ALWAYS] = GL_ALWAYS, + }; + return map[pipe_func]; +} + /* * Populate FS program key fields based on the current state. */ @@ -4836,7 +4852,7 @@ #if GFX_VER <= 5 if (fb->nr_cbufs > 1 && zsa->cso.alpha_enabled) { - key->alpha_test_func = zsa->cso.alpha_func; + key->alpha_test_func = compare_func_to_gl(zsa->cso.alpha_func); key->alpha_test_ref = zsa->cso.alpha_ref_value; } #endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_context.c ^
@@ -407,6 +407,7 @@ { struct etna_cmd_stream stream = ctx->stream; struct etna_screen screen = ctx->screen; + uint32_t dummy_attribs[VIVS_NFE_GENERIC_ATTRIB__LEN] = { 0 }; etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENGL); etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x00000001); @@ -467,6 +468,20 @@ VIVS_VS_ICACHE_INVALIDATE_UNK4); } + /* It seems that some GPUs (at least some GC400 have shown this behavior) + * come out of reset with random vertex attributes enabled and also don't + * disable them on the write to the first config register as normal. Enabling + * all attributes seems to provide the GPU with the required edge to actually + * disable the unused attributes on the next draw. + */ + if (screen->specs.halti >= 5) { + etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0), + VIVS_NFE_GENERIC_ATTRIB__LEN, dummy_attribs); + } else { + etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), + screen->specs.halti >= 0 ? 16 : 12, dummy_attribs); + } + ctx->dirty = ~0L; ctx->dirty_sampler_views = ~0L; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c ^
@@ -454,7 +454,7 @@ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, struct ir3_shader_variant *fs, - const struct ir3_shader_key *key) in_dt + const struct ir3_cache_key *key) in_dt { struct fd_context *ctx = fd_context(data); struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c ^
@@ -560,7 +560,7 @@ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, struct ir3_shader_variant *fs, - const struct ir3_shader_key *key) in_dt + const struct ir3_cache_key *key) in_dt { struct fd_context *ctx = fd_context(data); struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c ^
@@ -91,6 +91,7 @@ .vastc_srgb = fd5_ctx->vastc_srgb, .fastc_srgb = fd5_ctx->fastc_srgb, }, + .clip_plane_enable = ctx->rasterizer->clip_plane_enable, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c ^
@@ -658,7 +658,7 @@ OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) + if (dirty & (FD_DIRTY_PROG \| FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE)) fd5_program_emit(ctx, ring, emit); if (dirty & FD_DIRTY_RASTERIZER) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c ^
@@ -250,9 +250,12 @@ setup_stages(emit, s); bool do_streamout = (s[VS].v->shader->stream_output.num_outputs > 0); - uint8_t clip_mask = s[VS].v->clip_mask, cull_mask = s[VS].v->cull_mask; + uint8_t clip_mask = s[VS].v->clip_mask, + cull_mask = s[VS].v->cull_mask; uint8_t clip_cull_mask = clip_mask \| cull_mask; + clip_mask &= ctx->rasterizer->clip_plane_enable; + fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS; pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); @@ -711,7 +714,7 @@ struct ir3_shader_variant vs, struct ir3_shader_variant hs, struct ir3_shader_variant ds, struct ir3_shader_variant gs, struct ir3_shader_variant fs, - const struct ir3_shader_key key) in_dt + const struct ir3_cache_key key) in_dt { struct fd_context ctx = fd_context(data); struct fd5_program_state *state = CALLOC_STRUCT(fd5_program_state);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h ^
@@ -52,7 +52,7 @@ const struct ir3_shader_variant *so); void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd5_emit *emit); + struct fd5_emit *emit) in_dt; void fd5_prog_init(struct pipe_context *pctx);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_context.c ^
@@ -151,7 +151,8 @@ BIT(FD6_GROUP_ZSA)); fd_context_add_map(ctx, FD_DIRTY_ZSA \| FD_DIRTY_BLEND \| FD_DIRTY_PROG, BIT(FD6_GROUP_LRZ) \| BIT(FD6_GROUP_LRZ_BINNING)); - fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_PROG)); + fd_context_add_map(ctx, FD_DIRTY_PROG \| FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE, + BIT(FD6_GROUP_PROG)); fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER)); fd_context_add_map(ctx, FD_DIRTY_FRAMEBUFFER \| FD_DIRTY_RASTERIZER_DISCARD \|
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_draw.c ^
@@ -156,6 +156,7 @@ .sample_shading = (ctx->min_samples > 1), .msaa = (ctx->framebuffer.samples > 1), }, + .clip_plane_enable = ctx->rasterizer->clip_plane_enable, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, @@ -195,7 +196,7 @@ ir3_fixup_shader_state(&ctx->base, &emit.key.key); - if (!(ctx->dirty & FD_DIRTY_PROG)) { + if (!(ctx->gen_dirty & BIT(FD6_GROUP_PROG))) { emit.prog = fd6_ctx->prog; } else { fd6_ctx->prog = fd6_emit_get_prog(&emit);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_program.c ^
@@ -351,8 +351,10 @@ static void setup_stateobj(struct fd_ringbuffer ring, struct fd_context ctx, struct fd6_program_state state, - const struct ir3_shader_key key, bool binning_pass) assert_dt + const struct ir3_cache_key cache_key, + bool binning_pass) assert_dt { + const struct ir3_shader_key key = &cache_key->key; uint32_t pos_regid, psize_regid, color_regid[8], posz_regid; uint32_t clip0_regid, clip1_regid; uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; @@ -536,6 +538,8 @@ cull_mask = last_shader->cull_mask; uint8_t clip_cull_mask = clip_mask \| cull_mask; + clip_mask &= cache_key->clip_plane_enable; + /* If we have streamout, link against the real FS, rather than the * dummy FS used for binning pass state, to ensure the OUTLOC's * match. Depending on whether we end up doing sysmem or gmem, @@ -1184,7 +1188,7 @@ struct ir3_shader_variant vs, struct ir3_shader_variant hs, struct ir3_shader_variant ds, struct ir3_shader_variant gs, struct ir3_shader_variant fs, - const struct ir3_shader_key key) in_dt + const struct ir3_cache_key key) in_dt { struct fd_context ctx = fd_context(data); struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_context.h ^
@@ -172,8 +172,9 @@ * from hw perspective: */ FD_DIRTY_RASTERIZER_DISCARD = BIT(24), - FD_DIRTY_BLEND_DUAL = BIT(25), -#define NUM_DIRTY_BITS 26 + FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25), + FD_DIRTY_BLEND_DUAL = BIT(26), +#define NUM_DIRTY_BITS 27 /* additional flag for state requires updated resource tracking: */ FD_DIRTY_RESOURCE = BIT(31),
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_state.c ^
@@ -38,6 +38,8 @@ #include "freedreno_texture.h" #include "freedreno_util.h" +#define get_safe(ptr, field) ((ptr) ? (ptr)->field : 0) + /* All the generic state handling.. In case of CSO's that are specific * to the GPU version, when the bind and the delete are common they can * go in here. @@ -434,7 +436,8 @@ { struct fd_context ctx = fd_context(pctx); struct pipe_scissor_state old_scissor = fd_context_get_scissor(ctx); - bool discard = ctx->rasterizer && ctx->rasterizer->rasterizer_discard; + bool discard = get_safe(ctx->rasterizer, rasterizer_discard); + unsigned clip_plane_enable = get_safe(ctx->rasterizer, clip_plane_enable); ctx->rasterizer = hwcso; fd_context_dirty(ctx, FD_DIRTY_RASTERIZER); @@ -453,8 +456,11 @@ if (old_scissor != fd_context_get_scissor(ctx)) fd_context_dirty(ctx, FD_DIRTY_SCISSOR); - if (ctx->rasterizer && (discard != ctx->rasterizer->rasterizer_discard)) + if (discard != get_safe(ctx->rasterizer, rasterizer_discard)) fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_DISCARD); + + if (clip_plane_enable != get_safe(ctx->rasterizer, clip_plane_enable)) + fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE); } static void
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_util.h ^
@@ -106,12 +106,14 @@ #include <unistd.h> #include <sys/types.h> +#include <sys/syscall.h> #define DBG(fmt, ...) \ do { \ if (FD_DBG(MSGS)) \ - mesa_logi("%5d: %s:%d: " fmt, gettid(), __FUNCTION__, __LINE__, \ - ##__VA_ARGS__); \ + mesa_logi("%5d: %s:%d: " fmt, ((pid_t)syscall(SYS_gettid)), \ + __FUNCTION__, __LINE__, \ + ##__VA_ARGS__); \ } while (0) #define perf_debug_message(debug, type, ...) \
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c ^
@@ -157,7 +157,7 @@ cache->data, bs, variants[MESA_SHADER_VERTEX], variants[MESA_SHADER_TESS_CTRL], variants[MESA_SHADER_TESS_EVAL], variants[MESA_SHADER_GEOMETRY], variants[MESA_SHADER_FRAGMENT], - &key->key); + key); state->key = *key; /* NOTE: uses copy of key in state obj, because pointer passed by caller
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h ^
@@ -27,6 +27,8 @@ #ifndef IR3_CACHE_H_ #define IR3_CACHE_H_ +#include "pipe/p_state.h" + #include "ir3/ir3_shader.h" /* @@ -39,6 +41,11 @@ struct ir3_cache_key { struct ir3_shader_state *vs, *hs, *ds, *gs, *fs; // 5 pointers struct ir3_shader_key key; // 7 dwords + + /* Additional state that effects the cached program state, but + * not the compiled shader: + */ + unsigned clip_plane_enable : PIPE_MAX_CLIP_PLANES; }; /* per-gen backend program state object should subclass this for it's @@ -54,7 +61,7 @@ void *data, struct ir3_shader_variant *bs, /* binning pass vs */ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, - struct ir3_shader_variant *fs, const struct ir3_shader_key *key); + struct ir3_shader_variant *fs, const struct ir3_cache_key *key); void (*destroy_state)(void *data, struct ir3_program_state *state); };
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_screen.c ^
@@ -283,7 +283,13 @@ */ return 0; + /* i915 can't do these, and even if gallivm NIR can we call nir_to_tgsi + * manually and TGSI can't. + */ case PIPE_SHADER_CAP_INT16: + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_FP16_DERIVATIVES: + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: return 0; case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: @@ -343,9 +349,6 @@ case PIPE_SHADER_CAP_SUBROUTINES: return 0; case PIPE_SHADER_CAP_INT64_ATOMICS: - case PIPE_SHADER_CAP_FP16: - case PIPE_SHADER_CAP_FP16_DERIVATIVES: - case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: case PIPE_SHADER_CAP_INT16: case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: return 0;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_state.c ^
@@ -608,7 +608,7 @@ { struct i915_context *i915 = i915_context(pipe); - struct pipe_shader_state from_nir; + struct pipe_shader_state from_nir = { PIPE_SHADER_IR_TGSI }; if (templ->type == PIPE_SHADER_IR_NIR) { nir_shader *s = templ->ir.nir; @@ -619,7 +619,6 @@ * per-stage, and i915 FS can't do native integers. So, convert to TGSI, * where the draw path does support non-native-integers. */ - from_nir.type = PIPE_SHADER_IR_TGSI; from_nir.tokens = nir_to_tgsi(s, pipe->screen); templ = &from_nir; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_batch.c ^
@@ -290,6 +290,42 @@ MAX2(batch->max_gem_handle, iris_get_backing_bo(bo)->gem_handle); } +static void +flush_for_cross_batch_dependencies(struct iris_batch batch, + struct iris_bo bo, + bool writable) +{ + if (batch->measure && bo == batch->measure->bo) + return; + + /* When a batch uses a buffer for the first time, or newly writes a buffer + * it had already referenced, we may need to flush other batches in order + * to correctly synchronize them. + / + for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) { + struct iris_batch other_batch = batch->other_batches[b]; + int other_index = find_exec_index(other_batch, bo); + + /* If the buffer is referenced by another batch, and either batch + * intends to write it, then flush the other batch and synchronize. + * + * Consider these cases: + * + * 1. They read, we read => No synchronization required. + * 2. They read, we write => Synchronize (they need the old value) + * 3. They write, we read => Synchronize (we need their new value) + * 4. They write, we write => Synchronize (order writes) + * + * The read/read case is very common, as multiple batches usually + * share a streaming state buffer or shader assembly buffer, and + * we want to avoid synchronizing in this case. + / + if (other_index != -1 && + (writable \|\| BITSET_TEST(other_batch->bos_written, other_index))) + iris_batch_flush(other_batch); + } +} + /* * Add a buffer to the current batch's validation list. 
* @@ -320,44 +356,17 @@ int existing_index = find_exec_index(batch, bo); - if (existing_index != -1) { - /* The BO is already in the list; mark it writable / - if (writable) - BITSET_SET(batch->bos_written, existing_index); + if (existing_index == -1) { + flush_for_cross_batch_dependencies(batch, bo, writable); - return; - } + ensure_exec_obj_space(batch, 1); + add_bo_to_batch(batch, bo, writable); + } else if (writable && !BITSET_TEST(batch->bos_written, existing_index)) { + flush_for_cross_batch_dependencies(batch, bo, writable); - if (!batch->measure \|\| bo != batch->measure->bo) { - / This is the first time our batch has seen this BO. Before we use it, - * we may need to flush and synchronize with other batches. - / - for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) { - struct iris_batch other_batch = batch->other_batches[b]; - int other_index = find_exec_index(other_batch, bo); - - /* If the buffer is referenced by another batch, and either batch - * intends to write it, then flush the other batch and synchronize. - * - * Consider these cases: - * - * 1. They read, we read => No synchronization required. - * 2. They read, we write => Synchronize (they need the old value) - * 3. They write, we read => Synchronize (we need their new value) - * 4. They write, we write => Synchronize (order writes) - * - * The read/read case is very common, as multiple batches usually - * share a streaming state buffer or shader assembly buffer, and - * we want to avoid synchronizing in this case. 
- / - if (other_index != -1 && - (writable \|\| BITSET_TEST(other_batch->bos_written, other_index))) - iris_batch_flush(other_batch); - } + / The BO is already in the list; mark it writable / + BITSET_SET(batch->bos_written, existing_index); } - - ensure_exec_obj_space(batch, 1); - add_bo_to_batch(batch, bo, writable); } static void @@ -708,6 +717,12 @@ move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx], I915_EXEC_FENCE_WAIT); + / If it's being written by our screen, wait on it too. This is relevant + * when there are multiple contexts on the same screen. / + if (deps->write_syncobjs[batch_idx]) + move_syncobj_to_batch(batch, &deps->write_syncobjs[batch_idx], + I915_EXEC_FENCE_WAIT); + struct iris_syncobj batch_syncobj = iris_batch_get_signal_syncobj(batch); if (write) { @@ -720,6 +735,8 @@ move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx], I915_EXEC_FENCE_WAIT); + move_syncobj_to_batch(batch, &deps->read_syncobjs[batch_idx], + I915_EXEC_FENCE_WAIT); } else { /* If we're reading, replace the other read from our batch index. */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_blit.c ^
@@ -337,6 +337,16 @@ } } +static bool +clear_color_is_fully_zero(const struct iris_resource *res) +{ + return !res->aux.clear_color_unknown && + res->aux.clear_color.u32[0] == 0 && + res->aux.clear_color.u32[1] == 0 && + res->aux.clear_color.u32[2] == 0 && + res->aux.clear_color.u32[3] == 0; +} + /** * The pipe->blit() driver hook. * @@ -590,10 +600,7 @@ * original format (e.g. A8_UNORM/R8_UINT). */ *out_clear_supported = (devinfo->ver >= 11 && !is_render_target) || - (res->aux.clear_color.u32[0] == 0 && - res->aux.clear_color.u32[1] == 0 && - res->aux.clear_color.u32[2] == 0 && - res->aux.clear_color.u32[3] == 0); + clear_color_is_fully_zero(res); break; default: *out_aux_usage = ISL_AUX_USAGE_NONE;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_bufmgr.c ^
@@ -594,19 +594,26 @@ assert(!slab->bo->aux_map_address); - if (aux_map_ctx) { - /* Since we're freeing the whole slab, all buffers allocated out of it - * must be reclaimable. We require buffers to be idle to be reclaimed - * (see iris_can_reclaim_slab()), so we know all entries must be idle. - * Therefore, we can safely unmap their aux table entries. - / - for (unsigned i = 0; i < pslab->num_entries; i++) { - struct iris_bo bo = &slab->entries[i]; - if (bo->aux_map_address) { - intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size); - bo->aux_map_address = 0; + /* Since we're freeing the whole slab, all buffers allocated out of it + * must be reclaimable. We require buffers to be idle to be reclaimed + * (see iris_can_reclaim_slab()), so we know all entries must be idle. + * Therefore, we can safely unmap their aux table entries. + / + for (unsigned i = 0; i < pslab->num_entries; i++) { + struct iris_bo bo = &slab->entries[i]; + if (aux_map_ctx && bo->aux_map_address) { + intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size); + bo->aux_map_address = 0; + } + + /* Unref read/write dependency syncobjs and free the array. / + for (int d = 0; d < bo->deps_size; d++) { + for (int b = 0; b < IRIS_BATCH_COUNT; b++) { + iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL); + iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL); } } + free(bo->deps); } iris_bo_unreference(slab->bo); @@ -1659,6 +1666,16 @@ list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { list_del(&bo->head); + + bo_free(bo); + } + } + + for (int i = 0; i < bufmgr->num_local_buckets; i++) { + struct bo_cache_bucket bucket = &bufmgr->local_cache_bucket[i]; + + list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { + list_del(&bo->head); bo_free(bo); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_clear.c ^
@@ -321,7 +321,8 @@ iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, p_res, res->aux.usage, level, true); - blorp_fast_clear(&blorp_batch, &surf, format, ISL_SWIZZLE_IDENTITY, + blorp_fast_clear(&blorp_batch, &surf, res->surf.format, + ISL_SWIZZLE_IDENTITY, level, box->z, box->depth, box->x, box->y, box->x + box->width, box->y + box->height);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_program.c ^
@@ -2820,7 +2820,8 @@ if (ice->state.vs_uses_draw_params != uses_draw_params || ice->state.vs_uses_derived_draw_params != uses_derived_draw_params || - ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) { + ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag || + ice->state.vs_needs_sgvs_element != needs_sgvs_element) { ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS | IRIS_DIRTY_VERTEX_ELEMENTS; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_state.c ^
@@ -2934,6 +2934,7 @@ pipe_resource_reference(&surf->surface_state.ref.res, NULL); pipe_resource_reference(&surf->surface_state_read.ref.res, NULL); free(surf->surface_state.cpu); + free(surf->surface_state_read.cpu); free(surf); } @@ -6921,10 +6922,9 @@ iris_emit_cmd(batch, GENX(CFE_STATE), cfe) { cfe.MaximumNumberofThreads = devinfo->max_cs_threads * devinfo->subslice_total - 1; - if (prog_data->total_scratch > 0) { - cfe.ScratchSpaceBuffer = - iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4; - } + uint32_t scratch_addr = pin_scratch_space(ice, batch, prog_data, + MESA_SHADER_COMPUTE); + cfe.ScratchSpaceBuffer = scratch_addr >> 4; } } @@ -7861,6 +7861,13 @@ pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; pc.StateCacheInvalidationEnable = flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; +#if GFX_VER >= 12 + /* Invalidates the L3 cache part in which index & vertex data is loaded + * when VERTEX_BUFFER_STATE::L3BypassDisable is set. + */ + pc.L3ReadOnlyCacheInvalidationEnable = + flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; +#endif pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; pc.ConstantCacheInvalidationEnable = flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/ci/gitlab-ci.yml ^
@@ -1,4 +1,4 @@ -lima-mali450-test:arm64: +.lima-mali450-test:arm64: extends: - .lava-test:arm64 - .lima-rules
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/meson.build ^
@@ -92,7 +92,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) liblima = static_library(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/standalone/lima_disasm.c ^
@@ -166,7 +166,7 @@ } char *filename = NULL; - filename = argv[n]; + filename = argv[argc - 1]; uint32_t size = 0; uint32_t *prog = extract_shader_binary(filename, &size, &is_frag);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-quick_shader.txt ^
@@ -188,7 +188,6 @@ spec/glsl-4.00/execution/conversion/vert-conversion-explicit-dvec3-vec3: fail spec/glsl-4.00/execution/conversion/vert-conversion-explicit-dvec4-vec4: fail spec/glsl-4.50/execution/ssbo-atomiccompswap-int: fail -spec/glsl-es-1.00/linker/glsl-mismatched-uniform-precision-unused: fail spec/intel_shader_atomic_float_minmax/execution/shared-atomiccompswap-float: skip spec/intel_shader_atomic_float_minmax/execution/shared-atomicexchange-float: skip spec/intel_shader_atomic_float_minmax/execution/shared-atomicmax-float: skip
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml ^
@@ -37,7 +37,7 @@ - path: gputest/pixmark-piano.trace expectations: - device: gl-vmware-llvmpipe - checksum: 4262587e893cf98c61a8467a15677181 + checksum: b580ae01560380461a103975cab77393 - path: gputest/triangle.trace expectations: - device: gl-vmware-llvmpipe @@ -169,7 +169,7 @@ - path: bgfx/39-assao.rdc expectations: - device: gl-vmware-llvmpipe - checksum: bc6f44e63010db07e7ba588b216e38b1 + checksum: 5d9c6dd6399db34ac81951cd7152ec1c - path: bgfx/40-svt.rdc expectations: - device: gl-vmware-llvmpipe
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_screen.c ^
@@ -560,6 +560,18 @@ return 0; } +static void +llvmpipe_get_driver_uuid(struct pipe_screen pscreen, char uuid) +{ + memset(uuid, 0, PIPE_UUID_SIZE); +} + +static void +llvmpipe_get_device_uuid(struct pipe_screen pscreen, char uuid) +{ + memset(uuid, 0, PIPE_UUID_SIZE); +} + static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_scmp = true, .lower_flrp32 = true, @@ -1040,6 +1052,9 @@ screen->base.get_timestamp = llvmpipe_get_timestamp; + screen->base.get_driver_uuid = llvmpipe_get_driver_uuid; + screen->base.get_device_uuid = llvmpipe_get_device_uuid; + screen->base.finalize_nir = llvmpipe_finalize_nir; screen->base.get_disk_shader_cache = lp_get_disk_shader_cache;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c ^
@@ -304,6 +304,7 @@ #define LATE_DEPTH_TEST 0x2 #define EARLY_DEPTH_WRITE 0x4 #define LATE_DEPTH_WRITE 0x8 +#define EARLY_DEPTH_TEST_INFERRED 0x10 //only with EARLY_DEPTH_TEST static int find_output_by_semantic( const struct tgsi_shader_info info, @@ -647,10 +648,10 @@ key->stencil[1].writemask))) depth_mode = LATE_DEPTH_TEST \| LATE_DEPTH_WRITE; else - depth_mode = EARLY_DEPTH_TEST \| LATE_DEPTH_WRITE; + depth_mode = EARLY_DEPTH_TEST \| LATE_DEPTH_WRITE \| EARLY_DEPTH_TEST_INFERRED; } else - depth_mode = EARLY_DEPTH_TEST \| EARLY_DEPTH_WRITE; + depth_mode = EARLY_DEPTH_TEST \| EARLY_DEPTH_WRITE \| EARLY_DEPTH_TEST_INFERRED; } else { depth_mode = LATE_DEPTH_TEST \| LATE_DEPTH_WRITE; @@ -1141,8 +1142,10 @@ if (key->min_samples == 1) s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), ""); - / if the shader writes sample mask use that / - if (shader->info.base.writes_samplemask) { + / if the shader writes sample mask use that, + * but only if this isn't genuine early-depth to avoid breaking occlusion query / + if (shader->info.base.writes_samplemask && + (!(depth_mode & EARLY_DEPTH_TEST) \|\| (depth_mode & (EARLY_DEPTH_TEST_INFERRED)))) { LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, ""); out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx); LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, ""); @@ -1258,6 +1261,23 @@ key->multisample ? 
s_mask : lp_build_mask_value(&mask), counter); } + / if this is genuine early-depth in the shader, write samplemask now + * after occlusion count has been updated + / + if (key->multisample && shader->info.base.writes_samplemask && + (depth_mode & (EARLY_DEPTH_TEST_INFERRED \| EARLY_DEPTH_TEST)) == EARLY_DEPTH_TEST) { + / if the shader writes sample mask use that / + LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, ""); + out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx); + LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, ""); + LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, ""); + LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), ""); + smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, ""); + + s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, ""); + } + + if (key->multisample) { / store the sample mask for this loop */ LLVMBuildStore(builder, s_mask, s_mask_ptr);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c ^
@@ -67,13 +67,13 @@ const struct util_format_description *depth_desc = util_format_description(depth_format); - if (lp->framebuffer.zsbuf && lp->framebuffer.zsbuf->context != pipe) { + if (fb->zsbuf && fb->zsbuf->context != pipe) { debug_printf("Illegal setting of fb state with zsbuf created in " "another context\n"); } for (i = 0; i < fb->nr_cbufs; i++) { - if (lp->framebuffer.cbufs[i] && - lp->framebuffer.cbufs[i]->context != pipe) { + if (fb->cbufs[i] && + fb->cbufs[i]->context != pipe) { debug_printf("Illegal setting of fb state with cbuf %d created in " "another context\n", i); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_surface.c ^
@@ -311,6 +311,9 @@ if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; + width = MIN2(width, dst->texture->width0 - dstx); + height = MIN2(height, dst->texture->height0 - dsty); + if (dst->texture->nr_samples > 1) { struct pipe_box box; u_box_2d(dstx, dsty, width, height, &box); @@ -379,6 +382,9 @@ if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; + width = MIN2(width, dst->texture->width0 - dstx); + height = MIN2(height, dst->texture->height0 - dsty); + if (dst->texture->nr_samples > 1) { uint64_t zstencil = util_pack64_z_stencil(dst->format, depth, stencil); struct pipe_box box;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_cmdstream.c ^
@@ -120,31 +120,27 @@ static unsigned translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest) { - /* Bifrost doesn't support the GL_CLAMP wrap mode, so instead use - * CLAMP_TO_EDGE and CLAMP_TO_BORDER. On Midgard, CLAMP is broken for - * nearest filtering, so use CLAMP_TO_EDGE in that case. / + / CLAMP is only supported on Midgard, where it is broken for nearest + * filtering. Use CLAMP_TO_EDGE in that case. + / switch (w) { case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT; - case PIPE_TEX_WRAP_CLAMP: - return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE : -#if PAN_ARCH <= 5 - MALI_WRAP_MODE_CLAMP; -#else - MALI_WRAP_MODE_CLAMP_TO_BORDER; -#endif case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; + +#if PAN_ARCH <= 5 + case PIPE_TEX_WRAP_CLAMP: + return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE : + MALI_WRAP_MODE_CLAMP; case PIPE_TEX_WRAP_MIRROR_CLAMP: return using_nearest ? 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE : -#if PAN_ARCH <= 5 - MALI_WRAP_MODE_MIRRORED_CLAMP; -#else - MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; + MALI_WRAP_MODE_MIRRORED_CLAMP; #endif - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; + default: unreachable("Invalid wrap"); } } @@ -1367,6 +1363,12 @@ for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { struct panfrost_sampler_view view = ctx->sampler_views[stage][i]; + + if (!view) { + memset(&out[i], 0, sizeof(out[i])); + continue; + } + struct pipe_sampler_view pview = &view->base; struct panfrost_resource rsrc = pan_resource(pview->texture); @@ -1384,6 +1386,11 @@ for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { struct panfrost_sampler_view view = ctx->sampler_views[stage][i]; + if (!view) { + trampolines[i] = 0; + continue; + } + panfrost_update_sampler_view(view, &ctx->base); trampolines[i] = panfrost_get_tex_desc(batch, stage, view); @@ -1411,8 +1418,11 @@ SAMPLER); struct mali_sampler_packed out = (struct mali_sampler_packed ) T.cpu; - for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) - out[i] = ctx->samplers[stage][i]->hw; + for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) { + struct panfrost_sampler_state st = ctx->samplers[stage][i]; + + out[i] = st ? 
st->hw : (struct mali_sampler_packed){0}; + } return T.gpu; } @@ -2715,7 +2725,8 @@ } } - bool points = info->mode == PIPE_PRIM_POINTS; + enum pipe_prim_type prim = u_reduced_prim(info->mode); + bool polygon = (prim == PIPE_PRIM_TRIANGLES); void prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); #if PAN_ARCH >= 6 @@ -2731,8 +2742,17 @@ cfg.four_components_per_vertex = true; cfg.draw_descriptor_is_64b = true; cfg.front_face_ccw = rast->front_ccw; - cfg.cull_front_face = rast->cull_face & PIPE_FACE_FRONT; - cfg.cull_back_face = rast->cull_face & PIPE_FACE_BACK; + + / + * From the Gallium documentation, + * pipe_rasterizer_state::cull_face "indicates which faces of + * polygons to cull". Points and lines are not considered + * polygons and should be drawn even if all faces are culled. + * The hardware does not take primitive type into account when + * culling, so we need to do that check ourselves. + / + cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT); + cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); cfg.position = pos; cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT]; cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT]; @@ -2746,9 +2766,7 @@ be set to 0 and the provoking vertex is selected with the * PRIMITIVE.first_provoking_vertex field. / - if (info->mode == PIPE_PRIM_LINES \|\| - info->mode == PIPE_PRIM_LINE_LOOP \|\| - info->mode == PIPE_PRIM_LINE_STRIP) { + if (prim == PIPE_PRIM_LINES) { / The logic is inverted across arches. */ cfg.flat_shading_vertex = rast->flatshade_first ^ (PAN_ARCH <= 5); @@ -2769,7 +2787,7 @@ } } - panfrost_emit_primitive_size(ctx, points, psiz, prim_size); + panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size); } static void
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_context.c ^
@@ -631,28 +631,43 @@ unsigned new_nr = 0; unsigned i; - assert(start_slot == 0); + for (i = 0; i < num_views; ++i) { + struct pipe_sampler_view view = views ? views[i] : NULL; + unsigned p = i + start_slot; - if (!views) - num_views = 0; + if (view) + new_nr = p + 1; - for (i = 0; i < num_views; ++i) { - if (views[i]) - new_nr = i + 1; if (take_ownership) { - pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][i], + pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][p], NULL); - ctx->sampler_views[shader][i] = (struct panfrost_sampler_view )views[i]; + ctx->sampler_views[shader][i] = (struct panfrost_sampler_view )view; } else { - pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][i], - views[i]); + pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][p], + view); } } - for (; i < ctx->sampler_view_count[shader]; i++) { - pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][i], + for (; i < num_views + unbind_num_trailing_slots; i++) { + unsigned p = i + start_slot; + pipe_sampler_view_reference((struct pipe_sampler_view )&ctx->sampler_views[shader][p], NULL); } + + / If the sampler view count is higher than the greatest sampler view + * we touch, it can't change / + if (ctx->sampler_view_count[shader] > start_slot + num_views + unbind_num_trailing_slots) + return; + + / If we haven't set any sampler views here, search lower numbers for + * set sampler views */ + if (new_nr == 0) { + for (i = 0; i < start_slot; ++i) { + if (ctx->sampler_views[shader][i]) + new_nr = i + 1; + } + } + ctx->sampler_view_count[shader] = new_nr; } @@ -668,6 +683,8 @@ util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader], buffers, start, count); + + ctx->dirty_shader[shader] \|= PAN_DIRTY_STAGE_SSBO; } static void
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.c ^
@@ -153,31 +153,15 @@ return true; } } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { - if (scanout) { - struct drm_prime_handle args = { - .handle = scanout->handle, - .flags = DRM_CLOEXEC, - }; - - int ret = drmIoctl(dev->ro->kms_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return false; + int fd = panfrost_bo_export(rsrc->image.data.bo); - handle->stride = scanout->stride; - handle->handle = args.fd; - - return true; - } else { - int fd = panfrost_bo_export(rsrc->image.data.bo); - - if (fd < 0) - return false; + if (fd < 0) + return false; - handle->handle = fd; - handle->stride = rsrc->image.layout.slices[0].line_stride; - handle->offset = rsrc->image.layout.slices[0].offset; - return true; - } + handle->handle = fd; + handle->stride = rsrc->image.layout.slices[0].line_stride; + handle->offset = rsrc->image.layout.slices[0].offset; + return true; } return false; @@ -839,7 +823,8 @@ struct panfrost_context ctx = pan_context(pctx); struct panfrost_device dev = pan_device(pctx->screen); struct panfrost_resource rsrc = pan_resource(resource); - int bytes_per_pixel = util_format_get_blocksize(rsrc->image.layout.format); + enum pipe_format format = rsrc->image.layout.format; + int bytes_per_block = util_format_get_blocksize(format); struct panfrost_bo bo = rsrc->image.data.bo; /* Can't map tiled/compressed directly / @@ -916,6 +901,9 @@ } if (create_new_bo) { + / Make sure we re-emit any descriptors using this resource / + panfrost_dirty_state_all(ctx); + / If the BO is used by one of the pending batches or if it's * not ready yet (still accessed by one of the already flushed * batches), we try to allocate a new one to avoid waiting. @@ -942,6 +930,12 @@ panfrost_bo_unreference(bo); rsrc->image.data.bo = newbo; + /* Swapping out the BO will invalidate batches + * accessing this resource, flush them but do + * not wait for them. 
+ / + panfrost_flush_batches_accessing_rsrc(ctx, rsrc, "Resource shadowing"); + if (!copy_resource && drm_is_afbc(rsrc->image.layout.modifier)) panfrost_resource_init_afbc_headers(rsrc); @@ -970,9 +964,17 @@ } } + / For access to compressed textures, we want the (x, y, w, h) + * region-of-interest in blocks, not pixels. Then we compute the stride + * between rows of blocks as the width in blocks times the width per + * block, etc. + / + struct pipe_box box_blocks; + u_box_pixels_to_blocks(&box_blocks, box, format); + if (rsrc->image.layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { - transfer->base.stride = box->width bytes_per_pixel; - transfer->base.layer_stride = transfer->base.stride * box->height; + transfer->base.stride = box_blocks.width * bytes_per_block; + transfer->base.layer_stride = transfer->base.stride * box_blocks.height; transfer->map = ralloc_size(transfer, transfer->base.layer_stride * box->depth); assert(box->depth == 1); @@ -1013,9 +1015,9 @@ return bo->ptr.cpu + rsrc->image.layout.slices[level].offset - + transfer->base.box.z * transfer->base.layer_stride - + transfer->base.box.y * rsrc->image.layout.slices[level].line_stride - + transfer->base.box.x * bytes_per_pixel; + + box->z * transfer->base.layer_stride + + box_blocks.y * rsrc->image.layout.slices[level].line_stride + + box_blocks.x * bytes_per_block; } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_screen.c ^
@@ -121,6 +121,7 @@ case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: @@ -153,7 +154,6 @@ case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -284,6 +284,8 @@ return MAX_VARYING; /* Removed in v6 (Bifrost) */ + case PIPE_CAP_GL_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_ALPHA_TEST: return dev->arch <= 5; @@ -841,8 +843,6 @@ if (dev->debug & PAN_DBG_NO_AFBC) dev->has_afbc = false; - dev->ro = ro; - /* Check if we're loading against a supported GPU model. */ switch (dev->gpu_id) { @@ -862,6 +862,8 @@ return NULL; } + dev->ro = ro; + screen->base.destroy = panfrost_destroy_screen; screen->base.get_name = panfrost_get_name;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r3xx_fragprog.c ^
@@ -120,7 +120,6 @@ /* This transformation needs to be done before any of the IF * instructions are modified. */ {"transform KILP", 1, 1, rc_transform_KILL, NULL}, - {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, {"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one},
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog.c ^
@@ -22,6 +22,7 @@ #include "radeon_compiler.h" +#include <stdbool.h> #include <stdio.h> #include "r300_reg.h" @@ -559,15 +560,33 @@ struct rc_instruction * LastRead; }; +static int get_reg(struct radeon_compiler c, struct temporary_allocation ta, bool hwtemps, + unsigned int orig) +{ + if (!ta[orig].Allocated) { + int j; + for (j = 0; j < c->max_temp_regs; ++j) + { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = true; + } + + return ta[orig].HwTemp; +} + static void allocate_temporary_registers(struct radeon_compiler c, void user) { struct r300_vertex_program_compiler compiler = (struct r300_vertex_program_compiler)c; struct rc_instruction inst; struct rc_instruction end_loop = NULL; unsigned int num_orig_temps = 0; - char hwtemps[RC_REGISTER_MAX_INDEX]; + bool hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation ta; - unsigned int i, j; + unsigned int i; memset(hwtemps, 0, sizeof(hwtemps)); @@ -638,28 +657,17 @@ for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + inst->U.I.SrcReg[i].Index = get_reg(c, ta, hwtemps, orig); if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; + hwtemps[ta[orig].HwTemp] = false; } } if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.DstReg.Index; - - if (!ta[orig].Allocated) { - for(j = 0; j < c->max_temp_regs; ++j) { - if (!hwtemps[j]) - break; - } - ta[orig].Allocated = 1; - ta[orig].HwTemp = j; - hwtemps[ta[orig].HwTemp] = 1; - } - - inst->U.I.DstReg.Index = ta[orig].HwTemp; + inst->U.I.DstReg.Index = get_reg(c, ta, hwtemps, orig); } } } @@ -695,10 +703,10 @@ new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; - memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); inst->U.I.SrcReg[i].File = 
RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = temp; inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[i].RelAddr = 0; } } return 1; @@ -724,10 +732,13 @@ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst_mov->U.I.SrcReg[0].Negate = 0; + inst_mov->U.I.SrcReg[0].Abs = 0; - reset_srcreg(&inst->U.I.SrcReg[2]); inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[2].Index = tmpreg; + inst->U.I.SrcReg[2].RelAddr = false; } } @@ -739,10 +750,13 @@ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst_mov->U.I.SrcReg[0].Negate = 0; + inst_mov->U.I.SrcReg[0].Abs = 0; - reset_srcreg(&inst->U.I.SrcReg[1]); inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[1].Index = tmpreg; + inst->U.I.SrcReg[1].RelAddr = false; } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r500_fragprog.c ^
@@ -44,16 +44,15 @@ struct rc_instruction * inst_if, void data) { + if (inst_if->U.I.Opcode != RC_OPCODE_IF) + return 0; + struct rc_variable writer; struct rc_list * writer_list, * list_ptr; struct rc_list * var_list = rc_get_variables(c); unsigned int generic_if = 0; unsigned int alu_chan; - if (inst_if->U.I.Opcode != RC_OPCODE_IF) { - return 0; - } - writer_list = rc_variable_list_get_writers( var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); if (!writer_list) { @@ -220,8 +219,6 @@ return 1; return 0; - } else if (reg.File == RC_FILE_INLINE) { - return 1; } else { /* ALU instructions support almost everything */ relevant = 0;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c ^
@@ -253,8 +253,13 @@ if(opcode->HasDstReg){ int src = 0; unsigned int srcmasks[3]; - rc_compute_sources_for_writemask(ptr, - ptr->U.I.DstReg.WriteMask, srcmasks); + unsigned int writemask = ptr->U.I.DstReg.WriteMask; + if (ptr->U.I.WriteALUResult == RC_ALURESULT_X) + writemask \|= RC_MASK_X; + else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W) + writemask \|= RC_MASK_W; + + rc_compute_sources_for_writemask(ptr, writemask, srcmasks); for(src=0; src < opcode->NumSrcRegs; src++){ mark_used(&s, ptr->U.I.SrcReg[src].File,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c ^
@@ -499,22 +499,6 @@ } } -void rc_unroll_loops(struct radeon_compiler c, void user) -{ - struct rc_instruction * inst; - struct loop_info loop; - - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { - if (build_loop_info(c, &loop, inst)) { - try_unroll_loop(c, &loop); - } - } - } -} - void rc_emulate_loops(struct radeon_compiler c, void user) { struct emulate_loop_state * s = &c->loop_state;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h ^
@@ -50,8 +50,6 @@ void rc_transform_loops(struct radeon_compiler c, void user); -void rc_unroll_loops(struct radeon_compiler * c, void user); - void rc_emulate_loops(struct radeon_compiler c, void user); #endif / RADEON_EMULATE_LOOPS_H */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_optimize.c ^
@@ -26,6 +26,8 @@ * / +#include "util/u_math.h" + #include "radeon_dataflow.h" #include "radeon_compiler.h" @@ -653,11 +655,12 @@ / XXX It would be nice to use is_src_uniform_constant here, but that * function only works if the register's file is RC_FILE_NONE / for(i = 0; i < 4; i++ ) { + if (!(inst_add->U.I.DstReg.WriteMask & (1 << i))) + continue; + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); - if(((1 << i) & inst_add->U.I.DstReg.WriteMask) - && swz != RC_SWIZZLE_ONE) { + if (swz != RC_SWIZZLE_ONE \|\| inst_add->U.I.SrcReg[0].Negate & (1 << i)) return 0; - } } / Check src1. / @@ -832,8 +835,15 @@ return 0; } - / Rewrite the instructions / writemask_sum = rc_variable_writemask_sum(writer_list->Item); + + / rc_normal_rewrite_writemask can't expand a previous writemask to store + * more channels replicated. + / + if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask)) + return 0; + + / Rewrite the instructions / for (var = writer_list->Item; var; var = var->Friend) { struct rc_variable writer = var; unsigned conversion_swizzle = rc_make_conversion_swizzle(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.c ^
@@ -1215,12 +1215,6 @@ * * === OR === * - * IF Temp[0].x -\ - * KILL - > KIL -abs(Temp[0].x) - * ENDIF -/ - * - * === OR === - * * IF Temp[0].x -> IF Temp[0].x * ... -> ... * ELSE -> ELSE @@ -1265,21 +1259,6 @@ * block, because -0.0 is considered negative. / inst->U.I.SrcReg[0] = negate(absolute(if_inst->U.I.SrcReg[0])); - - if (inst->Prev->U.I.Opcode != RC_OPCODE_IF - && inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { - - / Optimize the special case: - * IF Temp[0].x - * KILP - * ENDIF - / - - / Remove IF / - rc_remove_instruction(inst->Prev); - / Remove ENDIF */ - rc_remove_instruction(inst->Next); - } } } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_vert_fc.c ^
@@ -257,10 +257,10 @@ if (fc_state.BranchDepth != 0 \|\| fc_state.LoopDepth != 1) { lower_endloop(inst, &fc_state); + /* Skip the new PRED_RESTORE / + inst = inst->Next; } fc_state.LoopDepth--; - / Skip PRED_RESTORE */ - inst = inst->Next; break; case RC_OPCODE_IF: lower_if(inst, &fc_state);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_vs_draw.c ^
@@ -312,15 +312,17 @@ struct r300_vertex_shader vs) { struct draw_context draw = r300->draw; - struct pipe_shader_state new_vs; struct tgsi_shader_info info; struct vs_transform_context transform; const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */; + struct pipe_shader_state new_vs = { + .type = PIPE_SHADER_IR_TGSI, + .tokens = tgsi_alloc_tokens(newLen) + }; unsigned i; tgsi_scan_shader(vs->state.tokens, &info); - new_vs.tokens = tgsi_alloc_tokens(newLen); if (new_vs.tokens == NULL) return;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/navi10-piglit-quick-fail.csv ^
@@ -115,10 +115,6 @@ spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail spec@egl_ext_protected_content@conformance,Fail spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail -spec@ext_framebuffer_multisample@turn-on-off 2,Fail -spec@ext_framebuffer_multisample@turn-on-off 4,Fail -spec@ext_framebuffer_multisample@turn-on-off 6,Fail -spec@ext_framebuffer_multisample@turn-on-off 8,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail spec@ext_texture_integer@fbo-integer,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/radeonsi-stoney-fails.txt ^
@@ -54,10 +54,6 @@ spec@egl_khr_surfaceless_context@viewport,Fail spec@egl_mesa_configless_context@basic,Fail spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail -spec@ext_framebuffer_multisample@turn-on-off 2,Fail -spec@ext_framebuffer_multisample@turn-on-off 4,Fail -spec@ext_framebuffer_multisample@turn-on-off 6,Fail -spec@ext_framebuffer_multisample@turn-on-off 8,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/raven-piglit-quick-fail.csv ^
@@ -201,10 +201,6 @@ spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail spec@egl_ext_protected_content@conformance,Fail spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail -spec@ext_framebuffer_multisample@turn-on-off 2,Fail -spec@ext_framebuffer_multisample@turn-on-off 4,Fail -spec@ext_framebuffer_multisample@turn-on-off 6,Fail -spec@ext_framebuffer_multisample@turn-on-off 8,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail spec@ext_texture_integer@fbo-integer,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/sienna_cichlid-piglit-quick-fail.csv ^
@@ -116,10 +116,6 @@ spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail spec@egl_ext_protected_content@conformance,Fail spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail -spec@ext_framebuffer_multisample@turn-on-off 2,Fail -spec@ext_framebuffer_multisample@turn-on-off 4,Fail -spec@ext_framebuffer_multisample@turn-on-off 6,Fail -spec@ext_framebuffer_multisample@turn-on-off 8,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail spec@ext_texture_integer@fbo-integer,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/traces-radeonsi.yml ^
@@ -5,27 +5,27 @@ - path: glmark2/desktop:windows=4:effect=blur:blur-radius=5:passes=1:separable=true.trace expectations: - device: gl-radeonsi-stoney - checksum: d8c9bf6295525e65e318adeff02520e2 + checksum: 740fa8f8e9a9d815cf160b1893370755 - path: glmark2/jellyfish.trace expectations: - device: gl-radeonsi-stoney - checksum: f68bf374e535ad4a43a08786b0d536d8 + checksum: 5bc7d5c250b7d568313c4afd064082f6 - path: glxgears/glxgears-2.trace expectations: - device: gl-radeonsi-stoney - checksum: eb9b3d497be567f02a6e039fa32f2b13 + checksum: ef3653f50d4853d3e9cb3244c799565a - path: 0ad/0ad.trace expectations: - device: gl-radeonsi-stoney - checksum: 8fb8cd54f1ff908952fe0b6dd9f28999 + checksum: 1a089d8584a9e68e7ab08eada954741b - path: pathfinder/demo.trace expectations: - device: gl-radeonsi-stoney - checksum: 8ff636268dfa0d54b6f15d70d15e354d + checksum: c81c85f9b247dd1b06c3dd5b669cc283 - path: pathfinder/canvas_moire.trace expectations: - device: gl-radeonsi-stoney - checksum: 505b9cad6e65c13463a0786944f8b679 + checksum: 78dd2357ad6e5ffc049a75bfb11c5497 - path: pathfinder/canvas_text_v2.trace expectations: - device: gl-radeonsi-stoney @@ -33,11 +33,11 @@ - path: gputest/furmark.trace expectations: - device: gl-radeonsi-stoney - checksum: 84c499203944cdc59e70450c324bb8df + checksum: 4ceea12000bb5995b915228d2d4b49c7 - path: gputest/pixmark-piano.trace expectations: - device: gl-radeonsi-stoney - checksum: 58a86d233d03e2a174cb79c16028f916 + checksum: 86ebe6ff8038975de8724fa9536edb7e - path: gputest/triangle.trace expectations: - device: gl-radeonsi-stoney @@ -45,47 +45,47 @@ - path: humus/Portals.trace expectations: - device: gl-radeonsi-stoney - checksum: fc7d00efe380cacbd4e9ef9b231aea2f + checksum: 5b96333495b794691e4ed071ae92ff19 - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 5af6e31cc78320cb3f9db483c7a426e0 + checksum: 
5db05161041946e8971f39f12bbd847c - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=true.trace expectations: - device: gl-radeonsi-stoney - checksum: 6e9dc5a7dc5a9cbb2b396bfce88a2084 + checksum: e2154c522fcdb4f43b31b31c17adda74 - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=subdata:interleave=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 35e384f833f37411f15bf8ef80ca1914 + checksum: 70298e48479147af2d848a5441fb5f47 - path: glmark2/bump:bump-render=height.trace expectations: - device: gl-radeonsi-stoney - checksum: 526cf3805b9b64bb8edea1b7d86b9cae + checksum: f5129b06e401a5fefa18a9895b18deec - path: glmark2/bump:bump-render=high-poly.trace expectations: - device: gl-radeonsi-stoney - checksum: e026d36eaa71ecd957b47c7e6a5a100b + checksum: 3fc1adf0caa289b3296a80c2c13834ca - path: glmark2/bump:bump-render=normals.trace expectations: - device: gl-radeonsi-stoney - checksum: 4508a1be8c33a63fbfa695b141edf48b + checksum: 596fd7a084d3a7a6b61b322593385f45 - path: glmark2/conditionals:vertex-steps=0:fragment-steps=0.trace expectations: - device: gl-radeonsi-stoney - checksum: d12ecac5894705295e4fa076d77a72ab + checksum: fb2eda378ace8ca8b81d73d20cbfbbf7 - path: glmark2/conditionals:vertex-steps=0:fragment-steps=5.trace expectations: - device: gl-radeonsi-stoney - checksum: 8999ff7eda7d7cf25440b96ab0efd4ee + checksum: b8575de0e043f540b12f13209054d000 - path: glmark2/conditionals:vertex-steps=5:fragment-steps=0.trace expectations: - device: gl-radeonsi-stoney - checksum: e65fdae9fe7bbd95c5cc0fb0c3eb7bf4 + checksum: f70625a1f9bd9d2c211e330032b86f85 - path: glmark2/desktop:windows=4:effect=shadow.trace expectations: - device: gl-radeonsi-stoney - checksum: 2aff87605464dd3f61aefd4e1dc0bffd + checksum: 384015de55daf7dd406c9463576018e9 - path: glmark2/effect2d:kernel=0,1,0;1,-4,1;0,1,0;.trace expectations: - device: gl-radeonsi-stoney @@ -97,87 +97,87 @@ - path: 
glmark2/function:fragment-steps=5:fragment-complexity=low.trace expectations: - device: gl-radeonsi-stoney - checksum: 13149880306d2183703a5c327f4d750a + checksum: 9efd8bb5df15f9483a18a00f9650caa9 - path: glmark2/function:fragment-steps=5:fragment-complexity=medium.trace expectations: - device: gl-radeonsi-stoney - checksum: c6983ffb3a74e234f84e5d817f876f54 + checksum: 9bdd506c0404cb11a7148cb08b429d1b - path: glmark2/build:use-vbo=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 203a0205580b2c39ed8dcbed57b18f3c + checksum: 506b1910317b04e5d32aacf2bd70bd0d - path: glmark2/build:use-vbo=true.trace expectations: - device: gl-radeonsi-stoney - checksum: 437034f264c469757683e51e3e25beca + checksum: 793dc29115ae442b279276adb89d0999 - path: glmark2/ideas:speed=10000.trace expectations: - device: gl-radeonsi-stoney - checksum: 381d973b00b46fcc15f72808eabb6237 + checksum: 1ae057093620f868aad846167f04c6e0 - path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-loop=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 5a32f7917c130581fae23e58b71fd740 + checksum: 6fb2f9bce414879e3751bb51d1a8d481 - path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 20586c936a7051ce63503df6f9785d01 + checksum: 27fabda45ca2a989c21b4ec386a2e8f6 - path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=true.trace expectations: - device: gl-radeonsi-stoney - checksum: 255e412701afdc4a7b62654e93b92cc9 + checksum: 05ac8be6e2e0c03ea1caec85f037cddd - path: glmark2/pulsar:quads=5:texture=false:light=false.trace expectations: - device: gl-radeonsi-stoney - checksum: 42f913c6119a685da4450ea116060614 + checksum: 0b62b9c04e4c00f44eba64b366c47783 - path: glmark2/refract.trace expectations: - device: gl-radeonsi-stoney - checksum: 41d105bdd10a354f6d161c67f715b7f9 + checksum: c711f3a07f6aa9e0f19c544c6d7c2000 - path: glmark2/shading:shading=blinn-phong-inf.trace expectations: - 
device: gl-radeonsi-stoney - checksum: 4a2cf8a13b248f470e58f785d0a9207d + checksum: 429c6bbdf99d573cc4eaaee3c0471257 - path: glmark2/shading:shading=cel.trace expectations: - device: gl-radeonsi-stoney - checksum: 8325ce4073135c03eec241087e51a584 + checksum: 330d9a8375970e42ba5ddc3142dc6477 - path: glmark2/shading:shading=gouraud.trace expectations: - device: gl-radeonsi-stoney - checksum: fbe5b7d038866f6cd4fc801b062e4ce5 + checksum: e22908309d41af8c9753c5c7cae73b29 - path: glmark2/shading:shading=phong.trace expectations: - device: gl-radeonsi-stoney - checksum: 65f9468a37d683b4c1f38d34f09a97db + checksum: 2ac9f1b6ba39f8924b374d18181edeeb - path: glmark2/shadow.trace expectations: - device: gl-radeonsi-stoney - checksum: abd705b0ae76cf6f19905bfea1d3db76 + checksum: 9215a1525dfe5b12999652b3a3ba05d8 - path: glmark2/terrain.trace expectations: - device: gl-radeonsi-stoney - checksum: 59690f1438a1e44fc655d16ce8bb348b + checksum: 80a1bba6ff969c9a82c68de0306f2b61 - path: glmark2/texture:texture-filter=linear.trace expectations: - device: gl-radeonsi-stoney - checksum: 54bf32b499f3ebfe0e727e5716e54b1a + checksum: 928479421abda4823a673393cd59ff81 - path: glmark2/texture:texture-filter=mipmap.trace expectations: - device: gl-radeonsi-stoney - checksum: 3a3abce164eef2be10f58604b22583f2 + checksum: cb94bca58ed8f41c5f6f6dda3fb15600 - path: glmark2/texture:texture-filter=nearest.trace expectations:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_blit.c ^
@@ -395,11 +395,12 @@ si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, false, true /* no DCC /); } -static void si_decompress_sampler_depth_textures(struct si_context sctx, +static bool si_decompress_sampler_depth_textures(struct si_context sctx, struct si_samplers textures) { unsigned i; unsigned mask = textures->needs_depth_decompress_mask; + bool need_flush = false; while (mask) { struct pipe_sampler_view view; @@ -418,7 +419,14 @@ si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, view->u.tex.first_level, view->u.tex.last_level, 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); + + if (tex->need_flush_after_depth_decompression) { + need_flush = true; + tex->need_flush_after_depth_decompression = false; + } } + + return need_flush; } static void si_blit_decompress_color(struct si_context sctx, struct si_texture tex, @@ -757,6 +765,7 @@ void si_decompress_textures(struct si_context sctx, unsigned shader_mask) { unsigned compressed_colortex_counter, mask; + bool need_flush = false; if (sctx->blitter_running) return; @@ -774,7 +783,7 @@ unsigned i = u_bit_scan(&mask); if (sctx->samplers[i].needs_depth_decompress_mask) { - si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); + need_flush \|= si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); } if (sctx->samplers[i].needs_color_decompress_mask) { si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); @@ -784,6 +793,16 @@ } } + if (sctx->chip_class == GFX10_3 && need_flush) { + /* This fixes a corruption with the following sequence: + * - fast clear depth + * - decompress depth + * - draw + * (see https://gitlab.freedesktop.org/drm/amd/-/issues/1810#note_1170171) + */ + sctx->b.flush(&sctx->b, NULL, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW); + } + if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) { if (sctx->uses_bindless_samplers) si_decompress_resident_textures(sctx);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_clear.c ^
@@ -352,7 +352,7 @@ return false; dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset; - clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size * num_layers; + clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size; } si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value); @@ -829,6 +829,8 @@ clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f; } + zstex->need_flush_after_depth_decompression = sctx->chip_class == GFX10_3; + assert(num_clears < ARRAY_SIZE(info)); si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, zstex->surface.meta_offset, zstex->surface.meta_size, clear_value); @@ -934,6 +936,8 @@ } } + zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->chip_class == GFX10_3; + /* Update DB_DEPTH_CLEAR. */ if (update_db_depth_clear && zstex->depth_clear_value[level] != (float)depth) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_perfcounter.c ^
@@ -158,7 +158,10 @@ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) \| EVENT_INDEX(0)); radeon_set_uconfig_reg( R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) \| S_036020_PERFMON_SAMPLE_ENABLE(1)); + S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ? + V_036020_CP_PERFMON_STATE_START_COUNTING : + V_036020_CP_PERFMON_STATE_STOP_COUNTING) \| + S_036020_PERFMON_SAMPLE_ENABLE(1)); radeon_end(); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pipe.h ^
@@ -374,6 +374,7 @@ bool db_compatible : 1; bool can_sample_z : 1; bool can_sample_s : 1; + bool need_flush_after_depth_decompression: 1; /* We need to track DCC dirtiness, because st/dri usually calls * flush_resource twice per frame (not a bug) and we don't wanna
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.c ^
@@ -137,7 +137,7 @@ * added to the buffer list on the next draw call. / for (unsigned i = 0; i < SI_NUM_STATES; i++) { - struct si_pm4_state state = sctx->emitted.array[i]; + struct si_pm4_state *state = sctx->queued.array[i]; if (state && state->is_shader) { sctx->emitted.array[i] = NULL;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c ^
@@ -115,7 +115,7 @@ info->input[loc].semantic = semantic + i; - if (semantic == SYSTEM_VALUE_PRIMITIVE_ID) + if (semantic == VARYING_SLOT_PRIMITIVE_ID) info->input[loc].interpolate = INTERP_MODE_FLAT; else info->input[loc].interpolate = interp;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state.c ^
@@ -2753,7 +2753,6 @@ bool old_has_stencil = old_has_zsbuf && ((struct si_texture )sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; - bool unbound = false; int i; / Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs @@ -2778,16 +2777,6 @@ if (!surf->dcc_incompatible) continue; - /* Since the DCC decompression calls back into set_framebuffer- - * _state, we need to unbind the framebuffer, so that - * vi_separate_dcc_stop_query isn't called twice with the same - * color buffer. - */ - if (!unbound) { - util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); - unbound = true; - } - if (vi_dcc_enabled(tex, surf->base.u.tex.level)) if (!si_texture_disable_dcc(sctx, tex)) si_decompress_dcc(sctx, tex);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_draw.cpp ^
@@ -1175,7 +1175,9 @@ min_vertex_count); /* Draw state. / - if (ia_multi_vgt_param != sctx->last_multi_vgt_param) { + if (ia_multi_vgt_param != sctx->last_multi_vgt_param \|\| + / Workaround for SpecviewPerf13 Catia hang on GFX9. */ + (GFX_VERSION == GFX9 && prim != sctx->last_prim)) { radeon_begin(cs); if (GFX_VERSION == GFX9)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c ^
@@ -366,7 +366,7 @@ S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4)); radeon_opt_set_context_reg( ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, - S_028BE4_PIX_CENTER(rs->half_pixel_center) \| + S_028BE4_PIX_CENTER(rs->half_pixel_center) \| S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) \| S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode)); radeon_end_update_context_roll(ctx); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_texture.c ^
@@ -123,7 +123,14 @@ unsigned layer_stride) { if (sscreen->info.chip_class >= GFX9) { - stride = tex->surface.u.gfx9.surf_pitch * tex->surface.bpe; + unsigned pitch; + if (tex->surface.is_linear) { + pitch = tex->surface.u.gfx9.pitch[level]; + } else { + pitch = tex->surface.u.gfx9.surf_pitch; + } + + stride = pitch tex->surface.bpe; layer_stride = tex->surface.u.gfx9.surf_slice_size; if (!box) @@ -133,9 +140,8 @@ of mipmap levels. / return tex->surface.u.gfx9.surf_offset + box->z tex->surface.u.gfx9.surf_slice_size + tex->surface.u.gfx9.offset[level] + - (box->y / tex->surface.blk_h * tex->surface.u.gfx9.surf_pitch + - box->x / tex->surface.blk_w) * - tex->surface.bpe; + (box->y / tex->surface.blk_h * pitch + box->x / tex->surface.blk_w) * + tex->surface.bpe; } else { stride = tex->surface.u.legacy.level[level].nblk_x tex->surface.bpe; assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX); @@ -1647,6 +1653,7 @@ resource.array_size = texture->array_size; resource.last_level = texture->last_level; resource.nr_samples = texture->nr_samples; + resource.nr_storage_samples = texture->nr_storage_samples; resource.usage = PIPE_USAGE_DEFAULT; resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; resource.flags = texture->flags \| SI_RESOURCE_FLAG_FLUSHED_DEPTH;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/ci/softpipe-fails.txt ^
@@ -440,7 +440,6 @@ dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.read_pixels_fbo_format_mismatch,Fail dEQP-GLES31.functional.debug.negative_coverage.log.buffer.read_pixels_fbo_format_mismatch,Fail dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.line_loop.instanced_attributes,Fail -dEQP-GLES31.functional.draw_buffers_indexed.overwrite_indexed.common_color_mask_buffer_color_mask,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.0,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.1,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.10,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/ci/softpipe-flakes.txt ^
@@ -1,3 +1,2 @@ -dEQP-GLES31.functional.draw_buffers_indexed.overwrite_indexed.common_color_mask_buffer_color_mask dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.geometry.isampler2darray dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.geometry.isampler3d
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/sp_quad_blend.c ^
@@ -1005,7 +1005,7 @@ rebase_colors(bqs->base_format[cbuf], quadColor); if (blend->rt[blend_buf].colormask != 0xf) - colormask_quad( blend->rt[cbuf].colormask, quadColor, dest); + colormask_quad( blend->rt[blend_buf].colormask, quadColor, dest); /* Output color values */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c ^
@@ -1450,7 +1450,7 @@ need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, unsigned index) { - if (!(emit->info.indirect_files && (1u << TGSI_FILE_TEMPORARY)) + if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) && emit->current_loop_depth == 0) { if (!emit->temp_map[index].initialized && emit->temp_map[index].index < emit->num_shader_temps) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/swr/swr_draw.cpp ^
@@ -62,7 +62,7 @@ if (!indirect && !info->primitive_restart && - !u_trim_pipe_prim(info->mode, (unsigned)&draws[0].count)) + !u_trim_pipe_prim((enum pipe_prim_type)info->mode, (unsigned)&draws[0].count)) return; if (!swr_check_render_cond(pipe)) @@ -102,7 +102,7 @@ STREAMOUT_COMPILE_STATE state = {0}; struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output; - state.numVertsPerPrim = u_vertices_per_prim(info->mode); + state.numVertsPerPrim = u_vertices_per_prim((enum pipe_prim_type)info->mode); uint32_t offsets[MAX_SO_STREAMS] = {0}; uint32_t num = 0; @@ -221,7 +221,7 @@ if (ctx->gs) topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; else - topology = info->mode; + topology = (enum pipe_prim_type)info->mode; switch (topology) { case PIPE_PRIM_TRIANGLE_FAN:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/swr/swr_state.cpp ^
@@ -1731,7 +1731,7 @@ /* Has to be before fragment shader, since it sets SWR_NEW_FS */ if (p_draw_info) { bool new_prim_is_poly = - (u_reduced_prim(p_draw_info->mode) == PIPE_PRIM_TRIANGLES) && + (u_reduced_prim((enum pipe_prim_type)p_draw_info->mode) == PIPE_PRIM_TRIANGLES) && (ctx->derived.rastState.fillMode == SWR_FILLMODE_SOLID); if (new_prim_is_poly != ctx->poly_stipple.prim_is_poly) { ctx->dirty \|= SWR_NEW_FS;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_context.c ^
@@ -567,10 +567,22 @@ { struct pipe_sampler_view views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; struct tegra_context context = to_tegra_context(pcontext); + struct tegra_sampler_view view; unsigned i; - for (i = 0; i < num_views; i++) + for (i = 0; i < num_views; i++) { + / adjust private reference count / + view = to_tegra_sampler_view(pviews[i]); + if (view) { + view->refcount--; + if (!view->refcount) { + view->refcount = 100000000; + p_atomic_add(&view->gpu->reference.count, view->refcount); + } + } + views[i] = tegra_sampler_view_unwrap(pviews[i]); + } context->gpu->set_sampler_views(context->gpu, shader, start_slot, num_views, unbind_num_trailing_slots, @@ -836,15 +848,19 @@ if (!view) return NULL; - view->gpu = context->gpu->create_sampler_view(context->gpu, resource->gpu, - template); - memcpy(&view->base, view->gpu, sizeof(view->gpu)); + view->base = template; + view->base.context = pcontext; / overwrite to prevent reference from being released / view->base.texture = NULL; - pipe_reference_init(&view->base.reference, 1); pipe_resource_reference(&view->base.texture, presource); - view->base.context = pcontext; + + view->gpu = context->gpu->create_sampler_view(context->gpu, resource->gpu, + template); + + / use private reference count / + view->gpu->reference.count += 100000000; + view->refcount = 100000000; return &view->base; } @@ -856,6 +872,8 @@ struct tegra_sampler_view view = to_tegra_sampler_view(pview); pipe_resource_reference(&view->base.texture, NULL); + /* adjust private reference count */ + p_atomic_add(&view->gpu->reference.count, -view->refcount); pipe_sampler_view_reference(&view->gpu, NULL); free(view); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_context.h ^
@@ -47,6 +47,7 @@ struct tegra_sampler_view { struct pipe_sampler_view base; struct pipe_sampler_view gpu; + unsigned int refcount; }; static inline struct tegra_sampler_view
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_resource.h ^
@@ -31,6 +31,7 @@ struct tegra_resource { struct pipe_resource base; struct pipe_resource *gpu; + unsigned int refcount; uint64_t modifier; uint32_t stride;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_screen.c ^
@@ -245,6 +245,10 @@ pipe_reference_init(&resource->base.reference, 1); resource->base.screen = &screen->base; + /* use private reference count for wrapped resources / + resource->gpu->reference.count += 100000000; + resource->refcount = 100000000; + return &resource->base; destroy: @@ -352,6 +356,8 @@ { struct tegra_resource resource = to_tegra_resource(presource); + /* adjust private reference count */ + p_atomic_add(&resource->gpu->reference.count, -resource->refcount); pipe_resource_reference(&resource->gpu, NULL); free(resource); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/virgl/ci/traces-virgl.yml ^
@@ -17,11 +17,11 @@ - path: gputest/furmark.trace expectations: - device: gl-virgl - checksum: d5682aaa762a4849f0cae1692623bdcb + checksum: a38d4c123d13c5ccd3a86f0663fe1aab - path: gputest/pixmark-piano.trace expectations: - device: gl-virgl - checksum: 1bcded27a6ba04fe0f76ff997b98dbc3 + checksum: b580ae01560380461a103975cab77393 - path: gputest/triangle.trace expectations: - device: gl-virgl @@ -121,7 +121,7 @@ - path: glmark2/refract.trace expectations: - device: gl-virgl - checksum: b1332df324d0fc1db22b362231d3ed01 + checksum: cdadfee0518b964433d80c01329ec191 - path: glmark2/shading:shading=blinn-phong-inf.trace expectations: - device: gl-virgl @@ -178,7 +178,7 @@ - path: gputest/plot3d.trace expectations: - device: gl-virgl - checksum: a1af286874f7060171cb3ca2e765c448 + checksum: 7e818a6070005056700e5ef8590a3f8e # Times out # - path: gputest/tessmark.trace # expectations:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt ^
@@ -16,17 +16,11 @@ dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail -dEQP-GLES3.functional.multisample.fbo_4_samples.proportionality_sample_coverage,Fail -dEQP-GLES3.functional.multisample.fbo_4_samples.sample_coverage_invert,Fail -dEQP-GLES3.functional.multisample.fbo_max_samples.proportionality_sample_coverage,Fail -dEQP-GLES3.functional.multisample.fbo_max_samples.sample_coverage_invert,Fail -KHR-GL32.transform_feedback.capture_geometry_separate_test,Fail KHR-GL32.transform_feedback.capture_vertex_interleaved_test,Fail KHR-GL32.transform_feedback.capture_vertex_separate_test,Fail KHR-GL32.transform_feedback.discard_vertex_test,Fail KHR-GL32.transform_feedback.draw_xfb_instanced_test,Crash KHR-GL32.transform_feedback.draw_xfb_stream_instanced_test,Crash -KHR-GL32.transform_feedback.query_geometry_separate_test,Fail KHR-GL32.transform_feedback.query_vertex_interleaved_test,Fail KHR-GL32.transform_feedback.query_vertex_separate_test,Fail dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail @@ -162,7 +156,6 @@ spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail -spec@arb_texture_rg@multisample-fast-clear gl_arb_texture_rg-int,Fail spec@arb_texture_view@rendering-formats,Fail spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail @@ -177,16 +170,12 @@ spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail @@ -225,9 +214,6 @@ spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail @@ -254,20 +240,15 @@ spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail 
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail @@ -307,13 +288,9 @@ spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as 
GL_R16,Fail @@ -337,12 +314,9 @@ spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16F,Fail @@ -353,9 +327,6 @@ spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail @@ -367,9 +338,6 @@ spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8UI,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8I,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail @@ -389,11 +357,8 @@ spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail @@ -423,16 +388,11 @@ spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail spec@arb_texture_view@rendering-formats@clear 
GL_RGBA8UI as GL_RG16I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail @@ -470,8 +430,6 @@ spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail -spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Fail -spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Fail spec@ext_framebuffer_object@fbo-blending-formats,Fail spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY,Fail spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY12,Fail @@ -511,7 +469,6 @@ spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail spec@ext_packed_float@query-rgba-signed-components,Fail -spec@ext_texture_integer@multisample-fast-clear gl_ext_texture_integer,Fail spec@ext_texture_snorm@fbo-blending-formats,Fail spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY16_SNORM,Fail spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY8_SNORM,Fail @@ -591,16 +548,12 @@ spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail 
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail @@ -639,9 +592,6 @@ spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail @@ -668,20 +618,15 @@ spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/meson.build ^
@@ -71,7 +71,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) zink_c_args = []
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c ^
@@ -1408,16 +1408,16 @@ emit_atomic(struct ntv_context ctx, SpvId op, SpvId type, SpvId src0, SpvId src1, SpvId src2) { if (op == SpvOpAtomicLoad) - return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0)); if (op == SpvOpAtomicCompareExchange) - return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), emit_uint_const(ctx, 32, 0), / these params are intentionally swapped / src2, src1); - return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), src1); } @@ -1742,7 +1742,13 @@ BUILTIN_UNOP(nir_op_ufind_msb, GLSLstd450FindUMsb) BUILTIN_UNOP(nir_op_find_lsb, GLSLstd450FindILsb) BUILTIN_UNOP(nir_op_ifind_msb, GLSLstd450FindSMsb) - BUILTIN_UNOPF(nir_op_pack_half_2x16, GLSLstd450PackHalf2x16) + + case nir_op_pack_half_2x16: + assert(nir_op_infos[alu->op].num_inputs == 1); + result = emit_builtin_unop(ctx, GLSLstd450PackHalf2x16, get_dest_type(ctx, &alu->dest.dest, nir_type_uint), src[0]); + force_float = true; + break; + BUILTIN_UNOPF(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16) BUILTIN_UNOPF(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32) #undef BUILTIN_UNOP @@ -2481,12 +2487,12 @@ } static void -handle_atomic_op(struct ntv_context ctx, nir_intrinsic_instr intr, SpvId ptr, SpvId param, SpvId param2) +handle_atomic_op(struct ntv_context ctx, nir_intrinsic_instr intr, SpvId ptr, SpvId param, SpvId param2, nir_alu_type type) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); + SpvId dest_type = 
get_dest_type(ctx, &intr->dest, type); SpvId result = emit_atomic(ctx, get_atomic_op(intr->intrinsic), dest_type, ptr, param, param2); assert(result); - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_dest(ctx, &intr->dest, result, type); } static void @@ -2525,7 +2531,7 @@ if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) param2 = get_src(ctx, &intr->src[3]); - handle_atomic_op(ctx, intr, ptr, param, param2); + handle_atomic_op(ctx, intr, ptr, param, param2, nir_type_uint32); } static void @@ -2546,7 +2552,7 @@ if (intr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) param2 = get_src(ctx, &intr->src[2]); - handle_atomic_op(ctx, intr, ptr, param, param2); + handle_atomic_op(ctx, intr, ptr, param, param2, nir_type_uint32); } static void @@ -2681,13 +2687,24 @@ type_to_dim(glsl_get_sampler_dim(type), &is_ms); SpvId sample = is_ms ? get_src(ctx, &intr->src[2]) : emit_uint_const(ctx, 32, 0); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); + enum glsl_base_type glsl_type = glsl_get_sampler_result_type(type); + SpvId base_type = get_glsl_basetype(ctx, glsl_type); SpvId texel = spirv_builder_emit_image_texel_pointer(&ctx->builder, base_type, img_var, coord, sample); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) + / The type of Value must be the same as Result Type. + * The type of the value pointed to by Pointer must be the same as Result Type. 
+ / + nir_alu_type ntype = nir_get_nir_type_for_glsl_base_type(glsl_type); + SpvId cast_type = get_dest_type(ctx, &intr->dest, ntype); + param = emit_bitcast(ctx, cast_type, param); + + if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { param2 = get_src(ctx, &intr->src[4]); - handle_atomic_op(ctx, intr, texel, param, param2); + param2 = emit_bitcast(ctx, cast_type, param2); + } + + handle_atomic_op(ctx, intr, texel, param, param2, ntype); } static void @@ -2869,7 +2886,7 @@ case nir_intrinsic_memory_barrier: spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, SpvMemorySemanticsImageMemoryMask \| SpvMemorySemanticsUniformMemoryMask \| - SpvMemorySemanticsMakeVisibleMask \| SpvMemorySemanticsAcquireReleaseMask); + SpvMemorySemanticsAcquireReleaseMask); break; case nir_intrinsic_memory_barrier_image: @@ -3249,13 +3266,16 @@ lod = emit_float_const(ctx, 32, 0.0); if (tex->op == nir_texop_txs) { SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - / Additionally, if its Dim is 1D, 2D, 3D, or Cube, + /* Its Dim operand must be one of 1D, 2D, 3D, or Cube + * - OpImageQuerySizeLod specification + * + * Additionally, if its Dim is 1D, 2D, 3D, or Cube, * it must also have either an MS of 1 or a Sampled of 0 or 2. * - OpImageQuerySize specification * * all spirv samplers use these types */ - if (tex->sampler_dim != GLSL_SAMPLER_DIM_MS && !lod) + if (!lod && tex_instr_is_lod_allowed(tex)) lod = emit_uint_const(ctx, 32, 0); SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, dest_type, image,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_blit.c ^
@@ -357,12 +357,18 @@ zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height) { struct u_rect intersect = {0, width, 0, height}; + struct u_rect r = { + MIN2(region.x0, region.x1), + MAX2(region.x0, region.x1), + MIN2(region.y0, region.y1), + MAX2(region.y0, region.y1), + }; - if (!u_rect_test_intersection(&region, &intersect)) + if (!u_rect_test_intersection(&r, &intersect)) /* is this even a thing? */ return false; - u_rect_find_intersection(&region, &intersect); + u_rect_find_intersection(&r, &intersect); if (intersect.x0 != 0 \|\| intersect.y0 != 0 \|\| intersect.x1 != width \|\| intersect.y1 != height) return false; @@ -373,11 +379,23 @@ bool zink_blit_region_covers(struct u_rect region, struct u_rect covers) { + struct u_rect r = { + MIN2(region.x0, region.x1), + MAX2(region.x0, region.x1), + MIN2(region.y0, region.y1), + MAX2(region.y0, region.y1), + }; + struct u_rect c = { + MIN2(covers.x0, covers.x1), + MAX2(covers.x0, covers.x1), + MIN2(covers.y0, covers.y1), + MAX2(covers.y0, covers.y1), + }; struct u_rect intersect; - if (!u_rect_test_intersection(&region, &covers)) + if (!u_rect_test_intersection(&r, &c)) return false; - u_rect_union(&intersect, &region, &covers); - return intersect.x0 == covers.x0 && intersect.y0 == covers.y0 && - intersect.x1 == covers.x1 && intersect.y1 == covers.y1; + u_rect_union(&intersect, &r, &c); + return intersect.x0 == c.x0 && intersect.y0 == c.y0 && + intersect.x1 == c.x1 && intersect.y1 == c.y1; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_bo.c ^
@@ -259,7 +259,7 @@ } /* all non-suballocated bo can cache */ - init_pb_cache = true; + init_pb_cache = !pNext; bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry)); if (!bo) { @@ -278,7 +278,7 @@ simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment_log2 = util_logbase2(alignment); - bo->base.size = size; + bo->base.size = mai.allocationSize; bo->base.vtbl = &bo_vtbl; bo->base.placement = vk_domain_from_heap(heap); bo->base.usage = flags; @@ -347,7 +347,7 @@ size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE); buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE, - bo->base.placement, ZINK_ALLOC_NO_SUBALLOC, NULL); + ZINK_HEAP_DEVICE_LOCAL, ZINK_ALLOC_NO_SUBALLOC, NULL); if (!buf) { FREE(best_backing->chunks); FREE(best_backing);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_compiler.c ^
@@ -379,7 +379,7 @@ .lower_unpack_32_2x16_split = true, .lower_vector_cmp = true, .lower_int64_options = 0, - .lower_doubles_options = ~nir_lower_fp64_full_software, + .lower_doubles_options = 0, .lower_uniforms_to_ubo = true, .has_fsub = true, .has_isub = true, @@ -397,6 +397,21 @@ screen->nir_options.lower_flrp64 = true; screen->nir_options.lower_ffma64 = true; } + + /* + The OpFRem and OpFMod instructions use cheap approximations of remainder, + and the error can be large due to the discontinuity in trunc() and floor(). + This can produce mathematically unexpected results in some cases, such as + FMod(x,x) computing x rather than 0, and can also cause the result to have + a different sign than the infinitely precise result. + + -Table 84. Precision of core SPIR-V Instructions + * for drivers that are known to have imprecise fmod for doubles, lower dmod + */ + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV \|\| + screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE \|\| + screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY) + screen->nir_options.lower_doubles_options = nir_lower_dmod; } const void *
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.c ^
@@ -73,8 +73,18 @@ ALWAYS_INLINE static void check_resource_for_batch_ref(struct zink_context ctx, struct zink_resource res) { - if (!zink_resource_has_binds(res)) - zink_batch_reference_resource(&ctx->batch, res); + if (!zink_resource_has_binds(res)) { + /* avoid desync between usage and tracking: + * - if usage exists, it must be removed before the context is destroyed + * - having usage does not imply having tracking + * - if tracking will be added here, also reapply usage to avoid dangling usage once tracking is removed + * TODO: somehow fix this for perf because it's an extra hash lookup + / + if (res->obj->bo->reads \|\| res->obj->bo->writes) + zink_batch_reference_resource_rw(&ctx->batch, res, !!res->obj->bo->writes); + else + zink_batch_reference_resource(&ctx->batch, res); + } } static void @@ -100,11 +110,14 @@ pipe_surface_release(&ctx->base, &ctx->dummy_surface[i]); zink_buffer_view_reference(screen, &ctx->dummy_bufferview, NULL); - zink_descriptors_deinit_bindless(ctx); + if (ctx->dd) + zink_descriptors_deinit_bindless(ctx); simple_mtx_destroy(&ctx->batch_mtx); - zink_clear_batch_state(ctx, ctx->batch.state); - zink_batch_state_destroy(screen, ctx->batch.state); + if (ctx->batch.state) { + zink_clear_batch_state(ctx, ctx->batch.state); + zink_batch_state_destroy(screen, ctx->batch.state); + } struct zink_batch_state bs = ctx->batch_states; while (bs) { struct zink_batch_state bs_next = bs->next; @@ -149,7 +162,8 @@ _mesa_hash_table_destroy(ctx->render_pass_cache, NULL); slab_destroy_child(&ctx->transfer_pool_unsync); - screen->descriptors_deinit(ctx); + if (ctx->dd) + screen->descriptors_deinit(ctx); zink_descriptor_layouts_deinit(ctx); @@ -595,8 +609,10 @@ struct zink_sampler_state sampler = sampler_state; struct zink_batch batch = &zink_context(pctx)->batch; zink_descriptor_set_refs_clear(&sampler->desc_set_refs, sampler_state); - util_dynarray_append(&batch->state->zombie_samplers, VkSampler, - sampler->sampler); + / may be called if context_create 
fails / + if (batch->state) + util_dynarray_append(&batch->state->zombie_samplers, VkSampler, + sampler->sampler); if (sampler->custom_border_color) p_atomic_dec(&zink_screen(pctx->screen)->cur_custom_border_color_samplers); FREE(sampler); @@ -637,6 +653,9 @@ assert(bvci.format); bvci.offset = offset; bvci.range = !offset && range == res->base.b.width0 ? VK_WHOLE_SIZE : range; + uint32_t clamp = util_format_get_blocksize(format) screen->info.props.limits.maxTexelBufferElements; + if (bvci.range == VK_WHOLE_SIZE && res->base.b.width0 > clamp) + bvci.range = clamp; bvci.flags = 0; return bvci; } @@ -736,10 +755,24 @@ ivci.subresourceRange.aspectMask = sampler_aspect_from_format(state->format); /* samplers for stencil aspects of packed formats need to always use stencil swizzle / if (ivci.subresourceRange.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT \| VK_IMAGE_ASPECT_STENCIL_BIT)) { - ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r)); - ivci.components.g = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g)); - ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b)); - ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a)); + if (sampler_view->base.swizzle_r == PIPE_SWIZZLE_0 && + sampler_view->base.swizzle_g == PIPE_SWIZZLE_0 && + sampler_view->base.swizzle_b == PIPE_SWIZZLE_0 && + sampler_view->base.swizzle_a == PIPE_SWIZZLE_X) { + / + * When the state tracker asks for 000x swizzles, this is depth mode GL_ALPHA, + * however with the single dref fetch this will fail, so just spam all the channels. 
+ / + ivci.components.r = VK_COMPONENT_SWIZZLE_R; + ivci.components.g = VK_COMPONENT_SWIZZLE_R; + ivci.components.b = VK_COMPONENT_SWIZZLE_R; + ivci.components.a = VK_COMPONENT_SWIZZLE_R; + } else { + ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r)); + ivci.components.g = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g)); + ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b)); + ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a)); + } } else { / if we have e.g., R8G8B8X8, then we have to ignore alpha since we're just emulating * these formats @@ -903,36 +936,9 @@ return; struct zink_resource res = zink_resource(ctx->vertex_buffers[slot].buffer.resource); res->vbo_bind_mask &= ~BITFIELD_BIT(slot); - ctx->vbufs[slot] = VK_NULL_HANDLE; - ctx->vbuf_offsets[slot] = 0; update_res_bind_count(ctx, res, false, true); } -ALWAYS_INLINE static struct zink_resource -set_vertex_buffer_clamped(struct zink_context ctx, unsigned slot) -{ - const struct pipe_vertex_buffer ctx_vb = &ctx->vertex_buffers[slot]; - struct zink_resource res = zink_resource(ctx_vb->buffer.resource); - struct zink_screen screen = zink_screen(ctx->base.screen); - if (ctx_vb->buffer_offset > screen->info.props.limits.maxVertexInputAttributeOffset) { - /* buffer offset exceeds maximum: make a tmp buffer at this offset / - ctx->vbufs[slot] = zink_resource_tmp_buffer(screen, res, ctx_vb->buffer_offset, 0, &ctx->vbuf_offsets[slot]); - util_dynarray_append(&res->obj->tmp, VkBuffer, ctx->vbufs[slot]); - / the driver is broken and sets a min alignment that's larger than its max offset: rebind as staging buffer / - if (unlikely(ctx->vbuf_offsets[slot] > screen->info.props.limits.maxVertexInputAttributeOffset)) { - static bool warned = false; - if (!warned) - debug_printf("zink: this vulkan driver is BROKEN! 
maxVertexInputAttributeOffset < VkMemoryRequirements::alignment\n"); - warned = true; - } - } else { - ctx->vbufs[slot] = res->obj->buffer; - ctx->vbuf_offsets[slot] = ctx_vb->buffer_offset; - } - assert(ctx->vbufs[slot]); - return res; -} - static void zink_set_vertex_buffers(struct pipe_context pctx, unsigned start_slot, @@ -970,9 +976,9 @@ /* always barrier before possible rebind / zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); - set_vertex_buffer_clamped(ctx, start_slot + i); - } else - enabled_buffers &= ~BITFIELD_BIT(i); + } else { + enabled_buffers &= ~BITFIELD_BIT(start_slot + i); + } } } else { if (need_state_change) @@ -1105,7 +1111,7 @@ update_res_bind_count(ctx, new_res, shader == PIPE_SHADER_COMPUTE, false); } zink_batch_resource_usage_set(&ctx->batch, new_res, false); - zink_fake_buffer_barrier(new_res, VK_ACCESS_UNIFORM_READ_BIT, + zink_resource_buffer_barrier(ctx, new_res, VK_ACCESS_UNIFORM_READ_BIT, zink_pipeline_flags_from_pipe_stage(shader)); } update \|= ((index \|\| screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) && ctx->ubos[shader][index].buffer_offset != offset) \|\| @@ -1200,7 +1206,7 @@ ssbo->buffer_size = MIN2(buffers[i].buffer_size, new_res->base.b.width0 - ssbo->buffer_offset); util_range_add(&new_res->base.b, &new_res->valid_buffer_range, ssbo->buffer_offset, ssbo->buffer_offset + ssbo->buffer_size); - zink_fake_buffer_barrier(new_res, access, + zink_resource_buffer_barrier(ctx, new_res, access, zink_pipeline_flags_from_pipe_stage(p_stage)); update = true; max_slot = MAX2(max_slot, start_slot + i); @@ -1386,7 +1392,7 @@ image_view->buffer_view = create_image_bufferview(ctx, &images[i]); assert(image_view->buffer_view); zink_batch_usage_set(&image_view->buffer_view->batch_uses, ctx->batch.state); - zink_fake_buffer_barrier(res, access, + zink_resource_buffer_barrier(ctx, res, access, zink_pipeline_flags_from_pipe_stage(p_stage)); } else { image_view->surface = 
create_image_surface(ctx, &images[i], p_stage == PIPE_SHADER_COMPUTE); @@ -1479,7 +1485,7 @@ update = true; } zink_batch_usage_set(&b->buffer_view->batch_uses, ctx->batch.state); - zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT, + zink_resource_buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, zink_pipeline_flags_from_pipe_stage(shader_type)); if (!a \|\| a->buffer_view->buffer_view != b->buffer_view->buffer_view) update = true; @@ -1646,7 +1652,7 @@ rebind_bindless_bufferview(ctx, res, ds); VkBufferView bv = &ctx->di.bindless[0].buffer_infos[handle]; bv = ds->bufferview->buffer_view; - zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT \| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + zink_resource_buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT \| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); } else { VkDescriptorImageInfo ii = &ctx->di.bindless[0].img_infos[handle]; ii->sampler = bd->sampler->sampler; @@ -1765,7 +1771,7 @@ rebind_bindless_bufferview(ctx, res, ds); VkBufferView bv = &ctx->di.bindless[1].buffer_infos[handle]; bv = ds->bufferview->buffer_view;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.h ^
@@ -261,8 +261,6 @@ uint16_t rp_clears_enabled; uint16_t fbfetch_outputs; - VkBuffer vbufs[PIPE_MAX_ATTRIBS]; - unsigned vbuf_offsets[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; bool vertex_buffers_dirty; @@ -399,8 +397,6 @@ void zink_resource_buffer_barrier(struct zink_context ctx, struct zink_resource res, VkAccessFlags flags, VkPipelineStageFlags pipeline); -void -zink_fake_buffer_barrier(struct zink_resource res, VkAccessFlags flags, VkPipelineStageFlags pipeline); bool zink_resource_image_needs_barrier(struct zink_resource res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); bool
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors.c ^
@@ -696,13 +696,13 @@ #endif switch (type) { case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - zds->sampler_states = (struct zink_sampler_state*)&samplers[i pool->key.layout->num_descriptors]; + zds->sampler_states = (struct zink_sampler_state*)&samplers[i num_resources]; FALLTHROUGH; case ZINK_DESCRIPTOR_TYPE_IMAGE: - zds->surfaces = &surfaces[i * pool->key.layout->num_descriptors]; + zds->surfaces = &surfaces[i * num_resources]; break; default: - zds->res_objs = (struct zink_resource_object*)&res_objs[i pool->key.layout->num_descriptors]; + zds->res_objs = (struct zink_resource_object*)&res_objs[i num_resources]; break; } zds->desc_set = desc_set[i]; @@ -790,20 +790,28 @@ simple_mtx_lock(&pool->mtx); if (last_set && last_set->hash == hash && desc_state_equal(&last_set->key, &key)) { + bool was_recycled = false; zds = last_set; cache_hit = !zds->invalid; if (zds->recycled) { struct hash_entry he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key); - if (he) + if (he) { + was_recycled = true; _mesa_hash_table_remove(pool->free_desc_sets, he); + } zds->recycled = false; } if (zds->invalid) { if (zink_batch_usage_exists(zds->batch_uses)) punt_invalid_set(zds, NULL); - else + else { + if (was_recycled) { + descriptor_set_invalidate(zds); + goto out; + } /* this set is guaranteed to be in pool->alloc_desc_sets / goto skip_hash_tables; + } zds = NULL; } if (zds) @@ -828,6 +836,8 @@ zds = (void)he->data; cache_hit = !zds->invalid; if (recycled) { + if (zds->invalid) + descriptor_set_invalidate(zds); / need to migrate this entry back to the in-use hash / _mesa_hash_table_remove(pool->free_desc_sets, he); goto out; @@ -1419,6 +1429,7 @@ if (pg->dd->push_usage) { if (pg->dd->fbfetch) { / fbfetch is not cacheable: grab a lazy set because it's faster */ + cache_hit = false; desc_set = zink_descriptors_alloc_lazy_push(ctx); } else { zds = zink_descriptor_set_get(ctx, ZINK_DESCRIPTOR_TYPES, is_compute, &cache_hit);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors_lazy.c ^
@@ -140,20 +140,20 @@ struct zink_shader *stages; if (pg->is_compute) stages = &((struct zink_compute_program)pg)->shader; - else { + else stages = ((struct zink_gfx_program)pg)->shaders; - if (stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) { - zink_descriptor_util_init_fbfetch(ctx); - push_count = 1; - pg->dd->fbfetch = true; - } - } if (!pg->dd) pg->dd = (void)rzalloc(pg, struct zink_program_descriptor_data); if (!pg->dd) return false; + if (!pg->is_compute && stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) { + zink_descriptor_util_init_fbfetch(ctx); + push_count = 1; + pg->dd->fbfetch = true; + } + unsigned entry_idx[ZINK_DESCRIPTOR_TYPES] = {0}; unsigned num_shaders = pg->is_compute ? 1 : ZINK_SHADER_COUNT;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_draw.cpp ^
@@ -134,16 +134,16 @@ return; for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { - const unsigned buffer_id = ctx->element_state->binding_map[i]; - struct pipe_vertex_buffer vb = ctx->vertex_buffers + buffer_id; + struct pipe_vertex_buffer vb = ctx->vertex_buffers + ctx->element_state->binding_map[i]; assert(vb); if (vb->buffer.resource) { - buffers[i] = ctx->vbufs[buffer_id]; - assert(buffers[i]); + struct zink_resource res = zink_resource(vb->buffer.resource); + assert(res->obj->buffer); + buffers[i] = res->obj->buffer; + buffer_offsets[i] = vb->buffer_offset; + buffer_strides[i] = vb->stride; if (HAS_VERTEX_INPUT) elems->hw_state.dynbindings[i].stride = vb->stride; - buffer_offsets[i] = ctx->vbuf_offsets[buffer_id]; - buffer_strides[i] = vb->stride; zink_batch_resource_usage_set(&ctx->batch, zink_resource(vb->buffer.resource), false); } else { buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; @@ -193,6 +193,8 @@ prog = (struct zink_gfx_program)entry->data; u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages) ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader; + /* ensure variants are always updated if keys have changed since last use / + ctx->dirty_shader_stages \|= prog->stages_present; } else { ctx->dirty_shader_stages \|= bits; prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.vertices_per_patch + 1); @@ -374,6 +376,8 @@ access \|= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; pipeline \|= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; bind_count -= util_bitcount(res->vbo_bind_mask); + if (res->write_bind_count[is_compute]) + pipeline \|= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; } bind_count -= res->so_bind_count; } @@ -462,6 +466,9 @@ const struct pipe_draw_start_count_bias draws, unsigned num_draws) { + if (!dindirect && (!draws[0].count \|\| !dinfo->instance_count)) + return; + struct zink_context ctx = zink_context(pctx); struct zink_screen screen = zink_screen(pctx->screen); struct 
zink_rasterizer_state *rast_state = ctx->rast_state;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_pipeline.c ^
@@ -124,7 +124,13 @@ warn_missing_feature("alphaToOne"); ms_state.alphaToOneEnable = state->blend_state->alpha_to_one; } - ms_state.pSampleMask = state->sample_mask ? &state->sample_mask : NULL; + /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1." + * - Chapter 27. Rasterization + * + * thus it never makes sense to leave this as NULL since gallium will provide correct + * data here as long as sample_mask is initialized on context creation + */ + ms_state.pSampleMask = &state->sample_mask; if (hw_rast_state->force_persample_interp) { ms_state.sampleShadingEnable = VK_TRUE; ms_state.minSampleShading = 1.0;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_program.c ^
@@ -557,6 +557,7 @@ zink_destroy_gfx_program(struct zink_screen screen, struct zink_gfx_program prog) { + util_queue_fence_wait(&prog->base.cache_fence); if (prog->base.layout) VKSCR(DestroyPipelineLayout)(screen->dev, prog->base.layout, NULL); @@ -601,6 +602,7 @@ zink_destroy_compute_program(struct zink_screen screen, struct zink_compute_program comp) { + util_queue_fence_wait(&comp->base.cache_fence); if (comp->base.layout) VKSCR(DestroyPipelineLayout)(screen->dev, comp->base.layout, NULL); @@ -821,6 +823,9 @@ if (old != PIPE_SHADER_TYPES) { memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base)); ctx->dirty_shader_stages \|= BITFIELD_BIT(old); + } else { + /* always unset vertex shader values when changing to a non-vs last stage */ + memset(&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base)); } ctx->last_vertex_stage_dirty = true; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_query.c ^
@@ -183,22 +183,6 @@ query->type == PIPE_QUERY_GPU_FINISHED; } -static void -qbo_sync_from_prev(struct zink_context ctx, struct zink_query query, unsigned id_offset, unsigned last_start) -{ - assert(id_offset); - - struct zink_query_buffer prev = list_last_entry(&query->buffers, struct zink_query_buffer, list); - unsigned result_size = get_num_results(query->type) sizeof(uint64_t); - /* this is get_buffer_offset() but without the zink_query object / - unsigned qbo_offset = last_start get_num_results(query->type) * sizeof(uint64_t); - query->curr_query = id_offset; - query->curr_qbo->num_results = id_offset; - zink_copy_buffer(ctx, zink_resource(query->curr_qbo->buffer), zink_resource(prev->buffer), 0, - qbo_offset, - id_offset * result_size); -} - static bool qbo_append(struct pipe_screen screen, struct zink_query query) { @@ -459,6 +443,8 @@ uint64_t xfb_results = NULL; uint64_t results; bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP \|\| query->type == PIPE_QUERY_TIMESTAMP_DISJOINT; + if (!qbo->num_results) + continue; results = pipe_buffer_map_range(pctx, qbo->buffer, 0, (is_timestamp ? 
1 : qbo->num_results) * result_size, flags, &xfer); if (!results) { @@ -563,7 +549,7 @@ util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size); assert(query_id < NUM_QUERIES); VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer, - offset, type_size, flags); + offset, base_result_size, flags); } static void @@ -575,8 +561,6 @@ static void reset_pool(struct zink_context ctx, struct zink_batch batch, struct zink_query q) { - unsigned last_start = q->last_start; - unsigned id_offset = q->curr_query - q->last_start; / This command must only be called outside of a render pass instance * * - vkCmdResetQueryPool spec @@ -605,8 +589,6 @@ reset_qbo(q); else debug_printf("zink: qbo alloc failed on reset!"); - if (id_offset) - qbo_sync_from_prev(ctx, q, id_offset, last_start); } static inline unsigned @@ -650,6 +632,8 @@ if (!is_timestamp) q->curr_qbo->num_results++; + else + q->curr_qbo->num_results = 1; q->needs_update = false; } @@ -1014,17 +998,18 @@ / VkQueryResultFlags flag = is_time_query(query) ? 
0 : VK_QUERY_RESULT_PARTIAL_BIT; + unsigned src_offset = result_size get_num_results(query->type); if (zink_batch_usage_check_completion(ctx, query->batch_id)) { - uint64_t u64[2] = {0}; - if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, 2 * result_size, u64, + uint64_t u64[4] = {0}; + if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, sizeof(u64), u64, 0, size_flags \| VK_QUERY_RESULT_WITH_AVAILABILITY_BIT \| flag) == VK_SUCCESS) { - pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char)u64 + result_size); + pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char)u64 + src_offset); return; } } - struct pipe_resource staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, result_size 2); + struct pipe_resource staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size); copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags \| VK_QUERY_RESULT_WITH_AVAILABILITY_BIT \| flag); - zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size, result_size); + zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size get_num_results(query->type), result_size); pipe_resource_reference(&staging, NULL); return; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.c ^
@@ -165,6 +165,9 @@ if (bind & PIPE_BIND_SHADER_IMAGE) bci.usage \|= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + if (bind & PIPE_BIND_QUERY_BUFFER) + bci.usage \|= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) bci.flags \|= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; return bci; @@ -372,7 +375,7 @@ ici->samples = templ->nr_samples ? templ->nr_samples : VK_SAMPLE_COUNT_1_BIT; ici->tiling = modifiers_count ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : bind & PIPE_BIND_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ici->initialLayout = dmabuf ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; /* sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the following conditions is true: * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT @@ -446,21 +449,20 @@ VkMemoryRequirements reqs; VkMemoryPropertyFlags flags; bool need_dedicated = false; + bool shared = templ->bind & PIPE_BIND_SHARED; VkExternalMemoryHandleTypeFlags export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; VkExternalMemoryHandleTypeFlags external = 0; if (whandle) { - if (whandle->type == WINSYS_HANDLE_TYPE_FD) + if (whandle->type == WINSYS_HANDLE_TYPE_FD) { external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - else + export_types \|= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + } else unreachable("unknown handle type"); } /* TODO: remove linear for wsi / bool scanout = templ->bind & PIPE_BIND_SCANOUT; - bool shared = templ->bind & PIPE_BIND_SHARED; - if (shared && screen->info.have_EXT_external_memory_dma_buf) - export_types \|= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; pipe_reference_init(&obj->reference, 1); util_dynarray_init(&obj->tmp, NULL); @@ -1232,13 +1234,13 @@ mgr = ctx->tc->base.stream_uploader; else mgr = ctx->base.stream_uploader; - u_upload_alloc(mgr, 0, box->width + box->x, + 
u_upload_alloc(mgr, 0, box->width, screen->info.props.limits.minMemoryMapAlignment, &offset, (struct pipe_resource )&trans->staging_res, (void )&ptr); res = zink_resource(trans->staging_res); - trans->offset = offset + box->x; + trans->offset = offset; usage \|= PIPE_MAP_UNSYNCHRONIZED; - ptr = ((uint8_t )ptr) + box->x; + ptr = ((uint8_t )ptr); } else { / At this point, the buffer is always idle (we checked it above). */ usage \|= PIPE_MAP_UNSYNCHRONIZED;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_screen.c ^
@@ -184,7 +184,7 @@ if (!screen->disk_cache) return; - util_queue_add_job(&screen->cache_put_thread, pg, NULL, cache_put_job, NULL, 0); + util_queue_add_job(&screen->cache_put_thread, pg, &pg->cache_fence, cache_put_job, NULL, 0); } static void @@ -438,7 +438,7 @@ return 1; case PIPE_CAP_TGSI_BALLOT: - return screen->vk_version >= VK_MAKE_VERSION(1,2,0) && screen->info.props11.subgroupSize <= 64; + return screen->info.have_vulkan12 && screen->info.have_EXT_shader_subgroup_ballot && screen->info.props11.subgroupSize <= 64; case PIPE_CAP_SAMPLE_SHADING: return screen->info.feats.features.sampleRateShading; @@ -671,7 +671,10 @@ return MIN2(screen->info.props.limits.maxVertexOutputComponents / 4 / 2, 16); case PIPE_CAP_DMABUF: - return screen->info.have_KHR_external_memory_fd && screen->info.have_EXT_external_memory_dma_buf && screen->info.have_EXT_queue_family_foreign; + return screen->info.have_KHR_external_memory_fd && + screen->info.have_EXT_external_memory_dma_buf && + screen->info.have_EXT_queue_family_foreign && + screen->info.have_EXT_image_drm_format_modifier; case PIPE_CAP_DEPTH_BOUNDS_TEST: return screen->info.feats.features.depthBounds; @@ -851,8 +854,10 @@ return 0; /* not implemented / case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: - return screen->info.feats11.uniformAndStorageBuffer16BitAccess \|\| - (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess); + //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform + //return screen->info.feats11.uniformAndStorageBuffer16BitAccess \|\| + //(screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess); + return 0; case PIPE_SHADER_CAP_FP16_DERIVATIVES: return 0; //spirv requires 32bit derivative srcs and dests case PIPE_SHADER_CAP_FP16: @@ -1706,11 +1711,11 @@ if (mem.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { / VRAM / info->total_device_memory += 
mem.memoryProperties.memoryHeaps[i].size / 1024; - info->avail_device_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024; + info->avail_device_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024; } else { / GART / info->total_staging_memory += mem.memoryProperties.memoryHeaps[i].size / 1024; - info->avail_staging_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024; + info->avail_staging_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024; } } / evictions not yet supported in vulkan */ @@ -1941,9 +1946,11 @@ screen->base.get_compiler_options = zink_get_compiler_options; screen->base.get_sample_pixel_grid = zink_get_sample_pixel_grid; screen->base.is_format_supported = zink_is_format_supported; - screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers; - screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported; - screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes; + if (screen->info.have_EXT_image_drm_format_modifier && screen->info.have_EXT_external_memory_dma_buf) { + screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers; + screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported; + screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes; + } screen->base.context_create = zink_context_create; screen->base.flush_frontbuffer = zink_flush_frontbuffer; screen->base.destroy = zink_destroy_screen;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_state.h ^
@@ -63,12 +63,12 @@ unsigned polygon_mode : 2; //VkPolygonMode unsigned cull_mode : 2; //VkCullModeFlags unsigned line_mode : 2; //VkLineRasterizationModeEXT - bool depth_clamp:1; - bool rasterizer_discard:1; - bool pv_last:1; - bool line_stipple_enable:1; - bool force_persample_interp:1; - bool clip_halfz:1; + unsigned depth_clamp:1; + unsigned rasterizer_discard:1; + unsigned pv_last:1; + unsigned line_stipple_enable:1; + unsigned force_persample_interp:1; + unsigned clip_halfz:1; }; #define ZINK_RAST_HW_STATE_SIZE 12
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c ^
@@ -265,6 +265,7 @@ &pool->free_cmd_buffers, pool_link) { lvp_cmd_buffer_destroy(cmd_buffer); } + list_inithead(&pool->free_cmd_buffers); } VKAPI_ATTR void VKAPI_CALL lvp_CmdDrawMultiEXT(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_device.c ^
@@ -1174,11 +1174,13 @@ struct pipe_fence_handle handle = NULL; device->queue.ctx->flush(device->queue.ctx, &handle, 0); if (fence) - fence->handle = handle; + device->pscreen->fence_reference(device->pscreen, &fence->handle, handle); set_last_fence(device, handle, timeline); / this is the array of signaling timeline semaphore links / for (unsigned i = 0; i < num_timelines; i++) - timelines[i]->fence = handle; + device->pscreen->fence_reference(device->pscreen, &timelines[i]->fence, handle); + + device->pscreen->fence_reference(device->pscreen, &handle, NULL); } / get a new timeline link for creating a new signal event @@ -1210,7 +1212,8 @@ * sema->lock MUST be locked before calling / static void -prune_semaphore_links(struct lvp_semaphore sema, uint64_t timeline) +prune_semaphore_links(struct lvp_device device, + struct lvp_semaphore sema, uint64_t timeline) { if (!timeline) /* zero isn't a valid id to prune with / @@ -1225,7 +1228,7 @@ util_dynarray_append(&sema->links, struct lvp_semaphore_timeline, tl); tl = tl->next; cur->next = NULL; - cur->fence = NULL; + device->pscreen->fence_reference(device->pscreen, &cur->fence, NULL); } /* this is now the current timeline link / sema->timeline = tl; @@ -1288,7 +1291,7 @@ / no timeline link was available yet: try to find one / simple_mtx_lock(&sema->lock); / always prune first to update current timeline id / - prune_semaphore_links(sema, device->queue.last_finished); + prune_semaphore_links(device, sema, device->queue.last_finished); tl_array[i].tl = find_semaphore_timeline(sema, waitval); if (timeout && !tl_array[i].tl) { / still no timeline link available: @@ -1540,7 +1543,7 @@ } simple_mtx_lock(&sema->lock); /* always prune first to make links available and update timeline id / - prune_semaphore_links(sema, queue->last_finished); + prune_semaphore_links(queue->device, sema, queue->last_finished); if (sema->current < info->pSignalSemaphoreValues[j]) { / only signal semaphores if the new id is >= the current one / 
struct lvp_semaphore_timeline tl = get_semaphore_link(sema); @@ -1562,7 +1565,7 @@ } simple_mtx_lock(&sema->lock); /* always prune first to update timeline id / - prune_semaphore_links(sema, queue->last_finished); + prune_semaphore_links(queue->device, sema, queue->last_finished); if (info->pWaitSemaphoreValues[j] && pSubmits[i].pWaitDstStageMask && pSubmits[i].pWaitDstStageMask[j] && sema->current < info->pWaitSemaphoreValues[j]) { @@ -2316,7 +2319,7 @@ LVP_FROM_HANDLE(lvp_device, device, _device); LVP_FROM_HANDLE(lvp_semaphore, sema, _semaphore); simple_mtx_lock(&sema->lock); - prune_semaphore_links(sema, device->queue.last_finished); + prune_semaphore_links(device, sema, device->queue.last_finished); pValue = sema->current; simple_mtx_unlock(&sema->lock); return VK_SUCCESS; @@ -2334,7 +2337,7 @@ sema->current = pSignalInfo->value; cnd_broadcast(&sema->submit); simple_mtx_lock(&sema->lock); - prune_semaphore_links(sema, device->queue.last_finished); + prune_semaphore_links(device, sema, device->queue.last_finished); simple_mtx_unlock(&sema->lock); return VK_SUCCESS; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_execute.c ^
@@ -573,40 +573,6 @@ state->rs_dirty = true; } - state->disable_multisample = pipeline->disable_multisample; - if (pipeline->graphics_create_info.pMultisampleState) { - const VkPipelineMultisampleStateCreateInfo ms = pipeline->graphics_create_info.pMultisampleState; - state->rs_state.multisample = ms->rasterizationSamples > 1; - state->sample_mask = ms->pSampleMask ? ms->pSampleMask[0] : 0xffffffff; - state->blend_state.alpha_to_coverage = ms->alphaToCoverageEnable; - state->blend_state.alpha_to_one = ms->alphaToOneEnable; - state->blend_dirty = true; - state->rs_dirty = true; - state->min_samples = 1; - state->sample_mask_dirty = true; - fb_samples = ms->rasterizationSamples; - if (ms->sampleShadingEnable) { - state->min_samples = ceil(ms->rasterizationSamples ms->minSampleShading); - if (state->min_samples > 1) - state->min_samples = ms->rasterizationSamples; - if (state->min_samples < 1) - state->min_samples = 1; - } - if (pipeline->force_min_sample) - state->min_samples = ms->rasterizationSamples; - state->min_samples_dirty = true; - } else { - state->rs_state.multisample = false; - state->sample_mask_dirty = state->sample_mask != 0xffffffff; - state->sample_mask = 0xffffffff; - state->min_samples_dirty = state->min_samples; - state->min_samples = 0; - state->blend_dirty \|= state->blend_state.alpha_to_coverage \|\| state->blend_state.alpha_to_one; - state->blend_state.alpha_to_coverage = false; - state->blend_state.alpha_to_one = false; - state->rs_dirty = true; - } - if (pipeline->graphics_create_info.pDepthStencilState) { const VkPipelineDepthStencilStateCreateInfo dsa = pipeline->graphics_create_info.pDepthStencilState; @@ -710,6 +676,40 @@ state->blend_dirty = true; } + state->disable_multisample = pipeline->disable_multisample; + if (pipeline->graphics_create_info.pMultisampleState) { + const VkPipelineMultisampleStateCreateInfo ms = pipeline->graphics_create_info.pMultisampleState; + state->rs_state.multisample = ms->rasterizationSamples > 1; + 
state->sample_mask = ms->pSampleMask ? ms->pSampleMask[0] : 0xffffffff; + state->blend_state.alpha_to_coverage = ms->alphaToCoverageEnable; + state->blend_state.alpha_to_one = ms->alphaToOneEnable; + state->blend_dirty = true; + state->rs_dirty = true; + state->min_samples = 1; + state->sample_mask_dirty = true; + fb_samples = ms->rasterizationSamples; + if (ms->sampleShadingEnable) { + state->min_samples = ceil(ms->rasterizationSamples * ms->minSampleShading); + if (state->min_samples > 1) + state->min_samples = ms->rasterizationSamples; + if (state->min_samples < 1) + state->min_samples = 1; + } + if (pipeline->force_min_sample) + state->min_samples = ms->rasterizationSamples; + state->min_samples_dirty = true; + } else { + state->rs_state.multisample = false; + state->sample_mask_dirty = state->sample_mask != 0xffffffff; + state->sample_mask = 0xffffffff; + state->min_samples_dirty = state->min_samples; + state->min_samples = 0; + state->blend_dirty \|= state->blend_state.alpha_to_coverage \|\| state->blend_state.alpha_to_one; + state->blend_state.alpha_to_coverage = false; + state->blend_state.alpha_to_one = false; + state->rs_dirty = true; + } + if (!dynamic_states[conv_dynamic_state_idx(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT)]) { const VkPipelineVertexInputStateCreateInfo vi = pipeline->graphics_create_info.pVertexInputState; int i; @@ -1011,8 +1011,6 @@ / if (iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT \|\| iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { - if (templ.swizzle_a == PIPE_SWIZZLE_X) - templ.swizzle_r = PIPE_SWIZZLE_X; fix_depth_swizzle(templ.swizzle_r); fix_depth_swizzle(templ.swizzle_g); fix_depth_swizzle(templ.swizzle_b); @@ -2847,43 +2845,13 @@ } } -static void pack_clear_color(enum pipe_format pformat, VkClearColorValue in_val, uint32_t col_val[4]) -{ - const struct util_format_description desc = util_format_description(pformat); - col_val[0] = col_val[1] = col_val[2] = col_val[3] = 0; - for (unsigned c = 0; c < 
4; c++) { - if (desc->swizzle[c] >= 4) - continue; - const struct util_format_channel_description channel = &desc->channel[desc->swizzle[c]]; - if (channel->size == 32) { - col_val[c] = in_val->uint32[c]; - continue; - } - if (channel->pure_integer) { - uint64_t v = in_val->uint32[c] & ((1u << channel->size) - 1); - switch (channel->size) { - case 2: - case 8: - case 10: - col_val[0] \|= (v << channel->shift); - break; - case 16: - col_val[c / 2] \|= (v << (16 (c % 2))); - break; - } - } else { - util_pack_color(in_val->float32, pformat, (union util_color )col_val); - break; - } - } -} - static void handle_clear_color_image(struct vk_cmd_queue_entry cmd, struct rendering_state state) { LVP_FROM_HANDLE(lvp_image, image, cmd->u.clear_color_image.image); - uint32_t col_val[4]; - pack_clear_color(image->bo->format, cmd->u.clear_color_image.color, col_val); + union util_color uc; + uint32_t col_val = uc.ui; + util_pack_color_union(image->bo->format, &uc, (void)cmd->u.clear_color_image.color); for (unsigned i = 0; i < cmd->u.clear_color_image.range_count; i++) { VkImageSubresourceRange range = &cmd->u.clear_color_image.ranges[i]; struct pipe_box box; @@ -3940,6 +3908,18 @@ } } + for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) { + for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (state.sv[s][i]) + pipe_sampler_view_reference(&state.sv[s][i], NULL); + } + } + + for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (state.cso_ss_ptr[PIPE_SHADER_COMPUTE][i]) + state.pctx->delete_sampler_state(state.pctx, state.ss_cso[PIPE_SHADER_COMPUTE][i]); + } + free(state.imageless_views); free(state.pending_clear_aspects); free(state.cleared_views);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_formats.c ^
@@ -230,6 +230,9 @@ break; } + if (info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) + goto skip_checks; + if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { goto unsupported; @@ -273,6 +276,7 @@ } } +skip_checks: *pImageFormatProperties = (VkImageFormatProperties) { .maxExtent = maxExtent, .maxMipLevels = maxMipLevels,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_pipeline.c ^
@@ -64,6 +64,9 @@ if (pipeline->shader_cso[PIPE_SHADER_COMPUTE]) device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) + ralloc_free(pipeline->pipeline_nir[i]); + ralloc_free(pipeline->mem_ctx); vk_object_base_finish(&pipeline->base); vk_free2(&device->vk.alloc, pAllocator, pipeline); @@ -602,6 +605,8 @@ NIR_PASS(progress, nir, nir_opt_deref); NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_copy_prop); NIR_PASS(progress, nir, nir_opt_dce); NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); @@ -656,7 +661,7 @@ static void fill_shader_prog(struct pipe_shader_state state, gl_shader_stage stage, struct lvp_pipeline pipeline) { state->type = PIPE_SHADER_IR_NIR; - state->ir.nir = pipeline->pipeline_nir[stage]; + state->ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[stage]); } static void @@ -728,7 +733,7 @@ device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, pipeline->pipeline_nir[stage]); if (stage == MESA_SHADER_COMPUTE) { struct pipe_compute_state shstate = {0}; - shstate.prog = (void )pipeline->pipeline_nir[MESA_SHADER_COMPUTE]; + shstate.prog = (void )nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]); shstate.ir_type = PIPE_SHADER_IR_NIR; shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size; pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate); @@ -904,7 +909,7 @@ pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader; struct pipe_shader_state shstate = {0}; shstate.type = PIPE_SHADER_IR_NIR; - shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]; + shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]); pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = 
device->queue.ctx->create_fs_state(device->queue.ctx, &shstate); } return VK_SUCCESS;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c ^
@@ -608,6 +608,7 @@ ws->info.num_physical_sgprs_per_simd = 512; ws->info.num_physical_wave64_vgprs_per_simd = 256; ws->info.has_3d_cube_border_color_mipmap = true; + ws->info.never_stop_sq_perf_counters = false; ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL \|\| strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gbm/main/gbm.c ^
@@ -497,8 +497,22 @@ const uint64_t modifiers, const unsigned int count) { + uint32_t flags = 0; + + / + * ABI version 1 added the modifiers+flags capability. Backends from + * prior versions may fail if "unknown" flags are provided along with + * modifiers, but assume scanout is required when modifiers are used. + * Newer backends expect scanout to be explicitly requested if required, + * but applications using this older interface rely on the older implied + * requirement, so that behavior must be preserved. + / + if (gbm->v0.backend_version >= 1) { + flags \|= GBM_BO_USE_SCANOUT; + } + return gbm_bo_create_with_modifiers2(gbm, width, height, format, modifiers, - count, GBM_BO_USE_SCANOUT); + count, flags); } GBM_EXPORT struct gbm_bo @@ -648,9 +662,23 @@ const uint64_t modifiers, const unsigned int count) { + uint32_t flags = 0; + + / + * ABI version 1 added the modifiers+flags capability. Backends from + * prior versions may fail if "unknown" flags are provided along with + * modifiers, but assume scanout is required when modifiers are used. + * Newer backends expect scanout to be explicitly requested if required, + * but applications using this older interface rely on the older implied + * requirement, so that behavior must be preserved. + / + if (gbm->v0.backend_version >= 1) { + flags \|= GBM_BO_USE_SCANOUT; + } + return gbm_surface_create_with_modifiers2(gbm, width, height, format, modifiers, count, - GBM_BO_USE_SCANOUT); + flags); } GBM_EXPORT struct gbm_surface
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/dri3_glx.c ^
@@ -306,6 +306,8 @@ pcp->base.noError = GL_TRUE; } + pcp->base.renderType = dca.render_type; + pcp->driContext = (*psc->image_driver->createContextAttribs) (psc->driScreen, dca.api,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/glx_pbuffer.c ^
@@ -199,6 +199,8 @@ pdraw->textureTarget = determineTextureTarget(attrib_list, num_attribs); pdraw->textureFormat = determineTextureFormat(attrib_list, num_attribs); + + pdraw->refcount = 1; #endif return GL_TRUE;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/glxextensions.h ^
@@ -283,17 +283,11 @@ /* GLX_ALIAS should be used for functions with a non-void return type. GLX_ALIAS_VOID is for functions with a void return type. / # ifdef HAVE_FUNC_ATTRIBUTE_ALIAS -/ GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the - * extra expansion means that the name mangling macros in glx_mangle.h will - * apply before stringification, so the alias attribute will have a string like - * "mglXFoo" instead of "glXFoo". */ -# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \ +# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ return_type real_func proto_args \ __attribute__ ((alias( # aliased_func ) )); -# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ - GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \ - GLX_ALIAS2(void, real_func, proto_args, args, aliased_func) + GLX_ALIAS(void, real_func, proto_args, args, aliased_func) # else # define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ return_type real_func proto_args \
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_cfg.cpp ^
@@ -364,6 +364,8 @@ next = new_block(); if (inst->predicate) cur->add_successor(mem_ctx, next, bblock_link_logical); + else + cur->add_successor(mem_ctx, next, bblock_link_physical); set_next_block(&cur, next, ip); break;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs.cpp ^
@@ -2839,6 +2839,9 @@ if (inst->src[1].file != IMM) continue; + if (brw_reg_type_is_floating_point(inst->src[1].type)) + break; + /* a * 1.0 = a / if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_MOV; @@ -8933,7 +8936,12 @@ if (last_scratch > 0) { ASSERTED unsigned max_scratch_size = 2 1024 * 1024; - prog_data->total_scratch = brw_get_scratch_size(last_scratch); + /* Take the max of any previously compiled variant of the shader. In the + * case of bindless shaders with return parts, this will also take the + * max of all parts. + / + prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch), + prog_data->total_scratch); if (stage == MESA_SHADER_COMPUTE \|\| stage == MESA_SHADER_KERNEL) { if (devinfo->is_haswell) { @@ -9675,6 +9683,7 @@ prog_data->per_coarse_pixel_dispatch = key->coarse_pixel && + !prog_data->uses_omask && !prog_data->persample_dispatch && !prog_data->uses_sample_mask && (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) && @@ -9717,6 +9726,7 @@ INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_WM); prog_data->base.stage = MESA_SHADER_FRAGMENT; + prog_data->base.total_scratch = 0; const struct intel_device_info devinfo = compiler->devinfo; const unsigned max_subgroup_size = compiler->devinfo->ver >= 6 ? 32 : 16; @@ -10106,6 +10116,7 @@ prog_data->base.stage = MESA_SHADER_COMPUTE; prog_data->base.total_shared = nir->info.shared_size; + prog_data->base.total_scratch = 0; /* Generate code for all the possible SIMD variants. */ bool generate_all; @@ -10501,7 +10512,7 @@ assert(local_arg_offset % 8 == 0); return offset \| - SET_BITS(simd_size > 8, 4, 4) \| + SET_BITS(simd_size == 8, 4, 4) \| SET_BITS(local_arg_offset / 8, 2, 0); } @@ -10519,6 +10530,7 @@ const bool debug_enabled = INTEL_DEBUG(DEBUG_RT); prog_data->base.stage = shader->info.stage; + prog_data->base.total_scratch = 0; prog_data->max_stack_size = 0; fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_generator.cpp ^
@@ -616,8 +616,8 @@ * easier just to split it here. */ const unsigned lower_width = - (devinfo->ver <= 7 \|\| type_sz(src.type) > 4) ? - 8 : MIN2(16, inst->exec_size); + devinfo->ver <= 7 \|\| element_sz(src) > 4 \|\| element_sz(dst) > 4 ? 8 : + MIN2(16, inst->exec_size); brw_set_default_exec_size(p, cvt(lower_width) - 1); for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_nir.cpp ^
@@ -547,17 +547,16 @@ /* For (gl_FrontFacing ? 1.0 : -1.0), emit: * - * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W + * or(8) tmp.1<2>W g1.1<0,1,0>W 0x00003f80W * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D * - * and negate the result for (gl_FrontFacing ? -1.0 : 1.0). + * and negate g1.1<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0). / - bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1), - g1, brw_imm_uw(0x3f80)); - if (value1 == -1.0f) - bld.MOV(tmp, negate(tmp)); + g1.negate = true; + bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1), + g1, brw_imm_uw(0x3f80)); } else if (devinfo->ver >= 6) { / Bit 15 of g0.0 is 0 if the polygon is front facing. / fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); @@ -1887,7 +1886,7 @@ case nir_op_sdot_4x8_iadd: case nir_op_sdot_4x8_iadd_sat: - inst = bld.DP4A(result, + inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D), retype(op[2], BRW_REGISTER_TYPE_D), retype(op[0], BRW_REGISTER_TYPE_D), retype(op[1], BRW_REGISTER_TYPE_D)); @@ -1898,7 +1897,7 @@ case nir_op_udot_4x8_uadd: case nir_op_udot_4x8_uadd_sat: - inst = bld.DP4A(result, + inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_UD), retype(op[2], BRW_REGISTER_TYPE_UD), retype(op[0], BRW_REGISTER_TYPE_UD), retype(op[1], BRW_REGISTER_TYPE_UD)); @@ -1909,7 +1908,7 @@ case nir_op_sudot_4x8_iadd: case nir_op_sudot_4x8_iadd_sat: - inst = bld.DP4A(result, + inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D), retype(op[2], BRW_REGISTER_TYPE_D), retype(op[0], BRW_REGISTER_TYPE_D), retype(op[1], BRW_REGISTER_TYPE_UD)); @@ -3929,7 +3928,10 @@ srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX7_BTI_SLM); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); - srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); + / No point in masking with sample mask, here we're handling compute + * intrinsics. 
+ / + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); fs_reg data = get_nir_src(instr->src[0]); data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); @@ -6067,7 +6069,7 @@ Compiler should send U,V,R parameters even if V,R are 0. */ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && devinfo->verx10 == 125) - assert(instr->coord_components == 3u + instr->is_array); + assert(instr->coord_components >= 3u); break; case nir_tex_src_ddx: srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_scoreboard.cpp ^
@@ -122,6 +122,8 @@ else if (inst->opcode == SHADER_OPCODE_BROADCAST && !devinfo->has_64bit_float && type_sz(t) >= 8) return TGL_PIPE_INT; + else if (inst->opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT) + return TGL_PIPE_FLOAT; else if (type_sz(inst->dst.type) >= 8 \|\| type_sz(t) >= 8 \|\| is_dword_multiply) return TGL_PIPE_LONG;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c ^
@@ -60,6 +60,9 @@ for (unsigned i = 0; i < info->num_indices; i++) dup->const_index[i] = intrin->const_index[i]; + if (nir_intrinsic_has_access(intrin)) + nir_intrinsic_set_access(dup, nir_intrinsic_access(intrin)); + nir_intrinsic_set_align(dup, align, 0); if (info->has_dest) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_shader_calls.c ^
@@ -124,143 +124,128 @@ nir_btd_stack_push_intel(b, offset); } -bool -brw_nir_lower_shader_calls(nir_shader shader) +static bool +lower_shader_calls_instr(struct nir_builder b, nir_instr instr, void data) { - nir_function_impl impl = nir_shader_get_entrypoint(shader); - bool progress = false; - - nir_builder _b, b = &_b; - nir_builder_init(&_b, impl); + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_foreach_block_safe(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr call = nir_instr_as_intrinsic(instr); - if (call->intrinsic != nir_intrinsic_rt_trace_ray && - call->intrinsic != nir_intrinsic_rt_execute_callable && - call->intrinsic != nir_intrinsic_rt_resume) - continue; - - b->cursor = nir_before_instr(instr); - - progress = true; - - switch (call->intrinsic) { - case nir_intrinsic_rt_trace_ray: { - store_resume_addr(b, call); - - nir_ssa_def as_addr = call->src[0].ssa; - nir_ssa_def ray_flags = call->src[1].ssa; - / From the SPIR-V spec: - * - * "Only the 8 least-significant bits of Cull Mask are used by - * this instruction - other bits are ignored. - * - * Only the 4 least-significant bits of SBT Offset and SBT - * Stride are used by this instruction - other bits are - * ignored. - * - * Only the 16 least-significant bits of Miss Index are used by - * this instruction - other bits are ignored." 
- / - nir_ssa_def cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff); - nir_ssa_def sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf); - nir_ssa_def sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf); - nir_ssa_def miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff); - nir_ssa_def ray_orig = call->src[6].ssa; - nir_ssa_def ray_t_min = call->src[7].ssa; - nir_ssa_def ray_dir = call->src[8].ssa; - nir_ssa_def ray_t_max = call->src[9].ssa; - - / The hardware packet takes the address to the root node in the - * acceleration structure, not the acceleration structure itself. - * To find that, we have to read the root node offset from the - * acceleration structure which is the first QWord. - / - nir_ssa_def root_node_ptr = - nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64)); - - /* The hardware packet requires an address to the first element of - * the hit SBT. - * - * In order to calculate this, we must multiply the "SBT Offset" - * provided to OpTraceRay by the SBT stride provided for the hit - * SBT in the call to vkCmdTraceRay() and add that to the base - * address of the hit SBT. This stride is not to be confused with - * the "SBT Stride" provided to OpTraceRay which is in units of - * this stride. It's a rather terrible overload of the word - * "stride". The hardware docs calls the SPIR-V stride value the - * "shader index multiplier" which is a much more sane name. - / - nir_ssa_def hit_sbt_stride_B = - nir_load_ray_hit_sbt_stride_intel(b); - nir_ssa_def hit_sbt_offset_B = - nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B)); - nir_ssa_def hit_sbt_addr = - nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b), - nir_u2u64(b, hit_sbt_offset_B)); - - /* The hardware packet takes an address to the miss BSR. 
/ - nir_ssa_def miss_sbt_stride_B = - nir_load_ray_miss_sbt_stride_intel(b); - nir_ssa_def miss_sbt_offset_B = - nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B)); - nir_ssa_def miss_sbt_addr = - nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b), - nir_u2u64(b, miss_sbt_offset_B)); - - struct brw_nir_rt_mem_ray_defs ray_defs = { - .root_node_ptr = root_node_ptr, - .ray_flags = nir_u2u16(b, ray_flags), - .ray_mask = cull_mask, - .hit_group_sr_base_ptr = hit_sbt_addr, - .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B), - .miss_sr_ptr = miss_sbt_addr, - .orig = ray_orig, - .t_near = ray_t_min, - .dir = ray_dir, - .t_far = ray_t_max, - .shader_index_multiplier = sbt_stride, - }; - brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD); - nir_trace_ray_initial_intel(b); - break; - } - - case nir_intrinsic_rt_execute_callable: { - store_resume_addr(b, call); - - nir_ssa_def sbt_offset32 = - nir_imul(b, call->src[0].ssa, - nir_u2u32(b, nir_load_callable_sbt_stride_intel(b))); - nir_ssa_def sbt_addr = - nir_iadd(b, nir_load_callable_sbt_addr_intel(b), - nir_u2u64(b, sbt_offset32)); - brw_nir_btd_spawn(b, sbt_addr); - break; - } - - default: - unreachable("Invalid intrinsic"); - } - - nir_instr_remove(&call->instr); - } + /* Leave nir_intrinsic_rt_resume to be lowered by + * brw_nir_lower_rt_intrinsics() + / + nir_intrinsic_instr call = nir_instr_as_intrinsic(instr); + + switch (call->intrinsic) { + case nir_intrinsic_rt_trace_ray: { + b->cursor = nir_instr_remove(instr); + + store_resume_addr(b, call); + + nir_ssa_def as_addr = call->src[0].ssa; + nir_ssa_def ray_flags = call->src[1].ssa; + /* From the SPIR-V spec: + * + * "Only the 8 least-significant bits of Cull Mask are used by this + * instruction - other bits are ignored. + * + * Only the 4 least-significant bits of SBT Offset and SBT Stride are + * used by this instruction - other bits are ignored. 
+ * + * Only the 16 least-significant bits of Miss Index are used by this + * instruction - other bits are ignored." + / + nir_ssa_def cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff); + nir_ssa_def sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf); + nir_ssa_def sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf); + nir_ssa_def miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff); + nir_ssa_def ray_orig = call->src[6].ssa; + nir_ssa_def ray_t_min = call->src[7].ssa; + nir_ssa_def ray_dir = call->src[8].ssa; + nir_ssa_def ray_t_max = call->src[9].ssa; + + / The hardware packet takes the address to the root node in the + * acceleration structure, not the acceleration structure itself. To + * find that, we have to read the root node offset from the acceleration + * structure which is the first QWord. + / + nir_ssa_def root_node_ptr = + nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64)); + + /* The hardware packet requires an address to the first element of the + * hit SBT. + * + * In order to calculate this, we must multiply the "SBT Offset" + * provided to OpTraceRay by the SBT stride provided for the hit SBT in + * the call to vkCmdTraceRay() and add that to the base address of the + * hit SBT. This stride is not to be confused with the "SBT Stride" + * provided to OpTraceRay which is in units of this stride. It's a + * rather terrible overload of the word "stride". The hardware docs + * calls the SPIR-V stride value the "shader index multiplier" which is + * a much more sane name. + / + nir_ssa_def hit_sbt_stride_B = + nir_load_ray_hit_sbt_stride_intel(b); + nir_ssa_def hit_sbt_offset_B = + nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B)); + nir_ssa_def hit_sbt_addr = + nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b), + nir_u2u64(b, hit_sbt_offset_B)); + + /* The hardware packet takes an address to the miss BSR. 
/ + nir_ssa_def miss_sbt_stride_B = + nir_load_ray_miss_sbt_stride_intel(b); + nir_ssa_def miss_sbt_offset_B = + nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B)); + nir_ssa_def miss_sbt_addr =
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_storage_image.c ^
@@ -646,6 +646,9 @@ if (var->data.access & ACCESS_NON_READABLE) return false; + if (var->data.image.format == PIPE_FORMAT_NONE) + return false; + /* If we have a matching typed format, then we have an actual image surface * so we fall back and let the back-end emit a TXS for this. */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_predicated_break.cpp ^
@@ -53,19 +53,79 @@ * and we can remove the BREAK instruction and predicate the WHILE. / +#define MAX_NESTING 128 + +struct loop_continue_tracking { + BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)]; + unsigned depth; +}; + +static void +enter_loop(struct loop_continue_tracking s) +{ + s->depth++; + + /* Any loops deeper than that maximum nesting will just re-use the last + * flag. This simplifies most of the code. MAX_NESTING is chosen to be + * large enough that it is unlikely to occur. Even if it does, the + * optimization that uses this tracking is unlikely to make much + * difference. + / + if (s->depth < MAX_NESTING) + BITSET_CLEAR(s->has_continue, s->depth); +} + +static void +exit_loop(struct loop_continue_tracking s) +{ + assert(s->depth > 0); + s->depth--; +} + +static void +set_continue(struct loop_continue_tracking s) +{ + const unsigned i = MIN2(s->depth, MAX_NESTING - 1); + + BITSET_SET(s->has_continue, i); +} + +static bool +has_continue(const struct loop_continue_tracking s) +{ + const unsigned i = MIN2(s->depth, MAX_NESTING - 1); + + return BITSET_TEST(s->has_continue, i); +} + bool opt_predicated_break(backend_shader s) { bool progress = false; + struct loop_continue_tracking state = { {0, }, 0 }; foreach_block (block, s->cfg) { - if (block->start_ip != block->end_ip) - continue; + / DO instructions, by definition, can only be found at the beginning of + * basic blocks. + / + backend_instruction const do_inst = block->start(); - /* BREAK and CONTINUE instructions, by definition, can only be found at - * the ends of basic blocks. + /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be + * found at the ends of basic blocks. 
/ backend_instruction jump_inst = block->end(); + + if (do_inst->opcode == BRW_OPCODE_DO) + enter_loop(&state); + + if (jump_inst->opcode == BRW_OPCODE_CONTINUE) + set_continue(&state); + else if (jump_inst->opcode == BRW_OPCODE_WHILE) + exit_loop(&state); + + if (block->start_ip != block->end_ip) + continue; + if (jump_inst->opcode != BRW_OPCODE_BREAK && jump_inst->opcode != BRW_OPCODE_CONTINUE) continue; @@ -119,13 +179,20 @@ /* Now look at the first instruction of the block following the BREAK. If * it's a WHILE, we can delete the break, predicate the WHILE, and join * the two basic blocks. + * + * This optimization can only be applied if the only instruction that + * can transfer control to the WHILE is the BREAK. If other paths can + * lead to the while, the flags may be in an unknown state, and the loop + * could terminate prematurely. This can occur if the loop contains a + * CONT instruction. / bblock_t while_block = earlier_block->next(); backend_instruction *while_inst = while_block->start(); if (jump_inst->opcode == BRW_OPCODE_BREAK && while_inst->opcode == BRW_OPCODE_WHILE && - while_inst->predicate == BRW_PREDICATE_NONE) { + while_inst->predicate == BRW_PREDICATE_NONE && + !has_continue(&state)) { jump_inst->remove(earlier_block); while_inst->predicate = jump_inst->predicate; while_inst->predicate_inverse = !jump_inst->predicate_inverse;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_reg.h ^
@@ -1238,6 +1238,28 @@ region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \ BRW_HORIZONTAL_STRIDE_0) +/** + * Return the size in bytes per data element of register \p reg on the + * corresponding register file. + / +static inline unsigned +element_sz(struct brw_reg reg) +{ + if (reg.file == BRW_IMMEDIATE_VALUE \|\| has_scalar_region(reg)) { + return type_sz(reg.type); + + } else if (reg.width == BRW_WIDTH_1 && + reg.hstride == BRW_HORIZONTAL_STRIDE_0) { + assert(reg.vstride != BRW_VERTICAL_STRIDE_0); + return type_sz(reg.type) << (reg.vstride - 1); + + } else { + assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0); + assert(reg.vstride == reg.hstride + reg.width); + return type_sz(reg.type) << (reg.hstride - 1); + } +} + / brw_packed_float.c */ int brw_float_to_vf(float f); float brw_vf_to_float(unsigned char vf);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_shader.h ^
@@ -121,7 +121,7 @@ extern const char const pred_ctrl_align16[16]; / Per-thread scratch space is a power-of-two multiple of 1KB. */ -static inline int +static inline unsigned brw_get_scratch_size(int size) { return MAX2(1024, util_next_power_of_two(size));
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4.cpp ^
@@ -2896,6 +2896,7 @@ INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_VS); prog_data->base.base.stage = MESA_SHADER_VERTEX; + prog_data->base.base.total_scratch = 0; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX]; brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4_gs_visitor.cpp ^
@@ -600,6 +600,7 @@ const bool debug_enabled = INTEL_DEBUG(DEBUG_GS); prog_data->base.base.stage = MESA_SHADER_GEOMETRY; + prog_data->base.base.total_scratch = 0; /* The GLSL linker will have already matched up GS inputs and the outputs * of prior stages. The driver does extend VS outputs in some cases, but
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4_tcs.cpp ^
@@ -372,6 +372,7 @@ const unsigned *assembly; vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL; + prog_data->base.base.total_scratch = 0; nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/meson.build ^
@@ -143,7 +143,7 @@ prog_python, '@INPUT@', '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, capture : true, )
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/test_fs_scoreboard.cpp ^
@@ -104,6 +104,14 @@ return inst; } +static tgl_swsb +tgl_swsb_testcase(unsigned regdist, unsigned sbid, enum tgl_sbid_mode mode) +{ + tgl_swsb swsb = tgl_swsb_sbid(mode, sbid); + swsb.regdist = regdist; + return swsb; +} + bool operator ==(const tgl_swsb &a, const tgl_swsb &b) { return a.mode == b.mode && @@ -178,8 +186,7 @@ EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null()); EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null()); - EXPECT_EQ(instruction(block0, 2)->sched, - (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET })); + EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET)); } TEST_F(scoreboard_test, RAW_outoforder_inorder) @@ -206,8 +213,7 @@ EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0)); EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null()); - EXPECT_EQ(instruction(block0, 2)->sched, - (tgl_swsb { .regdist = 1, .sbid = 0, .mode = TGL_SBID_DST })); + EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(1, 0, TGL_SBID_DST)); } TEST_F(scoreboard_test, RAW_outoforder_outoforder) @@ -292,8 +298,7 @@ EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null()); EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null()); - EXPECT_EQ(instruction(block0, 2)->sched, - (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET })); + EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET)); } TEST_F(scoreboard_test, WAR_outoforder_inorder) @@ -405,8 +410,7 @@ EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null()); EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null()); - EXPECT_EQ(instruction(block0, 2)->sched, - (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET })); + EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET)); } TEST_F(scoreboard_test, WAW_outoforder_inorder)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/dev/intel_device_info.c ^
@@ -1411,7 +1411,7 @@ * available for that PCI ID and then compute the real value from the * subslice information we get from the kernel. / - const uint32_t subslice_total = intel_device_info_eu_total(devinfo); + const uint32_t subslice_total = intel_device_info_subslice_total(devinfo); const uint32_t eu_total = intel_device_info_eu_total(devinfo); / Logical CS threads = EUs per subslice * num threads per EU */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen12.xml ^
@@ -6450,6 +6450,7 @@ <instruction name="PIPE_CONTROL" bias="2" length="6" engine="render"> <field name="DWord Length" start="0" end="7" type="uint" default="4"/> <field name="HDC Pipeline Flush Enable" start="9" end="9" type="bool"/> + <field name="L3 Read Only Cache Invalidation Enable" start="10" end="10" type="bool"/> <field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="0"/> <field name="3D Command Opcode" start="24" end="26" type="uint" default="2"/> <field name="Command SubType" start="27" end="28" type="uint" default="3"/>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen125.xml ^
@@ -6368,6 +6368,7 @@ <instruction name="PIPE_CONTROL" bias="2" length="6" engine="render"> <field name="DWord Length" start="0" end="7" type="uint" default="4"/> <field name="HDC Pipeline Flush Enable" start="9" end="9" type="bool"/> + <field name="L3 Read Only Cache Invalidation Enable" start="10" end="10" type="bool"/> <field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="0"/> <field name="3D Command Opcode" start="24" end="26" type="uint" default="2"/> <field name="Command SubType" start="27" end="28" type="uint" default="3"/>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen4.xml ^
@@ -976,7 +976,7 @@ <field name="2D Command Opcode" start="22" end="28" type="uint" default="80"/> <field name="Command Type" start="29" end="31" type="uint" default="2"/> <field name="Destination Pitch" start="32" end="47" type="int"/> - <field name="Raster Operation" start="48" end="55" type="int"/> + <field name="Raster Operation" start="48" end="55" type="uint"/> <field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH"> <value name="8 bit" value="0"/> <value name="565" value="1"/>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen45.xml ^
@@ -1032,7 +1032,7 @@ <field name="2D Command Opcode" start="22" end="28" type="uint" default="1"/> <field name="Command Type" start="29" end="31" type="uint" default="2"/> <field name="Destination Pitch" start="32" end="47" type="int"/> - <field name="Raster Operation" start="48" end="55" type="int"/> + <field name="Raster Operation" start="48" end="55" type="uint"/> <field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH"> <value name="8 bit" value="0"/> <value name="565" value="1"/>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen5.xml ^
@@ -1110,7 +1110,7 @@ <field name="2D Command Opcode" start="22" end="28" type="uint" default="1"/> <field name="Command Type" start="29" end="31" type="uint" default="2"/> <field name="Destination Pitch" start="32" end="47" type="int"/> - <field name="Raster Operation" start="48" end="55" type="int"/> + <field name="Raster Operation" start="48" end="55" type="uint"/> <field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH"> <value name="8 bit" value="0"/> <value name="565" value="1"/>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen_rt.xml ^
@@ -3,8 +3,8 @@ <struct name="BINDLESS_SHADER_RECORD" length="2"> <field name="Offset To Local Arguments" start="0" end="2" type="uint"/> <field name="Bindless Shader Dispatch Mode" start="4" end="4" type="uint"> - <value name="SIMD8" value="0"/> - <value name="SIMD16" value="1"/> + <value name="RT_SIMD16" value="0"/> + <value name="RT_SIMD8" value="1"/> </field> <field name="Kernel Start Pointer" start="6" end="31" type="offset"/> </struct>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/gen_perf.py ^
@@ -20,6 +20,8 @@ # IN THE SOFTWARE. import argparse +import builtins +import collections import os import sys import textwrap @@ -392,10 +394,53 @@ return "" if unit == 'hz': unit = 'Hz' - return " Unit: " + unit + "." + return "Unit: " + unit + "." -def output_counter_report(set, counter, current_offset): +counter_key_tuple = collections.namedtuple( + 'counter_key', + [ + 'name', + 'description', + 'symbol_name', + 'mdapi_group', + 'semantic_type', + 'data_type', + 'units', + ] +) + + +def counter_key(counter): + return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields]) + + +def output_counter_struct(set, counter, idx, + name_to_idx, desc_to_idx, + symbol_name_to_idx, category_to_idx): + data_type = counter.data_type + data_type_uc = data_type.upper() + + semantic_type = counter.semantic_type + if semantic_type in semantic_type_map: + semantic_type = semantic_type_map[semantic_type] + + semantic_type_uc = semantic_type.upper() + + c("[" + str(idx) + "] = {\n") + c_indent(3) + c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n") + c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n") + c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n") + c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n") + c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n") + c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n") + c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n") + c_outdent(3) + c("},\n") + + +def output_counter_report(set, counter, counter_to_idx, current_offset): data_type = counter.get('data_type') data_type_uc = data_type.upper() c_type = data_type @@ -416,19 +461,15 @@ output_availability(set, availability, counter.get('name')) c_indent(3) - c("counter = &query->counters[query->n_counters++];\n") - c("counter->oa_counter_read_" + data_type + " = " + 
set.read_funcs[counter.get('symbol_name')] + ";\n") - c("counter->name = \"" + counter.get('name') + "\";\n") - c("counter->desc = \"" + counter.get('description') + desc_units(counter.get('units')) + "\";\n") - c("counter->symbol_name = \"" + counter.get('symbol_name') + "\";\n") - c("counter->category = \"" + counter.get('mdapi_group') + "\";\n") - c("counter->type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ";\n") - c("counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ";\n") - c("counter->units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.get('units')) + ";\n") - c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n") + key = counter_key(counter) + idx = str(counter_to_idx[key]) current_offset = pot_align(current_offset, sizeof(c_type)) - c("counter->offset = " + str(current_offset) + ";\n") + + c("intel_perf_query_add_counter(query, " + idx + ", " + + str(current_offset) + ", " + + set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" + + set.read_funcs[counter.get('symbol_name')] + ");\n") if availability: c_outdent(3); @@ -437,6 +478,29 @@ return current_offset + sizeof(c_type) +def str_to_idx_table(strs): + sorted_strs = sorted(strs) + + str_to_idx = collections.OrderedDict() + str_to_idx[sorted_strs[0]] = 0 + previous = sorted_strs[0] + + for i in range(1, len(sorted_strs)): + str_to_idx[sorted_strs[i]] = str_to_idx[previous] + len(previous) + 1 + previous = sorted_strs[i] + + return str_to_idx + + +def output_str_table(name: str, str_to_idx): + c("\n") + c("static const char " + name + "[] = {\n") + c_indent(3) + c("\n".join(f"/* {idx} / \"{val}\\0\"" for val, idx in str_to_idx.items())) + c_outdent(3) + c("};\n") + + register_types = { 'FLEX': 'flex_regs', 'NOA': 'mux_regs', @@ -686,13 +750,32 @@ c(textwrap.dedent("""\ #include "perf/intel_perf.h" + #include "perf/intel_perf_setup.h" + """)) + names = builtins.set() + descs = builtins.set() + symbol_names = builtins.set() + categories 
= builtins.set() + for gen in gens: + for set in gen.sets: + for counter in set.counters: + names.add(counter.get('name')) + symbol_names.add(counter.get('symbol_name')) + descs.add(counter.get('description') + " " + desc_units(counter.get('units'))) + categories.add(counter.get('mdapi_group')) + + name_to_idx = str_to_idx_table(names) + output_str_table("name", name_to_idx) - #define MIN(a, b) ((a < b) ? (a) : (b)) - #define MAX(a, b) ((a > b) ? (a) : (b)) + desc_to_idx = str_to_idx_table(descs) + output_str_table("desc", desc_to_idx) + symbol_name_to_idx = str_to_idx_table(symbol_names) + output_str_table("symbol_name", symbol_name_to_idx) - """)) + category_to_idx = str_to_idx_table(categories) + output_str_table("category", category_to_idx) # Print out all equation functions. for gen in gens: @@ -701,6 +784,54 @@ output_counter_read(gen, set, counter) output_counter_max(gen, set, counter) + c("\n") + c("static const struct intel_perf_query_counter_data counters[] = {\n") + c_indent(3) + + counter_to_idx = collections.OrderedDict() + idx = 0 + for gen in gens: + for set in gen.sets: + for counter in set.counters: + key = counter_key(counter) + if key not in counter_to_idx: + counter_to_idx[key] = idx + output_counter_struct(set, key, idx, + name_to_idx, + desc_to_idx, + symbol_name_to_idx, + category_to_idx) + idx += 1 + + c_outdent(3) + c("};\n\n") + + c(textwrap.dedent("""\ + typedef uint64_t (oa_counter_read_func)(struct intel_perf_config perf, + const struct intel_perf_query_info query, + const struct intel_perf_query_result results); + static void ATTRIBUTE_NOINLINE + intel_perf_query_add_counter(struct intel_perf_query_info query, + int counter_idx, size_t offset, + uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64) + { + struct intel_perf_query_counter dest = &query->counters[query->n_counters++]; + const struct intel_perf_query_counter_data counter = &counters[counter_idx]; + + dest->name = &name[counter->name_idx]; + dest->desc = 
&desc[counter->desc_idx]; + dest->symbol_name = &symbol_name[counter->symbol_name_idx]; + dest->category = &category[counter->category_idx];
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/intel_perf.h ^
@@ -52,7 +52,7 @@ #define INTEL_PERF_INVALID_CTX_ID (0xffffffff) -enum intel_perf_counter_type { +enum PACKED intel_perf_counter_type { INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_TYPE_DURATION_NORM, INTEL_PERF_COUNTER_TYPE_DURATION_RAW, @@ -61,7 +61,7 @@ INTEL_PERF_COUNTER_TYPE_TIMESTAMP, }; -enum intel_perf_counter_data_type { +enum PACKED intel_perf_counter_data_type { INTEL_PERF_COUNTER_DATA_TYPE_BOOL32, INTEL_PERF_COUNTER_DATA_TYPE_UINT32, INTEL_PERF_COUNTER_DATA_TYPE_UINT64, @@ -69,7 +69,7 @@ INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE, }; -enum intel_perf_counter_units { +enum PACKED intel_perf_counter_units { /* size */ INTEL_PERF_COUNTER_UNITS_BYTES,
[-] [+]	Added	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/intel_perf_setup.h ^
@@ -0,0 +1,85 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + / + +#ifndef INTEL_PERF_SETUP_H +#define INTEL_PERF_SETUP_H + +#include "perf/intel_perf.h" + +#define MIN(a, b) ((a < b) ? (a) : (b)) +#define MAX(a, b) ((a > b) ? 
(a) : (b)) + +static struct intel_perf_query_info +intel_query_alloc(struct intel_perf_config perf, int ncounters) +{ + struct intel_perf_query_info query = rzalloc(perf, struct intel_perf_query_info); + query->perf = perf; + query->kind = INTEL_PERF_QUERY_TYPE_OA; + query->n_counters = 0; + query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs / + query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters); + return query; +} + +static struct intel_perf_query_info +hsw_query_alloc(struct intel_perf_config perf, int ncounters) +{ + struct intel_perf_query_info query = intel_query_alloc(perf, ncounters); + query->oa_format = I915_OA_FORMAT_A45_B8_C8; + /* Accumulation buffer offsets... / + query->gpu_time_offset = 0; + query->a_offset = query->gpu_time_offset + 1; + query->b_offset = query->a_offset + 45; + query->c_offset = query->b_offset + 8; + query->perfcnt_offset = query->c_offset + 8; + query->rpstat_offset = query->perfcnt_offset + 2; + return query; +} + +static struct intel_perf_query_info +bdw_query_alloc(struct intel_perf_config perf, int ncounters) +{ + struct intel_perf_query_info query = intel_query_alloc(perf, ncounters); + query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + /* Accumulation buffer offsets... / + query->gpu_time_offset = 0; + query->gpu_clock_offset = query->gpu_time_offset + 1; + query->a_offset = query->gpu_clock_offset + 1; + query->b_offset = query->a_offset + 36; + query->c_offset = query->b_offset + 8; + query->perfcnt_offset = query->c_offset + 8; + query->rpstat_offset = query->perfcnt_offset + 2; + return query; +} + +struct intel_perf_query_counter_data { + uint16_t name_idx; + uint16_t desc_idx; + uint16_t symbol_name_idx; + uint16_t category_idx; + enum intel_perf_counter_type type; + enum intel_perf_counter_data_type data_type; + enum intel_perf_counter_units units; +}; + +#endif / INTEL_PERF_SETUP_H */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/oa-hsw.xml ^
@@ -128,7 +128,7 @@ <counter name="GPU Busy" symbol_name="GpuBusy" underscore_name="gpu_busy" - description="The percentage of time in which the GPU has being processing GPU commands." + description="The percentage of time in which the GPU has been processing GPU commands." data_type="float" max_equation="100" units="percent" @@ -1091,7 +1091,7 @@ <counter name="GPU Busy" symbol_name="GpuBusy" underscore_name="gpu_busy" - description="The percentage of time in which the GPU has being processing GPU commands." + description="The percentage of time in which the GPU has been processing GPU commands." data_type="float" max_equation="100" units="percent" @@ -2200,7 +2200,7 @@ <counter name="GPU Busy" symbol_name="GpuBusy" underscore_name="gpu_busy" - description="The percentage of time in which the GPU has being processing GPU commands." + description="The percentage of time in which the GPU has been processing GPU commands." data_type="float" max_equation="100" units="percent" @@ -3014,7 +3014,7 @@ <counter name="GPU Busy" symbol_name="GpuBusy" underscore_name="gpu_busy" - description="The percentage of time in which the GPU has being processing GPU commands." + description="The percentage of time in which the GPU has been processing GPU commands." data_type="float" max_equation="100" units="percent" @@ -3830,7 +3830,7 @@ <counter name="GPU Busy" symbol_name="GpuBusy" underscore_name="gpu_busy" - description="The percentage of time in which the GPU has being processing GPU commands." + description="The percentage of time in which the GPU has been processing GPU commands." data_type="float" max_equation="100" units="percent"
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/tools/intel_noop_drm_shim.c ^
@@ -344,7 +344,8 @@ [DRM_I915_GEM_CONTEXT_GETPARAM] = i915_ioctl_gem_context_getparam, [DRM_I915_GEM_CONTEXT_SETPARAM] = i915_ioctl_noop, [DRM_I915_GEM_EXECBUFFER2] = i915_ioctl_noop, - [DRM_I915_GEM_EXECBUFFER2_WR] = i915_ioctl_noop, + /* [DRM_I915_GEM_EXECBUFFER2_WR] = i915_ioctl_noop, + same value as DRM_I915_GEM_EXECBUFFER2. */ [DRM_I915_GEM_USERPTR] = i915_ioctl_gem_userptr,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_allocator.c ^
@@ -155,10 +155,8 @@ * userptr and send a chunk of it off to the GPU. */ table->fd = os_create_anonymous_file(BLOCK_POOL_MEMFD_SIZE, "state table"); - if (table->fd == -1) { - result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED); - goto fail_fd; - } + if (table->fd == -1) + return vk_error(device, VK_ERROR_INITIALIZATION_FAILED); if (!u_vector_init(&table->cleanups, 8, sizeof(struct anv_state_table_cleanup))) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_batch_chain.c ^
@@ -346,7 +346,7 @@ { VkResult result; - struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + struct anv_batch_bo *bbo = vk_zalloc(&cmd_buffer->pool->alloc, sizeof(*bbo), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (bbo == NULL) return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1795,8 +1795,8 @@ __builtin_ia32_mfence(); for (uint32_t i = 0; i < num_cmd_buffers; i++) { u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) { - for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) - __builtin_ia32_clflush((*bbo)->bo->map + i); + for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo->map + l); } } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_blorp.c ^
@@ -1416,7 +1416,6 @@ assert(src_image->vk.samples > 1); assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D); assert(dst_image->vk.samples == 1); - assert(src_image->n_planes == dst_image->n_planes); struct blorp_surf src_surf, dst_surf; get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_cmd_buffer.c ^
@@ -275,7 +275,7 @@ result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk); if (result != VK_SUCCESS) - goto fail; + goto fail_alloc; cmd_buffer->batch.status = VK_SUCCESS; @@ -285,7 +285,7 @@ result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) - goto fail; + goto fail_vk; anv_state_stream_init(&cmd_buffer->surface_state_stream, &device->surface_state_pool, 4096); @@ -306,7 +306,9 @@ return VK_SUCCESS; - fail: + fail_vk: + vk_command_buffer_finish(&cmd_buffer->vk); + fail_alloc: vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer); return result; @@ -1583,7 +1585,7 @@ { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - assert(attachmentCount < MAX_RTS); + assert(attachmentCount <= MAX_RTS); uint8_t color_writes = 0; for (uint32_t i = 0; i < attachmentCount; i++)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_descriptor_set.c ^
@@ -1702,8 +1702,36 @@ copy->srcArrayElement, copy->descriptorCount); } else { - for (uint32_t j = 0; j < copy->descriptorCount; j++) - dst_desc[j] = src_desc[j]; + struct anv_buffer_view *dst_bview = + &dst->buffer_views[dst_layout->buffer_view_index + + copy->dstArrayElement]; + struct anv_buffer_view *src_bview = + &src->buffer_views[src_layout->buffer_view_index + + copy->srcArrayElement]; + /* If ANV_DESCRIPTOR_BUFFER_VIEW is present in the source descriptor, + * it means we're using an anv_buffer_view allocated by the source + * descriptor set. In that case we want to careful copy it because + * his lifecycle is tied to the source descriptor set, not the + * destination descriptor set. + */ + if (src_layout->data & ANV_DESCRIPTOR_BUFFER_VIEW) { + assert(dst_layout->data & ANV_DESCRIPTOR_BUFFER_VIEW); + for (uint32_t j = 0; j < copy->descriptorCount; j++) { + dst_bview[j].format = src_bview[j].format; + dst_bview[j].range = src_bview[j].range; + dst_bview[j].address = src_bview[j].address; + + memcpy(dst_bview[j].surface_state.map, + src_bview[j].surface_state.map, + src_bview[j].surface_state.alloc_size); + + dst_desc[j].type = src_desc[j].type; + dst_desc[j].buffer_view = &dst_bview[j]; + } + } else { + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; + } unsigned desc_size = anv_descriptor_size(src_layout); if (desc_size > 0) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_device.c ^
@@ -1973,12 +1973,12 @@ scalar_stages |= mesa_to_vk_shader_stage(stage); } if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) { - scalar_stages |= MESA_SHADER_RAYGEN | - MESA_SHADER_ANY_HIT | - MESA_SHADER_CLOSEST_HIT | - MESA_SHADER_MISS | - MESA_SHADER_INTERSECTION | - MESA_SHADER_CALLABLE; + scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR | + VK_SHADER_STAGE_ANY_HIT_BIT_KHR | + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | + VK_SHADER_STAGE_INTERSECTION_BIT_KHR | + VK_SHADER_STAGE_CALLABLE_BIT_KHR; } p->subgroupSupportedStages = scalar_stages; p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_image.c ^
@@ -635,20 +635,6 @@ return VK_SUCCESS; } - if (device->info.ver >= 12 && - (image->vk.array_layers > 1 \|\| image->vk.mip_levels)) { - /* HSD 14010672564: On TGL, if a block of fragment shader outputs - * match the surface's clear color, the HW may convert them to - * fast-clears. Anv only does clear color tracking for the first - * slice unfortunately. Disable CCS until anv gains more clear color - * tracking abilities. - / - anv_perf_warn(VK_LOG_OBJS(&image->vk.base), - "HW may put fast-clear blocks on more slices than SW " - "currently tracks. Not allocating a CCS buffer."); - return VK_SUCCESS; - } - if (INTEL_DEBUG(DEBUG_NO_RBC)) return VK_SUCCESS; @@ -2044,6 +2030,20 @@ bool aux_supported = true; bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage); + const struct isl_format_layout fmtl = + isl_format_get_layout(image->planes[plane].primary_surface.isl.format); + + /* Disabling CCS for the following case avoids failures in: + * - dEQP-VK.drm_format_modifiers.export_import.* + * - dEQP-VK.synchronization* + / + if (usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT \| + VK_IMAGE_USAGE_TRANSFER_SRC_BIT) && fmtl->bpb <= 16 && + aux_usage == ISL_AUX_USAGE_CCS_E && devinfo->ver >= 12) { + aux_supported = false; + clear_supported = false; + } + if ((usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && !read_only) { / This image could be used as both an input attachment and a render * target (depth, stencil, or color) at the same time and this can cause @@ -2265,6 +2265,17 @@ case ISL_AUX_STATE_COMPRESSED_CLEAR: if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { return ANV_FAST_CLEAR_DEFAULT_VALUE; + } else if (devinfo->ver >= 12 && + image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { + /* On TGL, if a block of fragment shader outputs match the surface's + * clear color, the HW may convert them to fast-clears (see HSD + * 14010672564). This can lead to rendering corruptions if not + * handled properly. 
We restrict the clear color to zero to avoid + * issues that can occur with: + * - Texture view rendering (including blorp_copy calls) + * - Images with multiple levels or array layers + / + return ANV_FAST_CLEAR_DEFAULT_VALUE; } else if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { / When we're in a render pass we have the clear color data from the * VkRenderPassBeginInfo and we can use arbitrary clear colors. They
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_pass.c ^
@@ -107,7 +107,11 @@ all_usage |= subpass_att->usage; - if (pass_att->first_subpass_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + /* first_subpass_layout only applies to color and depth. + * See genX(cmd_buffer_setup_attachments) + */ + if (vk_format_aspects(pass_att->format) != VK_IMAGE_ASPECT_STENCIL_BIT && + pass_att->first_subpass_layout == VK_IMAGE_LAYOUT_UNDEFINED) { pass_att->first_subpass_layout = subpass_att->layout; assert(pass_att->first_subpass_layout != VK_IMAGE_LAYOUT_UNDEFINED); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_pipeline.c ^
@@ -233,6 +233,12 @@ / NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + const nir_opt_access_options opt_access_options = { + .is_vulkan = true, + .infer_non_readable = true, + }; + NIR_PASS_V(nir, nir_opt_access, &opt_access_options); + / Split member structs. We do this before lower_io_to_temporaries so that * it doesn't lower system values to temporaries by accident. / @@ -1157,6 +1163,24 @@ if (deleted_output) nir_fixup_deref_modes(stage->nir); + / Initially the valid outputs value is based off the renderpass color + * attachments (see populate_wm_prog_key()), now that we've potentially + * deleted variables that map to unused attachments, we need to update the + * valid outputs for the backend compiler based on what output variables + * are actually used. / + stage->key.wm.color_outputs_valid = 0; + nir_foreach_shader_out_variable_safe(var, stage->nir) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + const unsigned rt = var->data.location - FRAG_RESULT_DATA0; + const unsigned array_len = + glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; + assert(rt + array_len <= MAX_RTS); + + stage->key.wm.color_outputs_valid \|= BITFIELD_RANGE(rt, array_len); + } + / We stored the number of subpass color attachments in nr_color_regions * when calculating the key for caching. Now that we've computed the bind * map, we can reduce this to the actual max before we go into the back-end @@ -2183,8 +2207,7 @@ } const VkPipelineMultisampleStateCreateInfo ms_info = - pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL : - pCreateInfo->pMultisampleState; + raster_discard ? NULL : pCreateInfo->pMultisampleState; if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { const VkPipelineSampleLocationsStateCreateInfoEXT sl_info = ms_info ? 
vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL; @@ -2214,8 +2237,7 @@ } if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) { - if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && - uses_color_att) { + if (!raster_discard && uses_color_att) { assert(pCreateInfo->pColorBlendState); const VkPipelineColorWriteCreateInfoEXT *color_write_info = vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_private.h ^
@@ -3487,7 +3487,8 @@ \ (struct GFX_BINDLESS_SHADER_RECORD) { \ .OffsetToLocalArguments = (local_arg_offset) / 8, \ - .BindlessShaderDispatchMode = prog_data->simd_size / 16, \ + .BindlessShaderDispatchMode = \ + prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8, \ .KernelStartPointer = bin->kernel.offset, \ }; \ })
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_cmd_buffer.c ^
@@ -1179,9 +1179,12 @@ UNUSED const uint32_t image_layers = MAX2(image->vk.array_layers, max_depth); assert((uint64_t)base_layer + layer_count <= image_layers); assert(last_level_num <= image->vk.mip_levels); - /* The spec disallows these final layouts. / - assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED && - final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED); + / If there is a layout transfer, the final layout cannot be undefined or + * preinitialized (VUID-VkImageMemoryBarrier-newLayout-01198). + / + assert(initial_layout == final_layout \|\| + (final_layout != VK_IMAGE_LAYOUT_UNDEFINED && + final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED)); const struct isl_drm_modifier_info isl_mod_info = image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT ? isl_drm_modifier_get_info(image->vk.drm_format_mod) @@ -1410,14 +1413,22 @@ anv_layout_to_aux_usage(devinfo, image, aspect, 0, initial_layout); enum isl_aux_usage final_aux_usage = anv_layout_to_aux_usage(devinfo, image, aspect, 0, final_layout); + enum anv_fast_clear_type initial_fast_clear = + anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout); + enum anv_fast_clear_type final_fast_clear = + anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout); /* We must override the anv_layout_to_* functions because they are unaware of * acquire/release direction. / if (mod_acquire) { initial_aux_usage = isl_mod_info->aux_usage; + initial_fast_clear = isl_mod_info->supports_clear_color ? + initial_fast_clear : ANV_FAST_CLEAR_NONE; } else if (mod_release) { final_aux_usage = isl_mod_info->aux_usage; + final_fast_clear = isl_mod_info->supports_clear_color ? + final_fast_clear : ANV_FAST_CLEAR_NONE; } / The current code assumes that there is no mixing of CCS_E and CCS_D. @@ -1440,10 +1451,6 @@ /* If the initial layout supports more fast clear than the final layout * then we need at least a partial resolve. 
/ - const enum anv_fast_clear_type initial_fast_clear = - anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout); - const enum anv_fast_clear_type final_fast_clear = - anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout); if (final_fast_clear < initial_fast_clear) resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE; @@ -1703,7 +1710,6 @@ else continue; - state->attachments[att].color.state = next_state; next_state.offset += ss_stride; next_state.map += ss_stride; } @@ -1820,7 +1826,7 @@ const struct anv_image_view const iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - if (iview) { + if (iview && (iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { VkImageLayout layout = cmd_buffer->state.subpass->depth_stencil_attachment->layout; @@ -2360,6 +2366,13 @@ bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT; pipe.ConstantCacheInvalidationEnable = bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; +#if GFX_VER >= 12 + /* Invalidates the L3 cache part in which index & vertex data is loaded + * when VERTEX_BUFFER_STATE::L3BypassDisable is set. 
+ / + pipe.L3ReadOnlyCacheInvalidationEnable = + bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT; +#endif pipe.VFCacheInvalidationEnable = bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT; pipe.TextureCacheInvalidationEnable = @@ -3752,8 +3765,10 @@ cmd_buffer_emit_clip(cmd_buffer); - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) - cmd_buffer_emit_streamout(cmd_buffer); + if (pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE \| ANV_CMD_DIRTY_XFB_ENABLE)) + cmd_buffer_emit_streamout(cmd_buffer); + } if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gfx8_cmd_buffer_emit_viewport(cmd_buffer); @@ -4197,6 +4212,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + if (vs_prog_data->uses_firstvertex \|\| vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); @@ -4231,6 +4249,7 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.IndirectParameterEnable = true; + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = SEQUENTIAL; prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology; } @@ -6509,6 +6528,7 @@ &cmd_state->attachments[dst_att]; if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { / MSAA resolves sample from the source attachment. Transition the @@ -6575,6 +6595,7 @@ } if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { src_state->current_stencil_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_query.c ^
@@ -723,7 +723,6 @@ switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: for (uint32_t i = 0; i < queryCount; i++) { emit_query_pc_availability(cmd_buffer, anv_query_address(pool, firstQuery + i), @@ -731,6 +730,23 @@ } break; + case VK_QUERY_TYPE_TIMESTAMP: { + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_pc_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); + } + + /* Add a CS stall here to make sure the PIPE_CONTROL above has + * completed. Otherwise some timestamps written later with MI_STORE_* + * commands might race with the PIPE_CONTROL in the loop above. + / + anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT, + "vkCmdResetQueryPool of timestamps"); + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + break; + } + case VK_QUERY_TYPE_PIPELINE_STATISTICS: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: { struct mi_builder b; @@ -1244,6 +1260,7 @@ if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) { mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)), mi_reg64(TIMESTAMP)); + emit_query_mi_availability(&b, query_addr, true); } else { / Everything else is bottom-of-pipe / cmd_buffer->state.pending_pipe_bits \|= ANV_PIPE_POST_SYNC_BIT; @@ -1257,9 +1274,9 @@ if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4) pc.CommandStreamerStallEnable = true; } + emit_query_pc_availability(cmd_buffer, query_addr, true); } - emit_query_pc_availability(cmd_buffer, query_addr, true); / When multiview is active the spec requires that N consecutive query * indices are used, where N is the number of active views in the subpass. @@ -1361,6 +1378,7 @@ */ if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) { anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_TILE_CACHE_FLUSH_BIT \| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, "CopyQueryPoolResults"); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_state.c ^
@@ -156,11 +156,12 @@ init_render_queue_state(struct anv_queue *queue) { struct anv_device *device = queue->device; - struct anv_batch batch; - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); + struct anv_batch batch = { + .start = cmds, + .next = cmds, + .end = (void *) cmds + sizeof(cmds), + }; anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) { #if GFX_VER >= 9
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/loader/meson.build ^
@@ -41,11 +41,19 @@ '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), ] -if get_option('prefer-iris') +_prefer_iris = get_option('prefer-iris') +if _prefer_iris == 'auto' + _prefer_iris = amber ? 'false' : 'true' +endif +if _prefer_iris == 'true' loader_c_args += ['-DPREFER_IRIS'] endif -if get_option('prefer-crocus') +_prefer_crocus = get_option('prefer-crocus') +if _prefer_crocus == 'auto' + _prefer_crocus = 'false' +endif +if _prefer_crocus == 'true' loader_c_args += ['-DPREFER_CROCUS'] endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mapi/glapi/gen/static_data.py ^
@@ -1689,8 +1689,6 @@ "VertexAttribs2hvNV": 1653, "VertexAttribs3hvNV": 1654, "VertexAttribs4hvNV": 1655, - "ClearTexImageEXT": 1656, - "ClearTexSubImageEXT": 1657, } functions = [
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i915/intel_context.c ^
@@ -111,7 +111,8 @@ __DRIdrawable *driDrawable = driContext->driDrawablePriv; __DRIscreen *const screen = intel->intelScreen->driScrnPriv; - if (intel->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + if (intel->front_buffer_dirty && ctx->DrawBuffer && + _mesa_is_winsys_fbo(ctx->DrawBuffer)) { if (flushFront(screen) && driDrawable && driDrawable->loaderPrivate) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i965/brw_context.c ^
@@ -239,7 +239,8 @@ __DRIdrawable *driDrawable = driContext->driDrawablePriv; __DRIscreen *const dri_screen = brw->screen->driScrnPriv; - if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + if (brw->front_buffer_dirty && ctx->DrawBuffer && + _mesa_is_winsys_fbo(ctx->DrawBuffer)) { if (flushFront(dri_screen) && driDrawable && driDrawable->loaderPrivate) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i965/brw_extensions.c ^
@@ -87,7 +87,6 @@ ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true; ctx->Extensions.ARB_texture_non_power_of_two = true; ctx->Extensions.ARB_texture_rg = true; - ctx->Extensions.ARB_texture_rgb10_a2ui = true; ctx->Extensions.ARB_vertex_program = true; ctx->Extensions.ARB_vertex_shader = true; ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true; @@ -112,7 +111,6 @@ ctx->Extensions.EXT_texture_array = true; ctx->Extensions.EXT_texture_env_dot3 = true; ctx->Extensions.EXT_texture_filter_anisotropic = true; - ctx->Extensions.EXT_texture_integer = true; ctx->Extensions.EXT_texture_norm16 = true; ctx->Extensions.EXT_texture_shared_exponent = true; ctx->Extensions.EXT_texture_snorm = true; @@ -202,8 +200,10 @@ ctx->Extensions.ARB_texture_cube_map_array = true; ctx->Extensions.ARB_texture_gather = true; ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_rgb10_a2ui = true; ctx->Extensions.ARB_uniform_buffer_object = true; ctx->Extensions.EXT_gpu_shader4 = true; + ctx->Extensions.EXT_texture_integer = true; ctx->Extensions.EXT_texture_shadow_lod = true; if (ctx->API != API_OPENGL_COMPAT ||
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/meson.build ^
@@ -51,7 +51,7 @@ if _dri_drivers != [] libmesa_dri_drivers = shared_library( - 'mesa_dri_drivers', + '@0@_dri_drivers'.format(glvnd_vendor_name), [], link_whole : _dri_drivers, link_with : [
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/nouveau/nouveau_driver.c ^
@@ -71,7 +71,7 @@ PUSH_KICK(push); - if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer) && ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { __DRIscreen *screen = nctx->screen->dri_screen; const __DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/radeon/radeon_common.c ^
@@ -544,7 +544,8 @@ rcommonFlushCmdBuf(radeon, __func__); flush_front: - if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && radeon->front_buffer_dirty) { + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer) && + radeon->front_buffer_dirty) { __DRIscreen *const screen = radeon->radeonScreen->driScreen; if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/dlist.c ^
@@ -816,6 +816,7 @@ free(node->cold->current_data); node->cold->current_data = NULL; + free(node->cold->prims); free(node->cold); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/fbobject.c ^
@@ -5192,9 +5192,19 @@ discard_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, GLsizei numAttachments, const GLenum *attachments) { + GLenum depth_att, stencil_att; + if (!ctx->Driver.DiscardFramebuffer) return; + if (_mesa_is_user_fbo(fb)) { + depth_att = GL_DEPTH_ATTACHMENT; + stencil_att = GL_STENCIL_ATTACHMENT; + } else { + depth_att = GL_DEPTH; + stencil_att = GL_STENCIL; + } + for (int i = 0; i < numAttachments; i++) { struct gl_renderbuffer_attachment *att = get_fb_attachment(ctx, fb, attachments[i]); @@ -5207,12 +5217,12 @@ * Driver.DiscardFramebuffer if the attachments list includes both depth * and stencil and they both point at the same renderbuffer. */ - if ((attachments[i] == GL_DEPTH_ATTACHMENT || - attachments[i] == GL_STENCIL_ATTACHMENT) && + if ((attachments[i] == depth_att || + attachments[i] == stencil_att) && (!att->Renderbuffer || att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL)) { - GLenum other_format = (attachments[i] == GL_DEPTH_ATTACHMENT ? - GL_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + GLenum other_format = (attachments[i] == depth_att ? + stencil_att : depth_att); bool has_both = false; for (int j = 0; j < numAttachments; j++) { if (attachments[j] == other_format) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/ffvertex_prog.c ^
@@ -918,19 +918,19 @@ static struct ureg get_lightprod( struct tnl_program p, GLuint light, - GLuint side, GLuint property ) + GLuint side, GLuint property, bool is_state_light ) { GLuint attrib = material_attrib(side, property); if (p->materials & (1<<attrib)) { struct ureg light_value = register_param3(p, STATE_LIGHT, light, property); - struct ureg material_value = get_material(p, side, property); - struct ureg tmp = get_temp(p); - emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); - return tmp; + is_state_light = true; + return light_value; } - else + else { + is_state_light = false; return register_param3(p, STATE_LIGHTPROD, light, attrib); + } } @@ -1112,20 +1112,28 @@ / struct ureg lightprod_front[MAX_LIGHTS][3]; struct ureg lightprod_back[MAX_LIGHTS][3]; + bool lightprod_front_is_state_light[MAX_LIGHTS][3]; + bool lightprod_back_is_state_light[MAX_LIGHTS][3]; for (i = 0; i < MAX_LIGHTS; i++) { if (p->state->unit[i].light_enabled) { - lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT); + lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT, + &lightprod_front_is_state_light[i][0]); if (twoside) - lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT); + lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT, + &lightprod_back_is_state_light[i][0]); - lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE); + lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE, + &lightprod_front_is_state_light[i][1]); if (twoside) - lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE); + lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE, + &lightprod_back_is_state_light[i][1]); - lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR); + lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR, + &lightprod_front_is_state_light[i][2]); if (twoside) - lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR); + lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR, + 
&lightprod_back_is_state_light[i][2]); } } @@ -1209,6 +1217,18 @@ / Front face lighting: / { + / Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in + * get_lightprod to avoid using too many temps. + / + for (int j = 0; j < 3; j++) { + if (lightprod_front_is_state_light[i][j]) { + struct ureg material_value = get_material(p, 0, STATE_AMBIENT + j); + struct ureg tmp = get_temp(p); + emit_op2(p, OPCODE_MUL, tmp, 0, lightprod_front[i][j], material_value); + lightprod_front[i][j] = tmp; + } + } + struct ureg ambient = lightprod_front[i][0]; struct ureg diffuse = lightprod_front[i][1]; struct ureg specular = lightprod_front[i][2]; @@ -1264,6 +1284,18 @@ / Back face lighting: / if (twoside) { + / Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in + * get_lightprod to avoid using too many temps. + */ + for (int j = 0; j < 3; j++) { + if (lightprod_back_is_state_light[i][j]) { + struct ureg material_value = get_material(p, 1, STATE_AMBIENT + j); + struct ureg tmp = get_temp(p); + emit_op2(p, OPCODE_MUL, tmp, 1, lightprod_back[i][j], material_value); + lightprod_back[i][j] = tmp; + } + } + struct ureg ambient = lightprod_back[i][0]; struct ureg diffuse = lightprod_back[i][1]; struct ureg specular = lightprod_back[i][2];
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/program_resource.c ^
@@ -97,7 +97,6 @@ _mesa_enum_to_string(pname), params); } - unsigned i; struct gl_shader_program shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetProgramInterfaceiv"); @@ -117,125 +116,7 @@ return; } - / Validate pname against interface. / - switch(pname) { - case GL_ACTIVE_RESOURCES: - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) - if (shProg->data->ProgramResourceList[i].Type == programInterface) - (params)++; - break; - case GL_MAX_NAME_LENGTH: - if (programInterface == GL_ATOMIC_COUNTER_BUFFER \|\| - programInterface == GL_TRANSFORM_FEEDBACK_BUFFER) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetProgramInterfaceiv(%s pname %s)", - _mesa_enum_to_string(programInterface), - _mesa_enum_to_string(pname)); - return; - } - / Name length consists of base name, 3 additional chars '[0]' if - * resource is an array and finally 1 char for string terminator. - / - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type != programInterface) - continue; - unsigned len = - _mesa_program_resource_name_len(&shProg->data->ProgramResourceList[i]); - params = MAX2(params, len + 1); - } - break; - case GL_MAX_NUM_ACTIVE_VARIABLES: - switch (programInterface) { - case GL_UNIFORM_BLOCK: - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type == programInterface) { - struct gl_uniform_block block = - (struct gl_uniform_block ) - shProg->data->ProgramResourceList[i].Data; - params = MAX2(params, block->NumUniforms); - } - } - break; - case GL_SHADER_STORAGE_BLOCK: - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type == programInterface) { - struct gl_uniform_block block = - (struct gl_uniform_block ) - shProg->data->ProgramResourceList[i].Data; - GLint block_params = 0; - for (unsigned j = 0; j < block->NumUniforms; j++) { - struct 
gl_program_resource uni = - _mesa_program_resource_find_active_variable( - shProg, - GL_BUFFER_VARIABLE, - block, - j); - if (!uni) - continue; - block_params++; - } - params = MAX2(params, block_params); - } - } - break; - case GL_ATOMIC_COUNTER_BUFFER: - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type == programInterface) { - struct gl_active_atomic_buffer buffer = - (struct gl_active_atomic_buffer ) - shProg->data->ProgramResourceList[i].Data; - params = MAX2(params, buffer->NumUniforms); - } - } - break; - case GL_TRANSFORM_FEEDBACK_BUFFER: - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type == programInterface) { - struct gl_transform_feedback_buffer buffer = - (struct gl_transform_feedback_buffer ) - shProg->data->ProgramResourceList[i].Data; - params = MAX2(params, buffer->NumVaryings); - } - } - break; - default: - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetProgramInterfaceiv(%s pname %s)", - _mesa_enum_to_string(programInterface), - _mesa_enum_to_string(pname)); - } - break; - case GL_MAX_NUM_COMPATIBLE_SUBROUTINES: - switch (programInterface) { - case GL_VERTEX_SUBROUTINE_UNIFORM: - case GL_FRAGMENT_SUBROUTINE_UNIFORM: - case GL_GEOMETRY_SUBROUTINE_UNIFORM: - case GL_COMPUTE_SUBROUTINE_UNIFORM: - case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: - case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: { - for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { - if (shProg->data->ProgramResourceList[i].Type == programInterface) { - struct gl_uniform_storage uni = - (struct gl_uniform_storage ) - shProg->data->ProgramResourceList[i].Data; - params = MAX2(params, uni->num_compatible_subroutines); - } - } - break; - } - - default: - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetProgramInterfaceiv(%s pname %s)", - _mesa_enum_to_string(programInterface), - _mesa_enum_to_string(pname)); - } - break; - default: - 
_mesa_error(ctx, GL_INVALID_OPERATION, - "glGetProgramInterfaceiv(pname %s)", - _mesa_enum_to_string(pname)); - } + _mesa_get_program_interfaceiv(shProg, programInterface, pname, params); } static bool
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shader_query.cpp ^
@@ -1668,6 +1668,135 @@ length = amount; } +extern void +_mesa_get_program_interfaceiv(struct gl_shader_program shProg, + GLenum programInterface, GLenum pname, + GLint params) +{ + GET_CURRENT_CONTEXT(ctx); + unsigned i; + + / Validate pname against interface. / + switch(pname) { + case GL_ACTIVE_RESOURCES: + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) + if (shProg->data->ProgramResourceList[i].Type == programInterface) + (params)++; + break; + case GL_MAX_NAME_LENGTH: + if (programInterface == GL_ATOMIC_COUNTER_BUFFER \|\| + programInterface == GL_TRANSFORM_FEEDBACK_BUFFER) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramInterfaceiv(%s pname %s)", + _mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); + return; + } + / Name length consists of base name, 3 additional chars '[0]' if + * resource is an array and finally 1 char for string terminator. + / + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type != programInterface) + continue; + unsigned len = + _mesa_program_resource_name_len(&shProg->data->ProgramResourceList[i]); + params = MAX2((unsigned)params, len + 1); + } + break; + case GL_MAX_NUM_ACTIVE_VARIABLES: + switch (programInterface) { + case GL_UNIFORM_BLOCK: + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_block block = + (struct gl_uniform_block ) + shProg->data->ProgramResourceList[i].Data; + params = MAX2((unsigned)params, block->NumUniforms); + } + } + break; + case GL_SHADER_STORAGE_BLOCK: + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_block block = + (struct gl_uniform_block ) + shProg->data->ProgramResourceList[i].Data; + GLint block_params = 0; + for (unsigned j = 0; j < 
block->NumUniforms; j++) { + struct gl_program_resource uni = + _mesa_program_resource_find_active_variable( + shProg, + GL_BUFFER_VARIABLE, + block, + j); + if (!uni) + continue; + block_params++; + } + params = MAX2(params, block_params); + } + } + break; + case GL_ATOMIC_COUNTER_BUFFER: + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type == programInterface) { + struct gl_active_atomic_buffer buffer = + (struct gl_active_atomic_buffer ) + shProg->data->ProgramResourceList[i].Data; + params = MAX2((unsigned)params, buffer->NumUniforms); + } + } + break; + case GL_TRANSFORM_FEEDBACK_BUFFER: + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type == programInterface) { + struct gl_transform_feedback_buffer buffer = + (struct gl_transform_feedback_buffer ) + shProg->data->ProgramResourceList[i].Data; + params = MAX2((unsigned)params, buffer->NumVaryings); + } + } + break; + default: + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramInterfaceiv(%s pname %s)", + _mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); + } + break; + case GL_MAX_NUM_COMPATIBLE_SUBROUTINES: + switch (programInterface) { + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: { + for (i = 0, params = 0; i < shProg->data->NumProgramResourceList; i++) { + if (shProg->data->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_storage uni = + (struct gl_uniform_storage ) + shProg->data->ProgramResourceList[i].Data; + params = MAX2((unsigned)params, uni->num_compatible_subroutines); + } + } + break; + } + + default: + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramInterfaceiv(%s pname %s)", + 
_mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); + } + break; + default: + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramInterfaceiv(pname %s)", + _mesa_enum_to_string(pname)); + } +} + static bool validate_io(struct gl_program producer, struct gl_program consumer) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shaderapi.c ^
@@ -767,13 +767,8 @@ params = _mesa_longest_attribute_name_length(shProg); return; case GL_ACTIVE_UNIFORMS: { - unsigned i; - const unsigned num_uniforms = - shProg->data->NumUniformStorage - shProg->data->NumHiddenUniforms; - for (params = 0, i = 0; i < num_uniforms; i++) { - if (!shProg->data->UniformStorage[i].is_shader_storage) - (*params)++; - } + _mesa_get_program_interfaceiv(shProg, GL_UNIFORM, GL_ACTIVE_RESOURCES, + params); return; } case GL_ACTIVE_UNIFORM_MAX_LENGTH: {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shaderapi.h ^
@@ -335,6 +335,11 @@ GLint params); extern void +_mesa_get_program_interfaceiv(struct gl_shader_program shProg, + GLenum programInterface, GLenum pname, + GLint params); + +extern void _mesa_create_program_resource_hash(struct gl_shader_program shProg); /* GL_ARB_tessellation_shader */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shared.c ^
@@ -357,6 +357,7 @@ _mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx); _mesa_DeleteHashTable(shared->DisplayList); free(shared->small_dlist_store.ptr); + util_idalloc_fini(&shared->small_dlist_store.free_idx); } if (shared->BitmapAtlas) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/state_tracker/st_context.c ^
@@ -555,10 +555,9 @@ f->NewDepthClamp = ST_NEW_RASTERIZER; } + f->NewClipPlaneEnable = ST_NEW_RASTERIZER; if (st->lower_ucp) - f->NewClipPlaneEnable = ST_NEW_VS_STATE \| ST_NEW_GS_STATE; - else - f->NewClipPlaneEnable = ST_NEW_RASTERIZER; + f->NewClipPlaneEnable \|= ST_NEW_VS_STATE \| ST_NEW_GS_STATE; f->NewLineState = ST_NEW_RASTERIZER; f->NewPolygonState = ST_NEW_RASTERIZER;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/state_tracker/st_format.c ^
@@ -110,23 +110,12 @@ } if (st_astc_format_fallback(st, mesaFormat)) { - const struct util_format_description desc = - util_format_description(mesaFormat); - if (_mesa_is_format_srgb(mesaFormat)) { - if (!st->transcode_astc) - return PIPE_FORMAT_R8G8B8A8_SRGB; - else if (desc->block.width desc->block.height < 32) - return PIPE_FORMAT_DXT5_SRGBA; - else - return PIPE_FORMAT_DXT1_SRGBA; + return st->transcode_astc ? PIPE_FORMAT_DXT5_SRGBA : + PIPE_FORMAT_R8G8B8A8_SRGB; } else { - if (!st->transcode_astc) - return PIPE_FORMAT_R8G8B8A8_UNORM; - else if (desc->block.width * desc->block.height < 32) - return PIPE_FORMAT_DXT5_RGBA; - else - return PIPE_FORMAT_DXT1_RGBA; + return st->transcode_astc ? PIPE_FORMAT_DXT5_RGBA : + PIPE_FORMAT_R8G8B8A8_UNORM; } }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/vbo/vbo_save_api.c ^
@@ -117,6 +117,7 @@ #include "util/bitscan.h" #include "util/u_memory.h" #include "util/hash_table.h" +#include "util/u_prim.h" #include "gallium/include/pipe/p_state.h" @@ -607,9 +608,7 @@ node->cold->max_index = end - 1; int max_index_count = total_vert_count * 2; - - int size = max_index_count * sizeof(uint32_t); - uint32_t* indices = (uint32_t) malloc(size); + uint32_t indices = (uint32_t) malloc(max_index_count sizeof(uint32_t)); struct _mesa_prim merged_prims = NULL; int idx = 0; @@ -637,6 +636,12 @@ continue; } + / Increase indices storage if the original estimation was too small. / + if (idx + 3 vertex_count > max_index_count) { + max_index_count = max_index_count + 3 * vertex_count; + indices = (uint32_t) realloc(indices, max_index_count sizeof(uint32_t)); + } + /* Line strips may get converted to lines / if (mode == GL_LINE_STRIP) mode = GL_LINES; @@ -701,6 +706,14 @@ } } + / Duplicate the last vertex for incomplete primitives / + unsigned min_vert = u_prim_vertex_count(mode)->min; + for (unsigned j = vertex_count; j < min_vert; j++) { + indices[idx++] = add_vertex(save, vertex_to_index, + original_prims[i].start + vertex_count - 1, + temp_vertices_buffer, &max_index); + } + if (merge_prims) { / Update vertex count. / merged_prims[last_valid_prim].count += idx - start; @@ -813,12 +826,14 @@ free(temp_vertices_buffer); } - / Since we're append the indices to an existing buffer, we need to adjust the start value of each + /* Since we append the indices to an existing buffer, we need to adjust the start value of each * primitive (not the indices themselves). 
/ - save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used; - int indices_offset = save->current_bo_bytes_used / 4; - for (int i = 0; i < merged_prim_count; i++) { - merged_prims[i].start += indices_offset; + if (!ctx->ListState.Current.UseLoopback) { + save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used; + int indices_offset = save->current_bo_bytes_used / 4; + for (int i = 0; i < merged_prim_count; i++) { + merged_prims[i].start += indices_offset; + } } / Then upload the indices. / @@ -933,20 +948,16 @@ _glapi_set_dispatch(ctx->Exec); / _vbo_loopback_vertex_list doesn't use the index buffer, so we have to - * use buffer_in_ram instead of current_bo which contains all vertices instead - * of the deduplicated vertices only in the !UseLoopback case. + * use buffer_in_ram (which contains all vertices) instead of current_bo + * (which contains deduplicated vertices when UseLoopback is false). * * The problem is that the VAO offset is based on current_bo's layout, * so we have to use a temp value. / struct gl_vertex_array_object vao = node->VAO[VP_MODE_SHADER]; GLintptr original = vao->BufferBinding[0].Offset; - if (!ctx->ListState.Current.UseLoopback) { - GLintptr new_offset = 0; - /* 'start_offset' has been added to all primitives 'start', so undo it here. / - new_offset -= start_offset stride; - vao->BufferBinding[0].Offset = new_offset; - } + /* 'start_offset' has been added to all primitives 'start', so undo it here. / + vao->BufferBinding[0].Offset = -(GLintptr)(start_offset stride); _vbo_loopback_vertex_list(ctx, node, save->vertex_store->buffer_in_ram); vao->BufferBinding[0].Offset = original;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/dxil_nir.c ^
@@ -1386,10 +1386,10 @@ int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); if (sampler_idx == -1) { /* No derefs, must be using indices / - struct hash_entry hash_entry = _mesa_hash_table_u64_search(data, tex->sampler_index); + nir_variable bare_sampler = _mesa_hash_table_u64_search(data, tex->sampler_index); / Already have a bare sampler here / - if (hash_entry) + if (bare_sampler) return false; nir_variable typed_sampler = NULL; @@ -1408,7 +1408,7 @@ /* Clone the typed sampler to a bare sampler and we're done / assert(typed_sampler); - nir_variable bare_sampler = nir_variable_clone(typed_sampler, b->shader); + bare_sampler = nir_variable_clone(typed_sampler, b->shader); bare_sampler->type = get_bare_samplers_for_type(typed_sampler->type); nir_shader_add_variable(b->shader, bare_sampler); _mesa_hash_table_u64_insert(data, tex->sampler_index, bare_sampler); @@ -1428,11 +1428,8 @@ return false; } - struct hash_entry hash_entry = _mesa_hash_table_u64_search(data, old_var->data.binding); - nir_variable new_var; - if (hash_entry) { - new_var = hash_entry->data; - } else { + nir_variable *new_var = _mesa_hash_table_u64_search(data, old_var->data.binding); + if (!new_var) { new_var = nir_variable_clone(old_var, b->shader); new_var->type = get_bare_samplers_for_type(old_var->type); nir_shader_add_variable(b->shader, new_var);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/meson.build ^
@@ -41,7 +41,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) libdxil_compiler = static_library(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/nir_to_dxil.c ^
@@ -2112,6 +2112,12 @@ case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); + case nir_op_inot: { + unsigned bit_size = alu->dest.dest.ssa.bit_size; + intmax_t val = bit_size == 1 ? 1 : -1; + const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size); + return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one); + } case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_lower_swizzle.c ^
@@ -50,6 +50,19 @@ * derivatives, which might require swizzle lowering / case BI_OPCODE_CLPER_I32: case BI_OPCODE_CLPER_V6_I32: + + / Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the + * boolean is implemented as a 16-bit integer, the swizzle is needed + * for correct operation if the instruction producing the 16-bit + * boolean does not replicate to both halves of the containing 32-bit + * register. As such, we may need to lower a swizzle. + * + * This is a silly hack. Ideally, code gen would be smart enough to + * avoid this case (by replicating). In practice, silly hardware design + * decisions force our hand here. + */ + case BI_OPCODE_MUX_I32: + case BI_OPCODE_CSEL_I32: break; case BI_OPCODE_IADD_V2S16:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_opt_mod_props.c ^
@@ -25,6 +25,18 @@ #include "compiler.h" #include "bi_builder.h" +/* + * Due to a Bifrost encoding restriction, some instructions cannot have an abs + * modifier on both sources. Check if adding a fabs modifier to a given source + * of a binary instruction would cause this restriction to be hit. + / +static bool +bi_would_impact_abs(unsigned arch, bi_instr I, bi_index repl, unsigned s) +{ + return (arch <= 8) && I->src[1 - s].abs && + bi_is_word_equiv(I->src[1 - s], repl); +} + static bool bi_takes_fabs(unsigned arch, bi_instr I, bi_index repl, unsigned s) { @@ -32,9 +44,15 @@ case BI_OPCODE_FCMP_V2F16: case BI_OPCODE_FMAX_V2F16: case BI_OPCODE_FMIN_V2F16: - / Bifrost encoding restriction: can't have both abs if equal sources / - return !(arch <= 8 && I->src[1 - s].abs - && bi_is_word_equiv(I->src[1 - s], repl)); + return !bi_would_impact_abs(arch, I, repl, s); + case BI_OPCODE_FADD_V2F16: + / + * For FADD.v2f16, the FMA pipe has the abs encoding hazard, + * while the FADD pipe cannot encode a clamp. Either case in + * isolation can be worked around in the scheduler, but both + * together is impossible to encode. Avoid the hazard. + / + return !(I->clamp && bi_would_impact_abs(arch, I, repl, s)); case BI_OPCODE_V2F32_TO_V2F16: / TODO: Needs both match or lower / return false; @@ -182,6 +200,10 @@ case BI_OPCODE_FMA_RSCALE_V2F16: case BI_OPCODE_FADD_RSCALE_F32: return false; + case BI_OPCODE_FADD_V2F16: + / Encoding restriction */ + return !(I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1])); default: return bi_opcode_props[I->op].clamp; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_schedule.c ^
@@ -197,7 +197,7 @@ } static void -bi_create_dependency_graph(struct bi_worklist st, bool inorder) +bi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend) { struct util_dynarray last_read[64], last_write[64]; @@ -262,6 +262,17 @@ } } + /* Blend shaders are allowed to clobber R0-R15. Treat these + * registers like extra destinations for scheduling purposes. + / + if (ins->op == BI_OPCODE_BLEND && !is_blend) { + for (unsigned c = 0; c < 16; ++c) { + add_dependency(last_read, c, i, st.dependents, st.dep_counts); + add_dependency(last_write, c, i, st.dependents, st.dep_counts); + mark_access(last_write, c, i); + } + } + bi_foreach_src(ins, s) { if (ins->src[s].type != BI_INDEX_REGISTER) continue; @@ -414,7 +425,7 @@ / static struct bi_worklist -bi_initialize_worklist(bi_block block, bool inorder) +bi_initialize_worklist(bi_block block, bool inorder, bool is_blend) { struct bi_worklist st = { }; st.instructions = bi_flatten_block(block, &st.count); @@ -425,7 +436,7 @@ st.dependents = calloc(st.count, sizeof(st.dependents[0])); st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0])); - bi_create_dependency_graph(st, inorder); + bi_create_dependency_graph(st, inorder, is_blend); st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); for (unsigned i = 0; i < st.count; ++i) { @@ -479,6 +490,18 @@ ins->src[1].swizzle == BI_SWIZZLE_H01); } +/* + * The encoding of FADD.v2f16 only specifies a single abs flag. All abs + encodings are permitted by swapping operands; however, this scheme fails if + * both operands are equal. Test for this case. 
+ / +static bool +bi_impacted_abs(bi_instr I) +{ + return I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1]); +} + bool bi_can_fma(bi_instr ins) { @@ -486,6 +509,10 @@ if (bi_can_iaddc(ins)) return true; + / FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead / + if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) + return false; + /* TODO: some additional fp16 constraints / return bi_opcode_props[ins->op].fma; } @@ -972,16 +999,21 @@ same clause (most likely they will not), so if a later instruction * in the clause accesses the destination, the message-passing * instruction can't be scheduled / - if (bi_opcode_props[instr->op].sr_write && !bi_is_null(instr->dest[0])) { - unsigned nr = bi_count_write_registers(instr, 0); - assert(instr->dest[0].type == BI_INDEX_REGISTER); - unsigned reg = instr->dest[0].value; + if (bi_opcode_props[instr->op].sr_write) { + bi_foreach_dest(instr, d) { + if (bi_is_null(instr->dest[d])) + continue; - for (unsigned i = 0; i < clause->access_count; ++i) { - bi_index idx = clause->accesses[i]; - for (unsigned d = 0; d < nr; ++d) { - if (bi_is_equiv(bi_register(reg + d), idx)) - return false; + unsigned nr = bi_count_write_registers(instr, d); + assert(instr->dest[d].type == BI_INDEX_REGISTER); + unsigned reg = instr->dest[d].value; + + for (unsigned i = 0; i < clause->access_count; ++i) { + bi_index idx = clause->accesses[i]; + for (unsigned d = 0; d < nr; ++d) { + if (bi_is_equiv(bi_register(reg + d), idx)) + return false; + } } } } @@ -1796,7 +1828,8 @@ / Copy list to dynamic array */ struct bi_worklist st = bi_initialize_worklist(block, - bifrost_debug & BIFROST_DBG_INORDER); + bifrost_debug & BIFROST_DBG_INORDER, + ctx->inputs->is_blend); if (!st.count) { bi_free_worklist(st);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bifrost_compile.c ^
@@ -1420,7 +1420,9 @@ uint32_t acc = 0; for (unsigned i = 0; i < instr->def.num_components; ++i) { - unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size); + uint32_t v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size); + + v = bi_extend_constant(v, instr->def.bit_size); acc \|= (v << (i * instr->def.bit_size)); } @@ -2615,6 +2617,7 @@ for (unsigned i = 0; i < instr->num_srcs; ++i) { bi_index index = bi_src_index(&instr->src[i].src); unsigned sz = nir_src_bit_size(instr->src[i].src); + unsigned components = nir_src_num_components(instr->src[i].src); ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i); nir_alu_type T = base \| sz; @@ -2623,27 +2626,25 @@ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { cx = bi_emit_texc_cube_coord(b, index, &cy); } else { - unsigned components = nir_src_num_components(instr->src[i].src); - /* Copy XY (for 2D+) or XX (for 1D) / cx = index; cy = bi_word(index, MIN2(1, components - 1)); assert(components >= 1 && components <= 3); - if (components < 3) { - / nothing to do / - } else if (desc.array) { - / 2D array / - dregs[BIFROST_TEX_DREG_ARRAY] = - bi_emit_texc_array_index(b, - bi_word(index, 2), T); - } else { + if (components == 3 && !desc.array) { / 3D / dregs[BIFROST_TEX_DREG_Z_COORD] = bi_word(index, 2); } } + + if (desc.array) { + dregs[BIFROST_TEX_DREG_ARRAY] = + bi_emit_texc_array_index(b, + bi_word(index, components - 1), T); + } + break; case nir_tex_src_lod: @@ -3832,7 +3833,7 @@ / TODO: pack flat */ } - info->ubo_mask = ctx->ubo_mask & BITSET_MASK(ctx->nir->info.num_ubos); + info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1); if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { disassemble_bifrost(stdout, binary->data, binary->size,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/compiler.h ^
@@ -701,6 +701,19 @@ return bi_get_index(ctx->reg_alloc++, true, 0); } +/* NIR booleans are 1-bit (0/1). For now, backend IR booleans are N-bit + * (0/~0) where N depends on the context. This requires us to sign-extend + * when converting constants from NIR to the backend IR. + / +static inline uint32_t +bi_extend_constant(uint32_t constant, unsigned bit_size) +{ + if (bit_size == 1 && constant != 0) + return ~0; + else + return constant; +} + / Inline constants automatically, will be lowered out by bi_lower_fau where a * constant is not allowed. load_const_to_scalar gaurantees that this makes * sense / @@ -708,11 +721,13 @@ static inline bi_index bi_src_index(nir_src src) { - if (nir_src_is_const(src) && nir_src_bit_size(src) <= 32) - return bi_imm_u32(nir_src_as_uint(src)); - else if (src->is_ssa) + if (nir_src_is_const(src) && nir_src_bit_size(src) <= 32) { + uint32_t v = nir_src_as_uint(src); + + return bi_imm_u32(bi_extend_constant(v, nir_src_bit_size(*src))); + } else if (src->is_ssa) { return bi_get_index(src->ssa->index, false, 0); - else { + } else { assert(!src->reg.indirect); return bi_get_index(src->reg.reg->index, true, 0); }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/gen_disasm.py ^
@@ -238,7 +238,7 @@ key_set = find_context_keys(desc, test) ordered = 'ordering' in key_set key_set.discard('ordering') - keys = list(key_set) + keys = sorted(list(key_set)) # Evaluate the deriveds for every possible state, forming a (state -> deriveds) map testf = compile_derived(test, keys)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/meson.build ^
@@ -116,7 +116,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) libpanfrost_bifrost_disasm = static_library(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/valhall/disassemble.h ^
@@ -13,7 +13,6 @@ #define MASK(count) ((1ull << (count)) - 1) #define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1)) #define UNUSED __attribute__((unused)) -static inline float fui(uint32_t u) { float f; memcpy(&f, &u, 4); return f; } #define VA_SRC_UNIFORM_TYPE 0x2 #define VA_SRC_IMM_TYPE 0x3
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/ci/panfrost-g52-fails.txt ^
@@ -21,7 +21,6 @@ shaders@glsl-bug-110796,Fail shaders@glsl-uniform-interstage-limits@subdivide 5,Crash shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges,Crash -shaders@glsl-vs-if-bool,Fail shaders@point-vertex-id divisor,Fail shaders@point-vertex-id gl_instanceid divisor,Fail shaders@point-vertex-id gl_instanceid,Fail @@ -71,56 +70,23 @@ spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-copypixels,Fail spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-drawpixels,Fail spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-readpixels,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH32F_STENCIL8- border color only,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH_COMPONENT32F- border color only,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH32F_STENCIL8- swizzled- border color only,Fail -spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32F- swizzled- border color only,Fail spec@arb_depth_clamp@depth-clamp-range,Fail -spec@arb_depth_texture@texwrap formats bordercolor,Fail -spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT16- border color only,Fail -spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT24- border color only,Fail -spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT32- border color only,Fail -spec@arb_depth_texture@texwrap formats bordercolor-swizzled,Fail -spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT16- swizzled- border color only,Fail -spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT24- swizzled- border color only,Fail -spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32- swizzled- border color only,Fail 
spec@arb_direct_state_access@gettextureimage-formats,Crash spec@arb_direct_state_access@gettextureimage-formats init-by-rendering,Fail -spec@arb_direct_state_access@texture-buffer,Fail spec@arb_draw_buffers@fbo-mrt-new-bind,Crash spec@arb_es2_compatibility@fbo-blending-formats,Fail spec@arb_es2_compatibility@fbo-blending-formats@GL_RGB565,Fail -spec@arb_es2_compatibility@texwrap formats bordercolor,Fail -spec@arb_es2_compatibility@texwrap formats bordercolor@GL_RGB565- border color only,Fail -spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail -spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail -spec@arb_fragment_program@fp-fragment-position,Crash -spec@arb_fragment_program@sparse-samplers,Crash spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit depth gl_depth32f_stencil8,Fail spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit depth_stencil gl_depth32f_stencil8,Fail spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_depth32f_stencil8,Fail spec@arb_framebuffer_object@fbo-luminance-alpha,Fail spec@arb_framebuffer_srgb@fbo-fast-clear,Fail spec@arb_get_program_binary@restore-sso-program,Fail -spec@arb_get_texture_sub_image@arb_get_texture_sub_image-getcompressed,Fail -spec@arb_get_texture_sub_image@arb_get_texture_sub_image-get,Fail spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_DEPTH_STENCIL,Fail spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_STENCIL_INDEX,Fail -spec@arb_pixel_buffer_object@texsubimage array pbo,Fail -spec@arb_pixel_buffer_object@texsubimage cube_map_array pbo,Fail -spec@arb_pixel_buffer_object@texsubimage pbo,Fail spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail spec@arb_point_sprite@arb_point_sprite-mipmap,Fail -spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail 
-spec@arb_sample_shading@builtin-gl-sample-id 0,Fail -spec@arb_sample_shading@builtin-gl-sample-id 2,Fail -spec@arb_sample_shading@builtin-gl-sample-id 4,Fail -spec@arb_sample_shading@builtin-gl-sample-mask 0,Fail -spec@arb_sample_shading@builtin-gl-sample-mask 2,Fail -spec@arb_sample_shading@builtin-gl-sample-mask 4,Fail spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail @@ -145,7 +111,6 @@ spec@arb_sample_shading@samplemask 4,Fail spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail -spec@arb_shader_atomic_counters@respecify-buffer,Fail spec@arb_shader_draw_parameters@drawid-indirect-baseinstance,Fail spec@arb_shader_draw_parameters@drawid-indirect-basevertex,Fail spec@arb_shader_draw_parameters@drawid-indirect,Fail @@ -157,25 +122,9 @@ spec@arb_shader_texture_lod@execution@tex-miplevel-selection projgradarb 2drect_projvec4,Crash spec@arb_shader_texture_lod@execution@tex-miplevel-selection projgradarb 2drectshadow,Crash spec@arb_shading_language_420pack@active sampler conflict,Crash -spec@arb_texture_buffer_object@data-sync,Fail spec@arb_texture_buffer_object@formats (fs- arb),Crash spec@arb_texture_buffer_object@formats (vs- arb),Crash spec@arb_texture_buffer_object@render-no-bo,Crash -spec@arb_texture_buffer_object@subdata-sync,Fail -spec@arb_texture_compression@texwrap formats bordercolor,Fail -spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_ALPHA- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_LUMINANCE_ALPHA- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_RGBA- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_RGB- border color only,Fail -spec@arb_texture_compression@texwrap formats 
bordercolor-swizzled,Fail -spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_ALPHA- swizzled- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_LUMINANCE_ALPHA- swizzled- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA- swizzled- border color only,Fail -spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB- swizzled- border color only,Fail -spec@arb_texture_cube_map_array@arb_texture_cube_map_array-cubemap,Fail -spec@arb_texture_cube_map_array@arb_texture_cube_map_array-cubemap-lod,Fail -spec@arb_texture_cube_map_array@arb_texture_cube_map_array-fbo-cubemap-array,Fail -spec@arb_texture_cube_map_array@texsubimage cube_map_array,Fail spec@arb_texture_cube_map_array@texturesize@fs-texturesize-isamplercubearray,Fail spec@arb_texture_cube_map_array@texturesize@fs-texturesize-samplercubearray,Fail spec@arb_texture_cube_map_array@texturesize@fs-texturesize-samplercubearrayshadow,Fail @@ -194,144 +143,6 @@ spec@arb_texture_float@fbo-generatemipmap-formats@GL_INTENSITY16F_ARB NPOT,Fail spec@arb_texture_float@multisample-formats 2 gl_arb_texture_float,Crash spec@arb_texture_float@multisample-formats 4 gl_arb_texture_float,Crash -spec@arb_texture_float@texwrap formats bordercolor,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA16F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY16F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY32F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE16F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE32F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats 
bordercolor@GL_LUMINANCE_ALPHA16F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE_ALPHA32F_ARB- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_RGB16F- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_RGB32F- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA16F- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA32F- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA16F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY16F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY32F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE16F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE32F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA16F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA32F_ARB- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB16F- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA16F- swizzled- border color only,Fail -spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail -spec@arb_texture_gather@texturegather@fs-rgba-alpha-float-cubearray,Fail 
-spec@arb_texture_gather@texturegather@fs-rgba-alpha-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-alpha-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-alpha-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-blue-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-blue-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-blue-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-blue-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-green-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-green-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-green-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-green-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-none-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-none-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-none-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-none-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-red-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-red-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-red-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgba-red-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-blue-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-blue-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-blue-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-blue-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-green-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-green-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-green-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-green-unorm-cubearray,Fail 
-spec@arb_texture_gather@texturegather@fs-rgb-none-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-none-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-none-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-none-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-red-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-red-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-red-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rgb-red-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-green-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-green-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-green-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-green-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-none-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-none-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-none-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-none-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-red-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-red-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-red-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-rg-red-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-none-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-none-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-none-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-none-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-red-float-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-red-int-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-red-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@fs-r-red-unorm-cubearray,Fail 
-spec@arb_texture_gather@texturegather@vs-rgba-alpha-float-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-alpha-int-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-alpha-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-alpha-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-blue-float-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-blue-int-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-blue-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-blue-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-green-float-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-green-int-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-green-uint-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-green-unorm-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-none-float-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-none-int-cubearray,Fail -spec@arb_texture_gather@texturegather@vs-rgba-none-uint-cubearray,Fail
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/ci/traces-panfrost.yml ^
@@ -5,7 +5,7 @@ - path: behdad-glyphy/glyphy.trace expectations: - device: gl-panfrost-t860 - checksum: b6cd8d92987530edcfc36a933c9b07f6 + checksum: 22bf5262745fd47c5c5eadb93d7cc420 - path: glmark2/desktop:windows=4:effect=blur:blur-radius=5:passes=1:separable=true.trace expectations: - device: gl-panfrost-t860 @@ -158,7 +158,7 @@ - path: glmark2/refract.trace expectations: - device: gl-panfrost-t860 - checksum: e520a0071fd940be1401aea2bec97709 + checksum: 6557deca1a47a7a77723658ea579ac63 - path: glmark2/shading:shading=blinn-phong-inf.trace expectations: - device: gl-panfrost-t860 @@ -209,11 +209,11 @@ - path: gputest/plot3d.trace expectations: - device: gl-panfrost-t860 - checksum: e73715f3b6a4f1609eaf5432af03714e + checksum: a34223830866a42747db199b04c5e1be - path: humus/AmbientAperture.trace expectations: - device: gl-panfrost-t860 - checksum: b0d4a64e0907f817161b2a0e85af7a9a + checksum: e4c0b930ef99f14305e1ade7f1779c09 - path: humus/CelShading.trace expectations: - device: gl-panfrost-t860
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/genxml/v6.xml ^
@@ -835,7 +835,7 @@ <field name="Alpha reference" size="32" start="12:0" type="float"/> <field name="Thread Balancing" size="16" start="13:0" type="uint"/> <field name="Secondary preload" size="32" start="13:0" type="Preload"/> - <field name="Secondary shader" size="64" start="13:0" type="address"/> + <field name="Secondary shader" size="64" start="14:0" type="address"/> </struct> <struct name="Uniform Buffer" align="8">
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/genxml/v7.xml ^
@@ -881,7 +881,7 @@ <field name="Alpha reference" size="32" start="12:0" type="float"/> <field name="Thread Balancing" size="16" start="13:0" type="uint"/> <field name="Secondary preload" size="32" start="13:0" type="Preload"/> - <field name="Secondary shader" size="64" start="13:0" type="address"/> + <field name="Secondary shader" size="64" start="14:0" type="address"/> <field name="Message Preload 1" size="16" start="15:0" type="uint"/> <field name="Message Preload 2" size="16" start="15:16" type="uint"/> </struct>
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/pan_indirect_draw.c ^
@@ -1131,7 +1131,6 @@ pan_pack(state, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader_info, address, &cfg); } - pthread_mutex_unlock(&dev->indirect_draw_shaders.lock); draw_shader->push = shader_info.push; draw_shader->rsd = dev->indirect_draw_shaders.states->ptr.gpu + @@ -1171,15 +1170,15 @@ const struct indirect_draw_inputs inputs) { struct panfrost_ptr inputs_buf = - pan_pool_alloc_aligned(pool, sizeof(inputs), 16); + pan_pool_alloc_aligned(pool, sizeof(inputs), 16); - memcpy(inputs_buf.cpu, &inputs, sizeof(inputs)); + memcpy(inputs_buf.cpu, inputs, sizeof(inputs)); struct panfrost_ptr ubos_buf = pan_pool_alloc_desc(pool, UNIFORM_BUFFER); pan_pack(ubos_buf.cpu, UNIFORM_BUFFER, cfg) { - cfg.entries = DIV_ROUND_UP(sizeof(inputs), 16); + cfg.entries = DIV_ROUND_UP(sizeof(inputs), 16); cfg.pointer = inputs_buf.gpu; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/midgard/meson.build ^
@@ -52,7 +52,7 @@ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) libpanfrost_midgard_disasm = static_library(
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/midgard/midgard_compile.c ^
@@ -3231,7 +3231,7 @@ /* Report the very first tag executed */ info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0); - info->ubo_mask = ctx->ubo_mask & BITSET_MASK(ctx->nir->info.num_ubos); + info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1); if ((midgard_debug & MIDGARD_DBG_SHADERS) && ((midgard_debug & MIDGARD_DBG_INTERNAL) \|\| !nir->info.internal)) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/shared/pan_tiling.c ^
@@ -250,6 +250,12 @@ TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \ } +/* + * Perform a generic access to a tiled image with a given format. This works + * even for block-compressed images on entire blocks at a time. sx/sy/w/h are + * specified in pixels, not blocks, but our internal routines work in blocks, + * so we divide here. Alignment is assumed. + / static void panfrost_access_tiled_image_generic(void dst, void src, unsigned sx, unsigned sy, @@ -261,10 +267,13 @@ { unsigned bpp = desc->block.bits; - if (desc->block.width > 1) { - w = DIV_ROUND_UP(w, desc->block.width); - h = DIV_ROUND_UP(h, desc->block.height); + / Convert units / + sx /= desc->block.width; + sy /= desc->block.height; + w = DIV_ROUND_UP(w, desc->block.width); + h = DIV_ROUND_UP(h, desc->block.height); + if (desc->block.width > 1) { if (_is_store) TILED_UNALIGNED_TYPES(true, 2) else @@ -371,6 +380,11 @@ panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); } +/* + * Access a tiled image (load or store). Note: the region of interest (x, y, w, + * h) is specified in pixels, not blocks. It is expected that these quantities + * are aligned to the block size. + / void panfrost_store_tiled_image(void dst, const void *src, unsigned x, unsigned y,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/util/pan_ir.h ^
@@ -112,11 +112,17 @@ unsigned sysval_count; }; -/* Technically Midgard could go up to 92 in a pathological case but we don't - * take advantage of that. Likewise Bifrost's FAU encoding can address 128 - * words but actual implementations (G72, G76) are capped at 64 / - -#define PAN_MAX_PUSH 64 +/ Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each. + * In practice, the maximum number of FAU slots is limited by implementation. + * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the + * maximum number of 32-bit words is 128, since there are 2 words per FAU slot. + * + * Midgard can push at most 92 words, so this bound suffices. The Midgard + * compiler pushes less than this, as Midgard uses register-mapped uniforms + * instead of FAU, preventing large numbers of uniforms to be pushed for + * nontrivial programs. + / +#define PAN_MAX_PUSH 128 / Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so * an offset to a word must be < 2^16. There are less than 2^8 UBOs */
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_device.c ^
@@ -157,7 +157,7 @@ /* Nothing to do yet / break; case PANVK_EVENT_OP_WAIT: - in_fences[nr_in_fences++] = op->event->syncobj; + in_fences[(*nr_in_fences)++] = op->event->syncobj; break; default: unreachable("bad panvk_event_op type\n");
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_meta_clear.c ^
@@ -70,8 +70,7 @@ GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); /* Make sure UBO words have been upgraded to push constants / - assert(shader_info->ubo_count == 1); - assert(shader_info->push.count == 4); + assert(shader_info->ubo_mask == 0); mali_ptr shader = pan_pool_upload_aligned(bin_pool, binary.data, binary.size, @@ -138,8 +137,7 @@ GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); / Make sure UBO words have been upgraded to push constants */ - assert(shader_info->ubo_count == 1); - assert(shader_info->push.count == 2); + assert(shader_info->ubo_mask == 0); mali_ptr shader = pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/00-mesa-defaults.conf ^
@@ -293,6 +293,11 @@ <option name="force_integer_tex_nearest" value="true" /> </application> + <application name="DiRT Rally" executable="DirtRally"> + <!-- https://gitlab.freedesktop.org/mesa/mesa/-/issues/5648 --> + <option name="vs_position_always_invariant" value="true" /> + </application> + <!-- Workarounds for SPECviewperf relying on invalid / non-conformant OpenGL behavior. Older SPECviewperf versions might also need this. --> @@ -793,92 +798,6 @@ <option name="vs_position_always_invariant" value="true" /> </application> </device> - <device driver="radv"> - <!-- Engine workarounds --> - <engine engine_name_match="vkd3d"> - <option name="radv_zero_vram" value="true" /> - </engine> - - <engine engine_name_match="Quantic Dream Engine"> - <option name="radv_zero_vram" value="true" /> - <option name="radv_lower_discard_to_demote" value="true" /> - <option name="radv_disable_tc_compat_htile_general" value="true" /> - </engine> - - <!-- Game workarounds --> - <application name="Shadow Of The Tomb Raider (Native)" application_name_match="ShadowOfTheTomb"> - <option name="radv_report_llvm9_version_string" value="true" /> - <option name="radv_invariant_geom" value="true" /> - </application> - - <application name="Shadow Of The Tomb Raider (DX11/DX12)" application_name_match="SOTTR.exe"> - <option name="radv_invariant_geom" value="true" /> - </application> - - <application name="RAGE 2" executable="RAGE2.exe"> - <option name="radv_enable_mrt_output_nan_fixup" value="true" /> - </application> - - <application name="Path of Exile (64-bit, Steam)" executable="PathOfExile_x64Steam.exe"> - <option name="radv_no_dynamic_bounds" value="true" /> - <option name="radv_absolute_depth_bias" value="true" /> - </application> - <application name="Path of Exile (32-bit, Steam)" executable="PathOfExileSteam.exe"> - <option name="radv_no_dynamic_bounds" value="true" /> - <option name="radv_absolute_depth_bias" value="true" /> - </application> - <application name="Path of Exile (64-bit)" 
executable="PathOfExile_x64.exe"> - <option name="radv_no_dynamic_bounds" value="true" /> - <option name="radv_absolute_depth_bias" value="true" /> - </application> - <application name="Path of Exile (32-bit)" executable="PathOfExile.exe"> - <option name="radv_no_dynamic_bounds" value="true" /> - <option name="radv_absolute_depth_bias" value="true" /> - </application> - - <application name="The Surge 2" application_name_match="Fledge"> - <option name="radv_disable_shrink_image_store" value="true" /> - <option name="radv_zero_vram" value="true" /> - </application> - - <application name="World War Z (and World War Z: Aftermath)" application_name_match="WWZ\|wwz"> - <option name="radv_override_uniform_offset_alignment" value="16" /> - <option name="radv_disable_shrink_image_store" value="true" /> - <option name="radv_invariant_geom" value="true" /> - </application> - - <application name="DOOM VFR" application_name_match="DOOM_VFR"> - <option name="radv_no_dynamic_bounds" value="true" /> - </application> - - <application name="DOOM Eternal" application_name_match="DOOMEternal"> - <option name="radv_zero_vram" value="true" /> - </application> - - <application name="No Man's Sky" application_name_match="No Man's Sky"> - <option name="radv_lower_discard_to_demote" value="true" /> - </application> - - <application name="Monster Hunter World" application_name_match="MonsterHunterWorld.exe"> - <option name="radv_invariant_geom" value="true" /> - </application> - - <application name="DOOM (2016)" application_name_match="DOOM$"> - <option name="radv_disable_dcc" value="true" /> - </application> - - <application name="Wolfenstein II" application_name_match="Wolfenstein II The New Colossus"> - <option name="radv_disable_dcc" value="true" /> - </application> - - <application name="RDR2" application_name_match="Red Dead Redemption 2"> - <option name="radv_report_apu_as_dgpu" value="true" /> - </application> - - <application name="Resident Evil Village" 
application_name_match="re8.exe"> - <option name="radv_invariant_geom" value="true" /> - </application> - </device> <!-- The android game hall of shame:
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/bitset.h ^
@@ -208,7 +208,7 @@ static inline void __bitset_set_range(BITSET_WORD *r, unsigned start, unsigned end) { - const unsigned size = end - start; + const unsigned size = end - start + 1; const unsigned start_mod = start % BITSET_WORDBITS; if (start_mod + size <= BITSET_WORDBITS) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/driconf.h ^
@@ -520,4 +520,8 @@ DRI_CONF_OPT_B(radv_report_apu_as_dgpu, def, \ "Report APUs as discrete GPUs instead of integrated GPUs") +#define DRI_CONF_RADV_DISABLE_HTILE_LAYERS(def) \ + DRI_CONF_OPT_B(radv_disable_htile_layers, def, \ + "Disable HTILE for layered depth/stencil formats") + #endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/driconf_static.py ^
@@ -46,6 +46,7 @@ self.cname = cname('application') self.name = xml.attrib['name'] self.executable = xml.attrib.get('executable', None) + self.executable_regexp = xml.attrib.get('executable_regexp', None) self.sha1 = xml.attrib.get('sha1', None) self.application_name_match = xml.attrib.get('application_name_match', None) self.application_versions = xml.attrib.get('application_versions', None) @@ -118,6 +119,7 @@ struct driconf_application { const char name; const char executable; + const char executable_regexp; const char sha1; const char application_name_match; const char application_versions; @@ -179,6 +181,9 @@ % if application.executable: .executable = "${application.executable}", % endif +% if application.executable_regexp: + .executable_regexp = "${application.executable_regexp}", +% endif % if application.sha1: .sha1 = "${application.sha1}", % endif
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/fossilize_db.c ^
@@ -318,8 +318,12 @@ free(filename); free(idx_filename); - if (!check_files_opened_successfully(foz_db->file[file_idx], db_idx)) + if (!check_files_opened_successfully(foz_db->file[file_idx], db_idx)) { + /* Prevent foz_destroy from destroying it a second time. / + foz_db->file[file_idx] = NULL; + continue; / Ignore invalid user provided filename and continue */ + } if (!load_foz_dbs(foz_db, db_idx, file_idx, true)) { fclose(db_idx);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/perf/u_trace.c ^
@@ -24,16 +24,17 @@ #include <inttypes.h> #include "util/list.h" -#include "util/ralloc.h" #include "util/u_debug.h" #include "util/u_inlines.h" #include "util/u_fifo.h" +#include "util/u_vector.h" #include "u_trace.h" #define __NEEDS_TRACE_PRIV #include "u_trace_priv.h" +#define PAYLOAD_BUFFER_SIZE 0x100 #define TIMESTAMP_BUF_SIZE 0x1000 #define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t)) @@ -49,6 +50,14 @@ struct list_head ctx_list = { &ctx_list, &ctx_list }; #endif +struct u_trace_payload_buf { + uint32_t refcount; + + uint8_t buf; + uint8_t next; + uint8_t end; +}; + struct u_trace_event { const struct u_tracepoint tp; const void payload; @@ -76,12 +85,12 @@ / void timestamps; - /* - * For trace payload, we sub-allocate from ralloc'd buffers which - * hang off of the chunk's ralloc context, so they are automatically - * free'd when the chunk is free'd + /* Array of u_trace_payload_buf referenced by traces[] elements. / - uint8_t payload_buf, payload_end; + struct u_vector payloads; + + / Current payload buffer being written. 
/ + struct u_trace_payload_buf payload; struct util_queue_fence fence; @@ -97,6 +106,35 @@ bool free_flush_data; }; +static struct u_trace_payload_buf * +u_trace_payload_buf_create(void) +{ + struct u_trace_payload_buf payload = + malloc(sizeof(payload) + PAYLOAD_BUFFER_SIZE); + + p_atomic_set(&payload->refcount, 1); + + payload->buf = (uint8_t ) (payload + 1); + payload->end = payload->buf + PAYLOAD_BUFFER_SIZE; + payload->next = payload->buf; + + return payload; +} + +static struct u_trace_payload_buf +u_trace_payload_buf_ref(struct u_trace_payload_buf payload) +{ + p_atomic_inc(&payload->refcount); + return payload; +} + +static void +u_trace_payload_buf_unref(struct u_trace_payload_buf payload) +{ + if (p_atomic_dec_zero(&payload->refcount)) + free(payload); +} + static void free_chunk(void ptr) { @@ -104,7 +142,14 @@ chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps); + / Unref payloads attached to this chunk. / + struct u_trace_payload_buf payload; + u_vector_foreach(payload, &chunk->payloads) + u_trace_payload_buf_unref(payload); + u_vector_finish(&chunk->payloads); + list_del(&chunk->node); + free(chunk); } static void @@ -113,21 +158,41 @@ while (!list_is_empty(chunks)) { struct u_trace_chunk chunk = list_first_entry(chunks, struct u_trace_chunk, node); - ralloc_free(chunk); + free_chunk(chunk); } } static struct u_trace_chunk -get_chunk(struct u_trace ut) +get_chunk(struct u_trace ut, size_t payload_size) { struct u_trace_chunk chunk; + assert(payload_size <= PAYLOAD_BUFFER_SIZE); + / do we currently have a non-full chunk to append msgs to? / if (!list_is_empty(&ut->trace_chunks)) { chunk = list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node); - if (chunk->num_traces < TRACES_PER_CHUNK) - return chunk; + / Can we store a new trace in the chunk? / + if (chunk->num_traces < TRACES_PER_CHUNK) { + / If no payload required, nothing else to check. 
/ + if (payload_size <= 0) + return chunk; + + / If the payload buffer has space for the payload, we're good. + / + if (chunk->payload && + (chunk->payload->end - chunk->payload->next) >= payload_size) + return chunk; + + / If we don't have enough space in the payload buffer, can we + * allocate a new one? + / + struct u_trace_payload_buf buf = u_vector_add(&chunk->payloads); + buf = u_trace_payload_buf_create(); + chunk->payload = buf; + return chunk; + } / we need to expand to add another chunk to the batch, so * the current one is no longer the last one of the batch: / @@ -135,12 +200,17 @@ } / .. if not, then create a new one: / - chunk = rzalloc_size(NULL, sizeof(chunk)); - ralloc_set_destructor(chunk, free_chunk); + chunk = calloc(1, sizeof(chunk)); chunk->utctx = ut->utctx; chunk->timestamps = ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE); chunk->last = true; + u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf )); + if (payload_size > 0) { + struct u_trace_payload_buf *buf = u_vector_add(&chunk->payloads); + buf = u_trace_payload_buf_create(); + chunk->payload = buf; + } list_addtail(&chunk->node, &ut->trace_chunks); @@ -319,7 +389,7 @@ static void cleanup_chunk(void job, void gdata, int thread_index) { - ralloc_free(job); + free_chunk(job); } void @@ -417,7 +487,7 @@ uint32_t from_idx = begin_it.event_idx; while (from_chunk != end_it.chunk \|\| from_idx != end_it.event_idx) { - struct u_trace_chunk to_chunk = get_chunk(into); + struct u_trace_chunk to_chunk = get_chunk(into, 0 / payload_size /); unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces, from_chunk->num_traces - from_idx); @@ -433,6 +503,17 @@ &from_chunk->traces[from_idx], to_copy sizeof(struct u_trace_event)); + /* Take a refcount on payloads from from_chunk if needed. 
/ + if (begin_it.ut != into) { + struct u_trace_payload_buf in_payload; + u_vector_foreach(in_payload, &from_chunk->payloads) { + struct u_trace_payload_buf out_payload = + u_vector_add(&to_chunk->payloads); + + out_payload = u_trace_payload_buf_ref(*in_payload); + }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/perf/u_trace.h ^
@@ -235,9 +235,6 @@ * Provides callback for driver to copy timestamps on GPU from * one buffer to another. * - * The payload is shared and remains owned by the original u_trace - * if tracepoints are being copied between different u_trace! - * * It allows: * - Tracing re-usable command buffer in Vulkan, by copying tracepoints * each time it is submitted.
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/slab.c ^
@@ -110,6 +110,7 @@ parent->element_size = ALIGN_POT(sizeof(struct slab_element_header) + item_size, sizeof(intptr_t)); parent->num_elements = num_items; + parent->item_size = item_size; } void @@ -231,6 +232,18 @@ } /** + * Same as slab_alloc but memset the returned object to 0. + / +void +slab_zalloc(struct slab_child_pool pool) +{ + void r = slab_alloc(pool); + if (r) + memset(r, 0, pool->parent->item_size); + return r; +} + +/** * Free an object allocated from the slab. Single-threaded (i.e. the caller * must ensure that no operation happens on the same child pool in another * thread).
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/slab.h ^
@@ -55,6 +55,7 @@ simple_mtx_t mutex; unsigned element_size; unsigned num_elements; + unsigned item_size; }; struct slab_child_pool { @@ -81,6 +82,7 @@ struct slab_parent_pool parent); void slab_destroy_child(struct slab_child_pool pool); void slab_alloc(struct slab_child_pool pool); +void slab_zalloc(struct slab_child_pool pool); void slab_free(struct slab_child_pool pool, void ptr); struct slab_mempool {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_atomic.c ^
@@ -34,6 +34,21 @@ static pthread_mutex_t sync_mutex = PTHREAD_MUTEX_INITIALIZER; +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma redefine_extname __sync_add_and_fetch_8_c __sync_add_and_fetch_8 +#pragma redefine_extname __sync_sub_and_fetch_8_c __sync_sub_and_fetch_8 +#pragma redefine_extname __sync_fetch_and_add_8_c __sync_fetch_and_add_8 +#pragma redefine_extname __sync_fetch_and_sub_8_c __sync_fetch_and_sub_8 +#pragma redefine_extname __sync_val_compare_and_swap_8_c \ + __sync_val_compare_and_swap_8 +#define __sync_add_and_fetch_8 __sync_add_and_fetch_8_c +#define __sync_sub_and_fetch_8 __sync_sub_and_fetch_8_c +#define __sync_fetch_and_add_8 __sync_fetch_and_add_8_c +#define __sync_fetch_and_sub_8 __sync_fetch_and_sub_8_c +#define __sync_val_compare_and_swap_8 __sync_val_compare_and_swap_8_c +#endif + WEAK uint64_t __sync_add_and_fetch_8(uint64_t ptr, uint64_t val) { @@ -58,6 +73,32 @@ pthread_mutex_unlock(&sync_mutex); return r; +} + +WEAK uint64_t +__sync_fetch_and_add_8(uint64_t ptr, uint64_t val) +{ + uint64_t r; + + pthread_mutex_lock(&sync_mutex); + r = ptr; + ptr += val; + pthread_mutex_unlock(&sync_mutex); + + return r; +} + +WEAK uint64_t +__sync_fetch_and_sub_8(uint64_t ptr, uint64_t val) +{ + uint64_t r; + + pthread_mutex_lock(&sync_mutex); + r = ptr; + *ptr -= val; + pthread_mutex_unlock(&sync_mutex); + + return r; } WEAK uint64_t
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_cpu_detect.c ^
@@ -136,7 +136,7 @@ int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC}; #endif int has_vu = 0; - int len = sizeof (has_vu); + size_t len = sizeof (has_vu); int err; err = sysctl(sels, 2, &has_vu, &len, NULL, 0); @@ -438,6 +438,7 @@ static void check_os_mips64_support(void) { +#if defined(PIPE_OS_LINUX) Elf64_auxv_t aux; int fd; @@ -453,6 +454,7 @@ } close (fd); } +#endif /* PIPE_OS_LINUX / } #endif / PIPE_ARCH_MIPS64 */ @@ -623,7 +625,7 @@ if (available_cpus == 0) { const int mib[] = { CTL_HW, HW_NCPUONLINE }; int ncpu; - int len = sizeof(ncpu); + size_t len = sizeof(ncpu); sysctl(mib, 2, &ncpu, &len, NULL, 0); available_cpus = ncpu;
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_debug_stack.c ^
@@ -199,7 +199,6 @@ unsigned start_frame, unsigned nr_frames) { - const void frame_pointer = NULL; unsigned i = 0; if (!nr_frames) { @@ -250,21 +249,22 @@ } #endif +#ifdef PIPE_ARCH_X86 #if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION > 404) \|\| defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wframe-address" - frame_pointer = ((const void )__builtin_frame_address(1)); + const void frame_pointer = ((const void )__builtin_frame_address(1)); #pragma GCC diagnostic pop -#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +#elif defined(PIPE_CC_MSVC) + const void frame_pointer; __asm { mov frame_pointer, ebp } frame_pointer = (const void )frame_pointer[0]; #else - frame_pointer = NULL; + const void frame_pointer = NULL; #endif -#ifdef PIPE_ARCH_X86 while (nr_frames) { const void next_frame_pointer; @@ -287,8 +287,6 @@ frame_pointer = next_frame_pointer; } -#else - (void) frame_pointer; #endif while (nr_frames) {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/xmlconfig.c ^
@@ -1093,6 +1093,8 @@ const char *appattr[] = { "name", a->name, "executable", a->executable, + "executable_regexp", a->executable_regexp, + "sha1", a->sha1, "application_name_match", a->application_name_match, "application_versions", a->application_versions, NULL
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_image.c ^
@@ -206,7 +206,9 @@ const VkImageCreateInfo create_info, struct vn_image img) { - return vn_image_init(dev, create_info, img); + VkResult result = vn_image_init(dev, create_info, img); + img->deferred_info->initialized = result == VK_SUCCESS; + return result; } VkResult @@ -298,7 +300,9 @@ if (img->private_memory != VK_NULL_HANDLE) vn_FreeMemory(device, img->private_memory, pAllocator); - vn_async_vkDestroyImage(dev->instance, device, image, NULL); + /* must not ask renderer to destroy uninitialized deferred image */ + if (!img->deferred_info \|\| img->deferred_info->initialized) + vn_async_vkDestroyImage(dev->instance, device, image, NULL); if (img->deferred_info) vk_free(alloc, img->deferred_info);
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_image.h ^
@@ -22,6 +22,9 @@ VkImageCreateInfo create; VkImageFormatListCreateInfo list; VkImageStencilUsageCreateInfo stencil; + + /* track whether vn_image_init_deferred succeeds */ + bool initialized; }; struct vn_image {
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_physical_device.c ^
@@ -2196,8 +2196,25 @@ if (result != VK_SUCCESS \|\| !external_info) return vn_result(physical_dev->instance, result); + if (external_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) { + VkAndroidHardwareBufferUsageANDROID ahb_usage = + vk_find_struct(pImageFormatProperties->pNext, + ANDROID_HARDWARE_BUFFER_USAGE_ANDROID); + if (ahb_usage) { + ahb_usage->androidHardwareBufferUsage = vn_android_get_ahb_usage( + pImageFormatInfo->usage, pImageFormatInfo->flags); + } + + / AHBs with mipmap usage will ignore this property / + pImageFormatProperties->imageFormatProperties.maxMipLevels = 1; + } + VkExternalImageFormatProperties img_props = vk_find_struct( pImageFormatProperties->pNext, EXTERNAL_IMAGE_FORMAT_PROPERTIES); + if (!img_props) + return VK_SUCCESS; + VkExternalMemoryProperties mem_props = &img_props->externalMemoryProperties; @@ -2217,17 +2234,6 @@ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID; mem_props->compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID; - - VkAndroidHardwareBufferUsageANDROID ahb_usage = - vk_find_struct(pImageFormatProperties->pNext, - ANDROID_HARDWARE_BUFFER_USAGE_ANDROID); - if (ahb_usage) { - ahb_usage->androidHardwareBufferUsage = vn_android_get_ahb_usage( - pImageFormatInfo->usage, pImageFormatInfo->flags); - } - - /* AHBs with mipmap usage will ignore this property */ - pImageFormatProperties->imageFormatProperties.maxMipLevels = 1; } else { mem_props->compatibleHandleTypes = supported_handle_types; mem_props->exportFromImportedHandleTypes = @@ -2236,7 +2242,7 @@ : 0; } - return vn_result(physical_dev->instance, result); + return VK_SUCCESS; } void
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_pipeline.c ^
@@ -140,9 +140,12 @@ VkPipelineCacheCreateInfo local_create_info; if (pCreateInfo->initialDataSize) { + const struct vk_pipeline_cache_header header = + pCreateInfo->pInitialData; + local_create_info = pCreateInfo; - local_create_info.pInitialData += - sizeof(struct vk_pipeline_cache_header); + local_create_info.initialDataSize -= header->header_size; + local_create_info.pInitialData += header->header_size; pCreateInfo = &local_create_info; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/device-select-layer/device_select_x11.c ^
@@ -67,6 +67,8 @@ int scrn; xcb_connection_t conn; int default_idx = -1; + drmDevicePtr xdev = NULL; + conn = xcb_connect(NULL, &scrn); if (!conn) return -1; @@ -91,7 +93,6 @@ if (dri3_fd == -1) goto out; - drmDevicePtr xdev; int ret = drmGetDevice2(dri3_fd, 0, &xdev); close(dri3_fd); if (ret < 0) @@ -113,7 +114,9 @@ if (default_idx != -1) break; } + out: + drmFreeDevice(&xdev); / Is NULL pointer safe. */ xcb_disconnect(conn); return default_idx; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/gen_enum_to_str.py ^
@@ -78,8 +78,9 @@ case ${v}: return "${enum.values[v]}"; % endfor + case ${enum.max_enum_name}: return "${enum.max_enum_name}"; default: - unreachable("Undefined enum value."); + return "Unknown ${enum.name} value."; } } @@ -116,7 +117,7 @@ return "${object_types[0].enum_to_name[object_type]}"; % endfor default: - unreachable("Undefined enum value."); + return "Unknown VkObjectType value."; } } """)) @@ -246,12 +247,24 @@ def CamelCase_to_SHOUT_CASE(s): return (s[:1] + re.sub(r'(?<![A-Z])([A-Z])', r'_\1', s[1:])).upper() +def compute_max_enum_name(s): + max_enum_name = CamelCase_to_SHOUT_CASE(s) + last_prefix = max_enum_name.rsplit('_', 1)[-1] + # Those special prefixes need to be always at the end + if last_prefix in ['AMD', 'EXT', 'INTEL', 'KHR', 'NV'] : + max_enum_name = "_".join(max_enum_name.split('_')[:-1]) + max_enum_name = max_enum_name + "_MAX_ENUM_" + last_prefix + else: + max_enum_name = max_enum_name + "_MAX_ENUM" + + return max_enum_name class VkEnum(object): """Simple struct-like class representing a single Vulkan Enum.""" def __init__(self, name, bitwidth=32, values=None): self.name = name + self.max_enum_name = compute_max_enum_name(name) self.bitwidth = bitwidth self.extension = None # Maps numbers to names
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/vk_log.c ^
@@ -268,7 +268,6 @@ case VK_ERROR_TOO_MANY_OBJECTS: return &vk_object_to_device(obj)->base; default: - assert(obj->client_visible); return obj; } } @@ -306,6 +305,8 @@ VK_LOG_NO_OBJS(instance), file, line, "%s (%s)", message, error_str); } + + ralloc_free(message); } else { if (object) { __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/vk_synchronization2.c ^
@@ -27,6 +27,7 @@ #include "vk_device.h" #include "vk_queue.h" #include "vk_util.h" +#include "../wsi/wsi_common.h" VKAPI_ATTR void VKAPI_CALL vk_common_CmdWriteTimestamp( @@ -291,6 +292,7 @@ STACK_ARRAY(VkSubmitInfo2KHR, submit_info_2, submitCount); STACK_ARRAY(VkPerformanceQuerySubmitInfoKHR, perf_query_submit_info, submitCount); + STACK_ARRAY(struct wsi_memory_signal_submit_info, wsi_mem_submit_info, submitCount); uint32_t n_wait_semaphores = 0; uint32_t n_command_buffers = 0; @@ -373,6 +375,15 @@ __vk_append_struct(&submit_info_2[s], &perf_query_submit_info[s]); } + const struct wsi_memory_signal_submit_info mem_signal_info = + vk_find_struct_const(pSubmits[s].pNext, + WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); + if (mem_signal_info) { + wsi_mem_submit_info[s] = mem_signal_info; + wsi_mem_submit_info[s].pNext = NULL; + __vk_append_struct(&submit_info_2[s], &wsi_mem_submit_info[s]); + } + n_wait_semaphores += pSubmits[s].waitSemaphoreCount; n_command_buffers += pSubmits[s].commandBufferCount; n_signal_semaphores += pSubmits[s].signalSemaphoreCount; @@ -388,6 +399,7 @@ STACK_ARRAY_FINISH(signal_semaphores); STACK_ARRAY_FINISH(submit_info_2); STACK_ARRAY_FINISH(perf_query_submit_info); + STACK_ARRAY_FINISH(wsi_mem_submit_info); return result; }
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_display.c ^
@@ -460,10 +460,8 @@ } } -VKAPI_ATTR VkResult VKAPI_CALL -wsi_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physicalDevice, - uint32_t pPropertyCount, - VkDisplayProperties2KHR pProperties) +static VkResult +wsi_get_connectors(VkPhysicalDevice physicalDevice) { VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); struct wsi_device wsi_device = pdevice->wsi_device; @@ -471,27 +469,46 @@ (struct wsi_display ) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY]; if (wsi->fd < 0) - goto bail; + return VK_SUCCESS; drmModeResPtr mode_res = drmModeGetResources(wsi->fd); if (!mode_res) - goto bail; - - VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount); + return VK_ERROR_OUT_OF_HOST_MEMORY; /* Get current information / - for (int c = 0; c < mode_res->count_connectors; c++) { struct wsi_display_connector connector = wsi_display_get_connector(wsi_device, wsi->fd, mode_res->connectors[c]); - if (!connector) { drmModeFreeResources(mode_res); return VK_ERROR_OUT_OF_HOST_MEMORY; } + } + drmModeFreeResources(mode_res); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +wsi_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physicalDevice, + uint32_t pPropertyCount, + VkDisplayProperties2KHR pProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + struct wsi_device wsi_device = pdevice->wsi_device; + struct wsi_display wsi = + (struct wsi_display ) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY]; + + / Get current information / + VkResult result = wsi_get_connectors(physicalDevice); + if (result != VK_SUCCESS) + goto bail; + + VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount); + + wsi_for_each_connector(connector, wsi) { if (connector->connected) { vk_outarray_append(&conn, prop) { wsi_display_fill_in_display_properties(wsi_device, @@ -501,13 +518,11 @@ } } - drmModeFreeResources(mode_res); - return vk_outarray_status(&conn); bail: pPropertyCount = 0; - return VK_SUCCESS; + return result; } /* @@ -541,6 +556,10 @@ struct wsi_display 
wsi = (struct wsi_display ) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY]; + VkResult result = wsi_get_connectors(physicalDevice); + if (result != VK_SUCCESS) + goto bail; + VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount); wsi_for_each_connector(connector, wsi) { @@ -554,6 +573,10 @@ } } return vk_outarray_status(&conn); + +bail: + pPropertyCount = 0; + return result; } VKAPI_ATTR VkResult VKAPI_CALL @@ -566,6 +589,11 @@ struct wsi_display wsi = (struct wsi_display ) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY]; + / Get current information / + VkResult result = wsi_get_connectors(physicalDevice); + if (result != VK_SUCCESS) + goto bail; + VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount); wsi_for_each_connector(connector, wsi) { @@ -575,6 +603,10 @@ } } return vk_outarray_status(&conn); + +bail: + pPropertyCount = 0; + return result; } /*
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_drm.c ^
@@ -531,7 +531,7 @@ .sType = VK_STRUCTURE_TYPE_WSI_IMAGE_CREATE_INFO_MESA, .prime_blit_src = true, }; - const VkImageCreateInfo image_info = { + VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = &image_wsi_info, .flags = 0, @@ -552,6 +552,10 @@ .pQueueFamilyIndices = pCreateInfo->pQueueFamilyIndices, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; + if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR) { + image_info.flags \|= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT \| + VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR; + } result = wsi->CreateImage(chain->device, &image_info, &chain->alloc, &image->image); if (result != VK_SUCCESS)
[-] [+]	Changed	_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_wayland.c ^
@@ -294,14 +294,25 @@ format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8_UNORM, true, true); - FALLTHROUGH; + if (format) + wsi_wl_format_add_modifier(format, modifier); + if (srgb_format) + wsi_wl_format_add_modifier(srgb_format, modifier); + + srgb_format = wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_R8G8B8A8_SRGB, + false, true); + format = wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_R8G8B8A8_UNORM, + false, true); + break; case DRM_FORMAT_ABGR8888: srgb_format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8A8_SRGB, - true, true); + true, false); format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8A8_UNORM, - true, true); + true, false); break; case DRM_FORMAT_XRGB8888: srgb_format = wsi_wl_display_add_vk_format(display, formats, @@ -310,14 +321,25 @@ format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8_UNORM, true, true); - FALLTHROUGH; + if (format) + wsi_wl_format_add_modifier(format, modifier); + if (srgb_format) + wsi_wl_format_add_modifier(srgb_format, modifier); + + srgb_format = wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_B8G8R8A8_SRGB, + false, true); + format = wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_B8G8R8A8_UNORM, + false, true); + break; case DRM_FORMAT_ARGB8888: srgb_format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8A8_SRGB, - true, true); + true, false); format = wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8A8_UNORM, - true, true); + true, false); break; } @@ -336,11 +358,17 @@ case WL_SHM_FORMAT_XBGR8888: wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8_SRGB, - false, true); + true, true); wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8_UNORM, + true, true); + wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_R8G8B8A8_SRGB, + false, true); + wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_R8G8B8A8_UNORM, false, 
true); - FALLTHROUGH; + break; case WL_SHM_FORMAT_ABGR8888: wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_R8G8B8A8_SRGB, @@ -352,11 +380,17 @@ case WL_SHM_FORMAT_XRGB8888: wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8_SRGB, - false, true); + true, true); wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8_UNORM, + true, true); + wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_B8G8R8A8_SRGB, false, true); - FALLTHROUGH; + wsi_wl_display_add_vk_format(display, formats, + VK_FORMAT_B8G8R8A8_UNORM, + false, true); + break; case WL_SHM_FORMAT_ARGB8888: wsi_wl_display_add_vk_format(display, formats, VK_FORMAT_B8G8R8A8_SRGB, @@ -427,6 +461,12 @@ case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_B8G8R8A8_SRGB: return alpha ? WL_SHM_FORMAT_ARGB8888 : WL_SHM_FORMAT_XRGB8888; + case VK_FORMAT_R8G8B8_UNORM: + case VK_FORMAT_R8G8B8_SRGB: + return WL_SHM_FORMAT_XBGR8888; + case VK_FORMAT_B8G8R8_UNORM: + case VK_FORMAT_B8G8R8_SRGB: + return WL_SHM_FORMAT_XRGB8888; default: assert(!"Unsupported Vulkan format");