[-]
[+]
|
Changed |
_service:tar_git:mesa.spec
|
|
[-]
[+]
|
Changed |
_service
^
|
@@ -2,7 +2,7 @@
<service name="tar_git">
<param name="url">https://github.com/sailfish-on-dontbeevil/mesa</param>
<param name="branch">master</param>
- <param name="revision">21.3.0+git2</param>
+ <param name="revision">21.3.9+git1</param>
<param name="token"/>
<param name="debian">N</param>
<param name="dumb">N</param>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/.gitmodules
^
|
@@ -1,3 +1,3 @@
[submodule "mesa"]
path = mesa
- url = https://github.com/sailfishos-mirror/mesa.git
+ url = https://github.com/sailfish-on-dontbeevil/mesa-1
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.gitlab-ci.yml
^
|
@@ -102,7 +102,7 @@
- .ci-run-policy
script:
- apk --no-cache add graphviz doxygen
- - pip3 install "sphinx<4.0" breathe mako sphinx_rtd_theme
+ - pip3 install sphinx breathe mako sphinx_rtd_theme
- docs/doxygen-wrapper.py --out-dir=docs/doxygen_xml
- sphinx-build -W -b html docs public
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.gitlab-ci/all-skips.txt
^
|
@@ -6,9 +6,6 @@
# reliable to be run in parallel with other tests due to CPU-side timing.
dEQP-GLES[0-9]*.functional.flush_finish.*
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4575
-dEQP-VK.wsi.display.get_display_plane_capabilities
-
# piglit: WGL is Windows-only
wgl@.*
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/.pick_status.json
^
|
@@ -1,5 +1,36851 @@
[
{
+ "sha": "fc381fa1e341bc9cf71f53a915dc47926a3f621f",
+ "description": "tu: Actually expose VK_EXT_texel_buffer_alignment",
+ "nominated": false,
+ "nomination_type": 1,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": "3d04c435766a1dc4946a2c5276e0116a5fbb67cd"
+ },
+ {
+ "sha": "f18429340e05a6559db9f9127595ab07fe807a28",
+ "description": "lavapipe: Lift fence check into dedicated function",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "4f6c7a6025628fe14e06ac864986ab95bdd25457",
+ "description": "radv: Don't hash ycbcr sampler base object.",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "012bfde7f3412018f75e335ee9d42c90c7ba3d3e",
+ "description": "panvk: Hook up emulated secondary command buffers",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "18fced0226dfba4bdfbaf9139aafb97dcea9b85c",
+ "description": "panvk: Refcount the descriptor set and pipeline layouts",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "df92f56d8d0b89092f0984baf165761a4daf287c",
+ "description": "vulkan/runtime: Add emulated secondary command buffer support",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "25542f12d764f7fd6d5ade4da760d09d0d93cf43",
+ "description": "vulkan/cmd_queue: Fix the allocation scope",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "1437ee749b8a9fee05019582f570b167c0711116",
+ "description": "vulkan/cmd_queue: Track allocation errors in vk_cmd_queue",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "6cb95877b57d79037e43fff22e855e600bcf4c80",
+ "description": "vulkan/cmd_queue: Auto-generate more vk_cmd_enqueue_unless_primary_Cmd*",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "3cffffc4415937556be5f94f3edbf23100c199c3",
+ "description": "vulkan/cmd_queue: Generate enqueue_if_not_primary entrypoints",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "8f29c833da0201271d532f9ea3badbe81a9d077b",
+ "description": "vulkan/cmd_queue: Add a vk_cmd_queue_execute() helper",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "e0910f5ef83a5c68099c0179468a7ee2a970cf47",
+ "description": "Revert \"features: fix some vk extension listings\"",
+ "nominated": false,
+ "nomination_type": 2,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": "a3e9388953f60d22c188d0e40bb4187be9048800"
+ },
+ {
+ "sha": "68fe847a2653b89992ed0b5e1f64e54bafc07d22",
+ "description": "lavapipe: Drop GetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "dc8fdab71efdc3e516f2a8abdfdc2dec05fce448",
+ "description": "lavapipe: Use VK_OUTARRAY for GetPhysicalDeviceQueueFamilyProperties[2]",
+ "nominated": true,
+ "nomination_type": 1,
+ "resolution": 0,
+ "main_sha": null,
+ "because_sha": "b38879f8c5f57b7f1802e433e33181bdf5e72aef"
+ },
+ {
+ "sha": "91cb714dc12dd1a669177aec20151ce1504d4aac",
+ "description": "panvk: Drop GetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "19f56e3fc47ad4753396ea0831878ad94add85ab",
+ "description": "v3dv: Drop GetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "2a779f98dc6741ba87def3a54999e2311e5ab489",
+ "description": "turnip: Drop tu_legacy.c",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "205bf5d9cbab97bc29b319685d69947b5ccb8bb6",
+ "description": "radv: Drop GetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "8d7cbe026e344cfb009b0fc88352c8c10388de14",
+ "description": "anv: Drop GetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "25664c6194850280d375d58cec75e8af8346c25d",
+ "description": "vulkan: Add a 2 wrapper for vkGetPhysicalDeviceQueueFamilyProperties",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "a3e9388953f60d22c188d0e40bb4187be9048800",
+ "description": "features: fix some vk extension listings",
+ "nominated": false,
+ "nomination_type": null,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null
+ },
+ {
+ "sha": "cdaa3a899cf08858274a79d4c806f26881bead31",
+ "description": "anv: Use layerCount for clears and transitions in BeginRendering",
+ "nominated": false,
+ "nomination_type": 1,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": "3501a3f9ed92831ed039f0d54bf295af41ed0195"
+ },
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/VERSION
^
|
@@ -1 +1 @@
-21.3.0
+21.3.9
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/android/mesa3d_cross.mk
^
|
@@ -88,11 +88,12 @@
-Ddri-search-path=/vendor/$(MESA3D_LIB_DIR)/dri \
-Dplatforms=android \
-Dplatform-sdk-version=$(PLATFORM_SDK_VERSION) \
- -Ddri-drivers= \
+ -Ddri-drivers=$(subst $(space),$(comma),$(BOARD_MESA3D_CLASSIC_DRIVERS)) \
-Dgallium-drivers=$(subst $(space),$(comma),$(BOARD_MESA3D_GALLIUM_DRIVERS)) \
-Dvulkan-drivers=$(subst $(space),$(comma),$(subst radeon,amd,$(BOARD_MESA3D_VULKAN_DRIVERS))) \
-Dgbm=enabled \
-Degl=enabled \
+ -Dcpp_rtti=false \
MESON_BUILD := PATH=/usr/bin:/bin:/sbin:$$PATH ninja -C $(MESON_OUT_DIR)/build
@@ -128,7 +129,6 @@
$(MESON_GEN_FILES_TARGET): PRIVATE_IMPORTED_INCLUDES := $(imported_includes)
$(MESON_GEN_FILES_TARGET): PRIVATE_LDFLAGS := $(my_ldflags)
$(MESON_GEN_FILES_TARGET): PRIVATE_LDLIBS := $(my_ldlibs)
-$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_LDFLAGS := $(my_target_global_ldflags)
$(MESON_GEN_FILES_TARGET): PRIVATE_TIDY_CHECKS := $(my_tidy_checks)
$(MESON_GEN_FILES_TARGET): PRIVATE_TIDY_FLAGS := $(my_tidy_flags)
$(MESON_GEN_FILES_TARGET): PRIVATE_ARFLAGS := $(my_arflags)
@@ -139,6 +139,11 @@
$(MESON_GEN_FILES_TARGET): PRIVATE_ARM_CFLAGS := $(normal_objects_cflags)
+$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CFLAGS := $(my_target_global_cflags)
+$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CONLYFLAGS := $(my_target_global_conlyflags)
+$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_CPPFLAGS := $(my_target_global_cppflags)
+$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_GLOBAL_LDFLAGS := $(my_target_global_ldflags)
+
$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_LIBCRT_BUILTINS := $(my_target_libcrt_builtins)
$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_LIBATOMIC := $(my_target_libatomic)
$(MESON_GEN_FILES_TARGET): PRIVATE_TARGET_CRTBEGIN_SO_O := $(my_target_crtbegin_so_o)
@@ -284,13 +289,17 @@
$(foreach driver,$(BOARD_MESA3D_VULKAN_DRIVERS), $(eval $(call vulkan_target,$(driver))))
-$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: MESA3D_GALLIUM_DRI_DIR:=$(MESA3D_GALLIUM_DRI_DIR)
-$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: $(MESON_OUT_DIR)/install/.install.timestamp
- # Create Symlinks
+$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.targets.timestamp: MESA3D_GALLIUM_DRI_DIR:=$(MESA3D_GALLIUM_DRI_DIR)
+$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.targets.timestamp: $(MESON_OUT_DIR)/install/.install.timestamp
mkdir -p $(dir $@)
+ # Create Symlinks for gallium and kmsro drivers
ls -1 $(MESA3D_GALLIUM_DRI_DIR)/ | PATH=/usr/bin:$$PATH xargs -I{} ln -s -f libgallium_dri.so $(dir $@)/{}
+ # Remove unwanted Symlinks created for classic dri drivers
+ $(foreach d,$(BOARD_MESA3D_CLASSIC_DRIVERS), rm $(dir $@)/$(d)_dri.so;)
+ # Copy classic dri drivers
+ $(foreach d,$(BOARD_MESA3D_CLASSIC_DRIVERS), cp $(MESA3D_GALLIUM_DRI_DIR)/$(d)_dri.so $(dir $@)/$(d)_dri.so;)
touch $@
-$($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN): $(TARGET_OUT_VENDOR)/$(MESA3D_LIB_DIR)/dri/.symlinks.timestamp
+$($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN): $(TARGET_OUT_VENDOR)/$(MESA3D_LIB_DIR)/dri/.targets.timestamp
echo "Build $@"
touch $@
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/drivers/zink.rst
^
|
@@ -129,11 +129,17 @@
OpenGL 4.1
^^^^^^^^^^
-For OpenGL 4.1 support, the following additional ``VkPhysicalDeviceLimits``
-are required:
+For OpenGL 4.1 support, the following additional requirements need to be
+supported:
+
+* ``VkPhysicalDeviceFeatures``:
-* ``maxImageDimension2D`` ≥ 16384
-* ``maxViewports`` ≥ 16
+ * ``multiViewport``
+
+* ``VkPhysicalDeviceLimits``:
+
+ * ``maxImageDimension2D`` ≥ 16384
+ * ``maxViewports`` ≥ 16
OpenGL 4.2
^^^^^^^^^^
@@ -177,7 +183,6 @@
* ``VkPhysicalDeviceFeatures``:
* ``robustBufferAccess``
- * ``multiViewport``
* Formats requiring ``VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT``:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/features.txt
^
|
@@ -331,7 +331,7 @@
GL_EXT_texture_norm16 DONE (freedreno, i965, r600, radeonsi, nvc0i, softpipe, zink)
GL_EXT_texture_sRGB_R8 DONE (all drivers that support GLES 3.0+)
GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+, panfrost)
- GL_KHR_texture_compression_astc_hdr DONE (i965/bxt, panfrost)
+ GL_KHR_texture_compression_astc_hdr DONE (core only)
GL_KHR_texture_compression_astc_sliced_3d DONE (i965/gen9+, r600, radeonsi, panfrost, softpipe, swr, zink, lima)
GL_OES_depth_texture_cube_map DONE (all drivers that support GLSL 1.30+)
GL_OES_EGL_image DONE (all drivers)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.0.rst
^
|
@@ -21,7 +21,7 @@
::
- TBD.
+ a2753c09deef0ba14d35ae8a2ceff3fe5cd13698928c7bb62c2ec8736eb09ce1 mesa-21.3.0.tar.xz
New features
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.1.rst
^
|
@@ -0,0 +1,132 @@
+Mesa 21.3.1 Release Notes / 2021-12-01
+======================================
+
+Mesa 21.3.1 is a bug fix release which fixes bugs found since the 21.3.0 release.
+
+Mesa 21.3.1 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.1 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- GPU Crash in Yuzu 6600xt 5.15
+- [spirv-fuzz] lower_trivial_continues_block: Assertion \`!first_instr || instr_is_continue(first_instr)' failed.
+- [RADV] Crash in Metro Exodus in Caspain chapter and Sam's Story
+- NIR validation failed after nir_copy_prop
+- lima: Corrupted Android-12 UI on Allwinner A64
+- RADV/ACO: Rendering glitches in Forza Horizon 5 windshields
+- dEQP-GLES31.*imulextended* compiling fp64 glsl 4.00 shader
+
+
+Changes
+-------
+
+Connor Abbott (3):
+
+- ir3/ra: Consider reg file size when swapping killed sources
+- ir3/lower_pcopy: Fix shr.b illegal copy lowering
+- ir3/lower_pcopy: Fix bug with "illegal" copies and swaps
+
+Eric Engestrom (2):
+
+- docs: update sha256sum for 21.3.0
+- .pick_status.json: Update to 1ba231fb75be5bffd806cbd09ac285d1f8f15e3d
+
+Erico Nunes (1):
+
+- ci: temporarily disable lima CI
+
+Iago Toral Quiroga (3):
+
+- broadcom/compiler: don't allow RF writes from signals after thrend
+- broadcom/compiler: fix scoreboard locking checks
+- broadcom/compiler: don't move ldvary earlier if current instruction has ldunif
+
+Ian Romanick (1):
+
+- glsl/nir: Don't build soft float64 when it cannot be used
+
+Iván Briano (1):
+
+- intel/nir: also allow unknown format for getting the size of a storage image
+
+Kenneth Graunke (3):
+
+- iris: Make a helper function for cross-batch dependency flushing
+- iris: Check for cross-batch flushing whenever a buffer is newly written.
+- iris: Tidy code in iris_use_pinned_bo a bit
+
+Lionel Landwerlin (3):
+
+- anv: don't try to close fd = -1
+- intel/fs: fix shader call lowering pass
+- util/u_trace: refcount payloads
+
+Mauro Rossi (1):
+
+- android: define cpp_rtti=false because libLLVM is built w/o RTTI (v2)
+
+Mike Blumenkrantz (6):
+
+- zink: block suballocator caching for swapchain/dmabuf images
+- zink: set suballocator bo size to aligned allocation size
+- zink: stop using VK_IMAGE_LAYOUT_PREINITIALIZED for dmabuf
+- zink: always set matching resource export type for dmabuf creation
+- zink: fix memory availability reporting
+- zink: fail context creation more gracefully
+
+Mykhailo Skorokhodov (1):
+
+- nir: Fix read depth for predecessors
+
+Qiang Yu (1):
+
+- glx/dri3: fix glXQueryContext does not return GLX_RENDER_TYPE value
+
+Rhys Perry (4):
+
+- aco/spill: use spills_entry instead of spills_exit to kill linear VGPRs
+- spirv: run nir_copy_prop before nir_rematerialize_derefs_in_use_blocks_impl
+- nir/dce: fix DCE of loops with a halt or return instruction in the pre-header
+- aco: don't create DPP instructions with SGPR operands
+
+Roman Stratiienko (1):
+
+- android.mk: Add missing variables to the make target
+
+Samuel Pitoiset (4):
+
+- radv: disable HTILE for D32S8 format and mipmaps on GFX10
+- radv: fix emitting VBO when vertex input dynamic state is used
+- radv: add a workaround to fix a segfault with Metro Exodus (Linux native)
+- radv: fix resetting the entire vertex input dynamic state
+
+Thomas H.P. Andersen (1):
+
+- svga: fix bitwise/logical and mixup
+
+Vasily Khoruzhick (1):
+
+- lima: disasm: use last argument as a filename
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.2.rst
^
|
@@ -0,0 +1,126 @@
+Mesa 21.3.2 Release Notes / 2021-12-17
+======================================
+
+Mesa 21.3.2 is a bug fix release which fixes bugs found since the 21.3.1 release.
+
+Mesa 21.3.2 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.2 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- Flickering and blackscreen on Mpv and Clapper (and also low performance on glxgears)
+- DXVK SIGBUS with Turnip on Poco F1 at loading to open world.
+- RADV: IsHelperInvocationEXT query is not considered volatile in ACO
+- [GraphicsFuzz] dEQP-VK.graphicsfuzz.stable-binarysearch-tree-nested-if-and-conditional
+- [bisected] Mesa 21.3.x breaks GBM with NVIDIA closed source driver 495.44
+- [DG2] dEQP-GL[45|ES31].functional.shaders.builtin_functions.pack_unpack.packhalf2x16_compute fail
+
+
+Changes
+-------
+
+Alyssa Rosenzweig (1):
+
+- panfrost: Don't shadow Mesa's fui()
+
+Caio Oliveira (1):
+
+- nir: Initialize nir_register::divergent
+
+Danylo Piliaiev (1):
+
+- turnip: Fix operator precedence in address calculation macros for queries
+
+Dave Airlie (1):
+
+- crocus: cleanup bo exports for external objects
+
+Eric Engestrom (1):
+
+- .pick_status.json: Update to a65285f54be6d756a8a558f638c18bb4f075222c
+
+Francisco Jerez (1):
+
+- intel/fs/xehp: Teach SWSB pass about the exec pipeline of FS_OPCODE_PACK_HALF_2x16_SPLIT.
+
+Ian Romanick (2):
+
+- intel/compiler: Don't predicate a WHILE if there is a CONT
+- intel/stub: Silence "initialized field overwritten" warning
+
+Jakob Bornecrantz (1):
+
+- vulkan-device-select: Don't leak drmDevicePtr
+
+James Jones (1):
+
+- gbm: Don't pass default usage flags on ABIs < 1
+
+Jason Ekstrand (3):
+
+- crocus: wm_prog_key::key_alpha_test uses GL enums
+- anv: Stop doing too much per-sample shading
+- radeonsi/nir: Check for VARYING_SLOT_PRIMITIVE_ID not SYSTEM_VALUE
+
+Lionel Landwerlin (4):
+
+- nir/opt_deref: don't try to cast empty structures
+- intel/nir: preserve access value when duping intrinsic
+- nir/lower_io: include the variable access in the lowered intrinsic
+- vulkan: fix missing handling of WSI memory signal
+
+Mauro Rossi (1):
+
+- android: add support for classic dri-drivers (v2)
+
+Michel Zou (1):
+
+- meson: correctly detect linker arguments
+
+Nanley Chery (1):
+
+- iris: Free the local cache bucket in bufmgr_destroy
+
+Pierre-Eric Pelloux-Prayer (1):
+
+- radeonsi: fix fast clear / depth decompression corruption
+
+Rhys Perry (1):
+
+- radv: have the null winsys set more fields
+
+Roman Stratiienko (2):
+
+- v3dv: Fix dEQP-VK.info#instance_extensions test
+- v3dv: Fix V3DV_HAS_SURFACE preprocessor condition
+
+Tapani Pälli (1):
+
+- anv: allow VK_IMAGE_LAYOUT_UNDEFINED as final layout
+
+Timur Kristóf (1):
+
+- aco: Clean up and fix quad group instructions with WQM.
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.3.rst
^
|
@@ -0,0 +1,108 @@
+Mesa 21.3.3 Release Notes / 2021-12-29
+======================================
+
+Mesa 21.3.3 is a bug fix release which fixes bugs found since the 21.3.2 release.
+
+Mesa 21.3.3 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.3 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- Assassin's Creed Syndicate crashes with Mesa 21.3.0+ ACO
+- [21.3 regression] swr: Build failure with MSVC
+- anv: dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store fails
+
+
+Changes
+-------
+
+Alyssa Rosenzweig (2):
+
+- pan/indirect_draw: Don't upload garbage UBO
+- panfrost: Fix Secondary Shader field
+
+Bas Nieuwenhuizen (2):
+
+- radv: Use correct buffer size for query pool result copies.
+- radv: Skip wait timeline ioctl with 0 handles.
+
+Daniel Schürmann (2):
+
+- aco/optimizer: fix fneg modifier propagation on VOP3P
+- aco/ra: fix get_reg_for_operand() in case of stride mismatches
+
+Emma Anholt (7):
+
+- r300: Fix mis-optimization turning -1 - x into 1 - x.
+- r300: Move the instruction filter for r500_transform_IF() to the top.
+- r300: Ensure that immediates have matching negate flags too.
+- r300: Also consider ALU condition modifiers for loop DCE.
+- r300: Disable loop unrolling on r500.
+- r300/vs: Allocate temps we see a use as a source, too.
+- r300/vs: Fix flow control processing just after an endloop.
+
+Eric Engestrom (2):
+
+- .pick_status.json: Update to 4942e108909bbe0f53ec5fd52a3c7ae14c60abe6
+- .pick_status.json: Mark d49d092259829ad9e33d0d9fc8eef9759d9fe56e as denominated
+
+Francisco Jerez (1):
+
+- intel/fs: Add physical fall-through CFG edge for unconditional BREAK instruction.
+
+Jason Ekstrand (1):
+
+- vulkan/log: Don't assert on non-client-visible objects
+
+Jesse Natalie (1):
+
+- microsoft/compiler: Implement inot
+
+Liviu Prodea (1):
+
+- swr: Fix MSVC build
+
+Rob Clark (2):
+
+- freedreno/ir3: Handle instr->address when cloning
+- freedreno/computerator: Fix @buf header
+
+Samuel Pitoiset (1):
+
+- radv: re-apply "Do not access set layout during vkCmdBindDescriptorSets."
+
+Tapani Pälli (1):
+
+- glsl: fix invariant qualifer usage and matching rule for GLSL 4.20
+
+Timur Kristóf (2):
+
+- aco/optimizer_postRA: Fix combining DPP into VALU.
+- aco/optimizer_postRA: Fix applying VCC to branches.
+
+Vinson Lee (1):
+
+- panfrost: Avoid double unlock.
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.4.rst
^
|
@@ -0,0 +1,181 @@
+Mesa 21.3.4 Release Notes / 2022-01-12
+======================================
+
+Mesa 21.3.4 is a bug fix release which fixes bugs found since the 21.3.3 release.
+
+Mesa 21.3.4 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.4 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- i965: gen5 exposes EXT_texture_integer incorrectly
+- [radeonsi, regression, bisected]: Rendering issues with Factorio
+- mesa >= 21.x (with llvmpipe) will break the game JVGS
+- Panfrost - RK3399 - FACTORIO - glitches everywhere
+- Baldurs Gate 3 (Patch 6) - ribbon-like artifacts on textures
+- Game Starsector crashes under certain circumstances with mesa 21.3.+
+- FreeSpace models incorrectly rendered on Polaris cards, causing system freeze
+- Incomplete evaluation of nested DEFINE macros
+- [r300g, bisected] piglit glsl-fs-discard-04 fails
+- Panfrost G52 Firefox terrible glitches on youtube playback
+
+
+Changes
+-------
+
+Alyssa Rosenzweig (3):
+
+- pan/bi: Fix load_const of 1-bit booleans
+- gallium/util: Add pixel->blocks box helper
+- lima,panfrost: Correct pixel vs block mismatches
+
+Bas Nieuwenhuizen (1):
+
+- radv: Set optimal copy alignment to 1.
+
+Boris Brezillon (1):
+
+- microsoft/compiler: Fix dxil_nir_create_bare_samplers()
+
+Connor Abbott (2):
+
+- ir3/ra: Fix logic bug in compress_regs_left
+- ir3: Bump type mismatch penalty to 3
+
+Daniel Schürmann (2):
+
+- aco: don't allow SDWA on VOP3P instructions
+- aco: validate VOP3P opsel correctly
+
+Danylo Piliaiev (1):
+
+- tu: fix workaround for depth bounds test without depth test
+
+Dave Airlie (2):
+
+- intel/genxml/gen4-5: fix more Raster Operation in BLT to be a uint
+- crocus: fail resource allocation properly.
+
+Emma Anholt (4):
+
+- freedreno/afuc: Disable the disassembler on 32-bit builds.
+- i915g: Turn off FP16 in the vertex shaders.
+- r300: Fix omod failing to increase the number of channels stored.
+- nir_to_tgsi: Fix a bug in TXP detection after backend lowering.
+
+Eric Engestrom (3):
+
+- .pick_status.json: Update to 8a78706643ecad8a1f303cc9358873abc29978b4
+- .pick_status.json: Mark 00bea38242d97e7ace1954f1bc7d32cbf0ce3ee0 as denominated
+- .pick_status.json: Mark 2a0253b9b5d21c7571555abe3a1d851468a18740 as denominated
+
+Filip Gawin (1):
+
+- r300: fix handling swizzle in transform_source_conflicts
+
+Henry Goffin (1):
+
+- intel/compiler/test: Fix build with GCC 7
+
+Ian Romanick (1):
+
+- i965: Disable EXT_texture_integer on Gfx4 and Gfx5
+
+Jason Ekstrand (1):
+
+- Revert "anv: Stop doing too much per-sample shading"
+
+Lionel Landwerlin (3):
+
+- anv: don't leave anv_batch fields undefined
+- anv: limit compiler valid color outputs using NIR variables
+- intel/dev: fixup chv workaround
+
+Lucas Stach (3):
+
+- etnaviv: initialize vertex attributes on context reset
+- etnaviv: drm: fix size limit in etna_cmd_stream_realloc
+- etnaviv: drm: properly handle reviving BOs via a lookup
+
+Michel Zou (1):
+
+- zink: fix -Warray-bounds warning
+
+Mike Blumenkrantz (4):
+
+- radv: fix xfb query copy param ordering
+- zink: always unset vertex shader variant key data when changing last vertex stage
+- zink: add extra synchronization for buffer descriptor binds
+- zink: use device-local heap for sparse backing allocations
+
+Pavel Ondračka (1):
+
+- r300: Remove broken optimization in rc_transform_KILL
+
+Pierre-Eric Pelloux-Prayer (4):
+
+- radeonsi/gfx8: use the proper dcc clear size
+- vbo/dlist: fix loopback crash
+- vbo/dlist: add vertices to incomplete primitives
+- radeonsi/gfx10: fix si_texture_get_offset for mipmapped tex
+
+Qiang Yu (1):
+
+- glapi: should not add alias function to static_data.py
+
+Rhys Perry (1):
+
+- aco: remove pack_half_2x16(a, 0) optimization
+
+Rohan Garg (1):
+
+- intel/fs: OpImageQueryLod does not support arrayed images as an operand
+
+Roman Stratiienko (1):
+
+- v3dv: Hotfix: Rename remaining V3DV_HAS_SURFACE->V3DV_USE_WSI_PLATFORM
+
+Samuel Pitoiset (1):
+
+- radv: add drirc radv_disable_htile_layers and enable it for F1 2021
+
+Tapani Pälli (3):
+
+- iris: unref syncobjs and free r/w dependencies array for slab entries
+- mesa: free idalloc storage for display lists
+- mesa: free vbo_save_vertex_list store prims
+
+Timothy Arceri (1):
+
+- glsl/glcpp: make sure to expand new token after concatenation
+
+Yiwei Zhang (1):
+
+- venus: subtract appended header size in vn_CreatePipelineCache
+
+satmandu (1):
+
+- Fix compilation on armv7l with gcc 11.2.0
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.5.rst
^
|
@@ -0,0 +1,143 @@
+Mesa 21.3.5 Release Notes / 2022-01-26
+======================================
+
+Mesa 21.3.5 is a bug fix release which fixes bugs found since the 21.3.4 release.
+
+Mesa 21.3.5 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.5 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- glGetActiveUniform failing with GL_INVALID_VALUE for no reason
+- CopelliaSim crashes on adding vision sensor to a scene on Ubuntu 20+, runs fine on Ubuntu 18.04
+- Dirt Rally: Flickering glitches on certain foliage going from Mesa 21.2.5 to 21.3.0
+- FrontFacing input is broken on Intel/Vulkan
+- llvmpipe: Unimplemented get_driver_uuid/get_device_uuid causes segfaults in e.g. Wine
+
+
+Changes
+-------
+
+Alejandro Piñeiro (1):
+
+- vulkan: return default string for undefined enum
+
+Alyssa Rosenzweig (3):
+
+- pan/bi: Schedule around blend shader register clobbering
+- panfrost: Use u_reduced_prim for primitive checks
+- panfrost: Only cull polygons
+
+Bas Nieuwenhuizen (1):
+
+- util/fossilize_db: Fix double free in error handling.
+
+Carsten Haitzler (1):
+
+- panfrost: Don't double-free when handling error for unsupported GPU
+
+Charles Baker (1):
+
+- zink: Output PackHalf2x16 to uint not float
+
+Emma Anholt (1):
+
+- softpipe: respect !independent_blend_enable for color masks.
+
+Eric Engestrom (4):
+
+- .pick_status.json: Update to 06504fb9e2382e43b889fd6ca642bb785b544d4d
+- .pick_status.json: Mark 1b88777e97f635612c560a2f00d349ea271581b1 as denominated
+- .pick_status.json: Mark d1530a3f3b1625baa42e84cba9844b6eb4ac76ce as denominated
+- .pick_status.json: Mark 58a843ab14e0eecf044a35154da72cdf7ab6f15a as denominated
+
+Ian Romanick (1):
+
+- intel/fs: Fix gl_FrontFacing optimization on Gfx12+
+
+Kenneth Graunke (1):
+
+- iris: Fix and refactor check for clear color being fully zero
+
+Lepton Wu (1):
+
+- driconf: Fix unhandled tags in static conf
+
+Lionel Landwerlin (2):
+
+- intel/fs: disable VRS when omask is written
+- anv: fix missing descriptor copy of bufferview/surfacestate content
+
+Mike Blumenkrantz (10):
+
+- zink: always set number of timestamp results to 1 for internal qbo
+- zink: fix availability buffer sizing/copying for xfb queries
+- zink: skip readback of qbos with no results
+- Revert "zink: when performing an implicit reset, sync qbos"
+- zink: use even more accurate stride values for query result copies
+- aux/trace: copy over stream_output_target_offset method from context
+- util/vbuf: fix buffer translation sizing
+- zink: remove SpvMemorySemanticsMakeVisibleMask from nir_intrinsic_memory_barrier
+- zink: check EXT_image_drm_format_modifier for dmabuf support
+- zink: stop allocating such massive staging regions for buffer maps
+
+Pavel Ondračka (1):
+
+- r300: properly initialize new_vs in r300_draw_init_vertex_shader
+
+Pierre-Eric Pelloux-Prayer (2):
+
+- driconf: enable vs_position_always_invariant for Dirt Rally
+- mesa: use less temporaries in build_lighting
+
+Qiang Yu (1):
+
+- nir: fix nir_tex_instr hash not count is_sparse field
+
+Rhys Perry (1):
+
+- nir/unsigned_upper_bound: don't follow 64-bit f2u32()
+
+Rob Clark (5):
+
+- mesa/st: Lowered ucp should still mark rast state dirty
+- freedreno: Pass shader cache key instead of shader key
+- freedreno: Add FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE
+- freedreno/a6xx: Fix clip_mask
+- freedreno/a5xx: Fix clip_mask
+
+Stefan Brüns (1):
+
+- llvmpipe: Add get_{driver,device}_uuid implementations
+
+Tapani Pälli (2):
+
+- mesa: refactor GetProgramiv to use program resource list
+- mesa: move GetProgramInterfaceiv as a shader_query function
+
+Yiwei Zhang (1):
+
+- venus: VkExternalImageFormatProperties is optional
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.6.rst
^
|
@@ -0,0 +1,149 @@
+Mesa 21.3.6 Release Notes / 2022-02-09
+======================================
+
+Mesa 21.3.6 is a bug fix release which fixes bugs found since the 21.3.5 release.
+
+Mesa 21.3.6 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.6 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- radv: CullDistance fail
+- i965: Segmentation fault during glinfo context destruction, regression in 21.3.x
+- Vulkan Wayland WSI returns empty surface formats
+- [REGRESSION][BISECTED] iris: Qutebrowser/QtWebEngine sporadically flashes the window in white
+- Flickering Intel Uhd 620 Graphics
+- Broken Terraria & Glitches in Forza Horizon 4
+
+
+Changes
+-------
+
+Bas Nieuwenhuizen (1):
+
+- Revert "nir/algebraic: distribute fmul(fadd(a, b), c) when b and c are constants"
+
+Caio Oliveira (1):
+
+- anv: Fix subgroupSupportedStages physical property
+
+Charles Baker (1):
+
+- Revert "zink: handle vertex buffer offset overflows"
+
+Connor Abbott (2):
+
+- ir3: Fix copy-paste mistakes in ir3_block_remove_physical_predecessor()
+- ir3/cp: ir3: Prevent propagating shared regs out of loops harder
+
+Danylo Piliaiev (1):
+
+- ir3: opt_deref in opt loop to remove unnecessary tex casts
+
+Dave Airlie (1):
+
+- crocus: find correct relocation target for the bo.
+
+Emma Anholt (1):
+
+- vulkan: Fix leak of error messages
+
+Eric Engestrom (3):
+
+- .pick_status.json: Update to cb781fc350108584116280fc597c695d2f476c68
+- .pick_status.json: Mark 15e77504461a30038a054c87cc53a694171c9cf4 as denominated
+- .pick_status.json: Mark 960e72417f3e8885699cf384f690853e14ba44da as denominated
+
+Francisco Jerez (1):
+
+- intel/fs: Take into account region strides during SIMD lowering decision of SHUFFLE.
+
+Georg Lehmann (4):
+
+- vulkan/wsi/wayland: Fix add_wl_shm_format alpha/opaqueness.
+- vulkan/wsi/wayland: Convert missing vulkan formats to shm formats.
+- vulkan/wsi/wayland: Add modifiers for RGB formats.
+- vulkan/wsi/wayland: Fix add_drm_format_modifier alpha/opaqueness.
+
+Jason Ekstrand (2):
+
+- anv/pass: Don't set first_subpass_layout for stencil-only attachments
+- vulkan/wsi: Set MUTABLE_FORMAT_BIT in the prime path
+
+Kenneth Graunke (1):
+
+- i965: Avoid NULL drawbuffer in brw_flush_front
+
+Lionel Landwerlin (2):
+
+- intel/fs: don't set allow_sample_mask for CS intrinsics
+- intel/nir: fix shader call lowering
+
+Manas Chaudhary (1):
+
+- panvk: Fix pointer corruption in panvk_add_wait_event_syncobjs
+
+Mike Blumenkrantz (15):
+
+- zink: never use SpvOpImageQuerySizeLod for texel buffers
+- zink: reorder fbfetch flag-setting to avoid null deref
+- zink: fix vertex buffer mask computation for null buffers
+- zink: clamp tbo creation to maxTexelBufferElements
+- zink: add vertex shader pipeline bit for generated barrier construction
+- zink: fix waiting on current batch id
+- zink: cast image atomic op params/results based on image type
+- zink: use SpvScopeDevice over SpvScopeWorkgroup for atomic shader ops
+- zink: disable PIPE_SHADER_CAP_FP16_CONST_BUFFERS
+- llvmpipe: disable PIPE_SHADER_CAP_FP16_CONST_BUFFERS
+- llvmpipe: ci updates
+- zink: add VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT for query binds
+- zink: fix PIPE_CAP_TGSI_BALLOT export conditional
+- zink: reject invalid draws
+- zink: min/max blit region in coverage functions
+
+Nanley Chery (3):
+
+- anv: Disable CCS_E for some 8/16bpp copies on TGL+
+- anv: Use ANV_FAST_CLEAR_DEFAULT_VALUE for CCS on TGL+
+- anv: Re-enable CCS_E on TGL+
+
+Paulo Zanoni (1):
+
+- iris: implement inter-context busy-tracking
+
+Rhys Perry (3):
+
+- aco: fix neg(abs(mul(a, b))) if the mul is not VOP3
+- aco: don't encode src2 for v_writelane_b32_e64
+- radv: fix R_02881C_PA_CL_VS_OUT_CNTL with mixed cull/clip distances
+
+Samuel Pitoiset (1):
+
+- radv/winsys: fix missing buffer_make_resident() for the null winsys
+
+Yiwei Zhang (1):
+
+- tu: VkExternalImageFormatProperties is optional
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.7.rst
^
|
@@ -0,0 +1,148 @@
+Mesa 21.3.7 Release Notes / 2022-02-23
+======================================
+
+Mesa 21.3.7 is a bug fix release which fixes bugs found since the 21.3.6 release.
+
+Mesa 21.3.7 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.7 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- lavapipe: dEQP-VK.spirv_assembly.instruction.compute.float16.arithmetic_3.step fails
+- ANV: Bad output from TransformFeedback. Regression from Mesa 21. Something to do with VB+XFB -> VB+XFB dependency?
+
+
+Changes
+-------
+
+Alyssa Rosenzweig (4):
+
+- pan/bi: Avoid \*FADD.v2f16 hazard in optimizer
+- pan/bi: Avoid \*FADD.v2f16 hazard in scheduler
+- pan/bi: Lower swizzles on CSEL.i32/MUX.i32
+- panvk: Use more reliable assert for UBO pushing
+
+Bas Nieuwenhuizen (1):
+
+- radv: Fix preamble argument order.
+
+Connor Abbott (1):
+
+- ir3/spill: Fix simplify_phi_nodes with multiple loop nesting
+
+Dave Airlie (3):
+
+- lavapipe: fix sampler + sampler view leaks.
+- lavapipe: reference gallium fences correctly.
+- crocus: fix leak on gen4/5 stencil fallback blit path.
+
+Emma Anholt (1):
+
+- i915g: Initialize the rest of the "from_nir" temporary VS struct.
+
+Eric Engestrom (2):
+
+- .pick_status.json: Update to dabba7d7263be6ffb6f3676465e92c65952fa824
+- .pick_status.json: Mark b07372312d7053f2ef5c858ceb1fbf9ade5e7c52 as denominated
+
+Ian Romanick (9):
+
+- gallivm/nir: Call nir_lower_bool_to_int32 after nir_opt_algebraic_late
+- nir: All set-on-comparison opcodes can take all float types
+- intel/fs: Don't optimize out 1.0*x and -1.0*x
+- spirv: Produce correct result for GLSLstd450Step with NaN
+- spirv: Produce correct result for GLSLstd450Modf with Inf
+- spirv: Produce correct result for GLSLstd450Tanh with NaN
+- nir: Properly handle various exceptional values in frexp
+- nir: Produce correct results for atan with NaN
+- nir: Add missing dependency on nir_opcodes.py
+
+Jason Ekstrand (1):
+
+- anv: Call vk_command_buffer_finish if create fails
+
+Jonathan Gray (1):
+
+- dri: avoid NULL deref of DrawBuffer on flush
+
+Lionel Landwerlin (2):
+
+- nir: fix lower_memcpy
+- anv/genxml/intel/fs: fix binding shader record entry
+
+Marcin Ślusarz (1):
+
+- anv: don't set color state when input state was requested
+
+Marek Olšák (1):
+
+- ac/surface: add more elements to meta equations because HTILE can use them
+
+Mike Blumenkrantz (4):
+
+- lavapipe: use util_pack_color_union() for generating clear colors
+- aux/draw: fix llvm tcs lane vec generation
+- zink: always set VkPipelineMultisampleStateCreateInfo::pSampleMask
+- zink: always invalidate streamout counter buffer if not resuming
+
+Nanley Chery (1):
+
+- iris: Don't fast clear with the view format
+
+Pavel Ondračka (1):
+
+- r300: fix transformation of abs modifiers with negate
+
+Qiang Yu (3):
+
+- radeonsi: workaround Specviewperf13 Catia hang on GFX9
+- radeonsi: fix depth stencil multi sample texture blit
+- glx: fix pbuffer refcount init
+
+Samuel Pitoiset (1):
+
+- radv/winsys: fix initializing debug/perftest options if multiple instances
+
+Tapani Pälli (5):
+
+- intel/genxml: add PIPE_CONTROL field for L3 read only cache invalidation
+- anv: invalidate L3 read only cache when VF cache is invalidated
+- iris: invalidate L3 read only cache when VF cache is invalidated
+- iris: fix a leak on surface states
+- mesa/st: always use DXT5 when transcoding ASTC format
+
+Thierry Reding (2):
+
+- tegra: Use private reference count for sampler views
+- tegra: Use private reference count for resources
+
+Timur Kristóf (1):
+
+- radv: Disable IB2 on compute queues.
+
+Yiwei Zhang (1):
+
+- venus: properly destroy deferred ahb image before real image creation
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.8.rst
^
|
@@ -0,0 +1,183 @@
+Mesa 21.3.8 Release Notes / 2022-03-18
+======================================
+
+Mesa 21.3.8 is a bug fix release which fixes bugs found since the 21.3.7 release.
+
+Mesa 21.3.8 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.8 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- freedreno: deqp cts fails
+- radeonsi dEQP-GLES3.functional.buffer.map.write.explicit_flush.* flake crashes
+- Square Artifacts Dragons Dogma
+- radv: When using VS+PS only, primitive ID is mixed up after NGG culling
+- Redraw freeze after upgrade to Xwayland 21.1.3
+- anv doesn't always resolve aux buffers with private bindings on transition to external queue
+- [ANV] Rendering corruption in DOOM Eternal
+
+
+Changes
+-------
+
+Adam Jackson (1):
+
+- meson: Add "amber" option for automatic LTS build configuration
+
+Alyssa Rosenzweig (6):
+
+- panfrost: Fix FD resource_get_handle
+- panfrost: Handle NULL sampler views
+- panfrost: Handle NULL samplers
+- panfrost: Flush resources when shadowing
+- panfrost: Push twice as many uniforms
+- panfrost: Fix set_sampler_views for big GL
+
+Connor Abbott (4):
+
+- ir3: Don't always set bindless_tex with readonly images
+- ir3/nir: Fix 1d array readonly images
+- ir3/ra: Sanitize parallel copy flags better
+- util/bitset: Fix off-by-one in __bitset_set_range
+
+Danylo Piliaiev (1):
+
+- turnip: Use LATE_Z when there might be depth/stencil feedback loop
+
+Dave Airlie (5):
+
+- draw/so: don't use pre clip pos if we have a tes either.
+- crocus: change the line width workaround for gfx4/5
+- gallivm/nir: extract a valid texture index according to exec_mask.
+- zink: workaround depth texture mode alpha.
+- lavapipe: remove broken workaround for zink depth texturing.
+
+Eric Engestrom (14):
+
+- .pick_status.json: Update to 2106c3bab6bdea736c468fb1866fd0f372cc0baa
+- .pick_status.json: Mark 7ec0e2b89351e6e56cb112e00e6c68c6bbc6faea as denominated
+- .pick_status.json: Mark 0136545d169adb75e4f9f6b4de38eef0817c1241 as denominated
+- .pick_status.json: Mark 62b8daa889daefb2f191a63f370541bf2b807e88 as denominated
+- .pick_status.json: Mark 698ae34844b7199b8acc3b4d74a9cad3b903bdef as denominated
+- .pick_status.json: Mark 03a80490a47b0b616566c6f56581560694976b1a as denominated
+- .pick_status.json: Mark e1964e1dde7bf44ceeaf3fa8b3869e791af4a369 as denominated
+- .pick_status.json: Mark 3ef093f697ad9027ba514c7a4a6a10b7bd95bd47 as denominated
+- .pick_status.json: Mark 2d1b506acfe55165511a2bb83acb013353e531ab as denominated
+- .pick_status.json: Mark 204ea77b0674fb611155bd3ba2e6169cc8646b3f as denominated
+- .pick_status.json: Mark a5c7d34fdf8403b0115d5eead7ca67027e93efc7 as denominated
+- .pick_status.json: Mark 432700fc61a33e0c040d47d9b7bd8cfe970d35cc as denominated
+- .pick_status.json: Mark 4ed7329236a576b6b6f615787bb722b960f32c6b as denominated
+- .pick_status.json: Mark 3f7da0c58447979976eb2928625b1f93154f6c57 as denominated
+
+Erik Faye-Lund (2):
+
+- docs: remove incorrect drivers from extension
+- docs: fixup zink gl 4.3 requirements
+
+Icecream95 (6):
+
+- panfrost: Set PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION
+- pan/bi: Check dependencies of both destinations of instructions
+- panfrost: Set dirty state in set_shader_buffers
+- panfrost: Re-emit descriptors after resource shadowing
+- pan/bi: Make disassembler build reproducibly
+- panfrost: Fix ubo_mask calculation
+
+Jason Ekstrand (2):
+
+- anv: Don't assume depth/stencil attachments have depth
+- lavapipe: Reset the free_cmd_buffers list in TrimCommandPool
+
+Jonathan Gray (6):
+
+- util: unbreak non-linux mips64 build
+- util: fix util_cpu_detect_once() build on OpenBSD
+- util/u_atomic: fix build on clang archs without 64-bit atomics
+- util: fix build with clang 10 on mips64
+- util: use correct type in sysctl argument
+- radv: use MAJOR_IN_SYSMACROS for sysmacros.h include
+
+Lionel Landwerlin (3):
+
+- anv: fix conditional render for vkCmdDrawIndirectByteCountEXT
+- anv: fix fast clear type value with external images
+- intel/fs: fix total_scratch computation
+
+Marek Olšák (2):
+
+- amd: add a workaround for an SQ perf counter bug
+- radeonsi: fix an assertion failure with register shadowing
+
+Mike Blumenkrantz (16):
+
+- gallivm: avoid division by zero when computing cube face
+- zink: always update shader variants when rebinding a gfx program
+- zink: use a fence for pipeline cache update jobs
+- zink: wait on program cache fences before destroying programs
+- zink: fix descriptor cache pointer array allocation
+- zink: mark fbfetch push sets as non-cached
+- zink: stop leaking descriptor sets
+- zink: invalidate non-punted recycled descriptor sets that are not valid
+- zink: fix 64bit float shader ops
+- llvmpipe: fix debug print iterating in set_framebuffer_state
+- llvmpipe: clamp surface clear geometry
+- lavapipe: update multisample state after blend state
+- aux/trace: rzalloc the context struct
+- zink: lower dmod on AMD hardware
+- lavapipe: skip format checks for EXTENDED_USAGE
+- lavapipe: run nir_opt_copy_prop_vars during optimization loop
+
+Paulo Zanoni (1):
+
+- iris: fix register spilling on compute shaders on XeHP
+
+Pierre-Eric Pelloux-Prayer (3):
+
+- radeonsi: change rounding mode to round to even
+- util/slab: add slab_zalloc
+- gallium/tc: zero alloc transfers
+
+Rhys Perry (2):
+
+- anv: Enable nir_opt_access
+- radv: include adjust_frag_coord_z in key
+
+Rob Clark (1):
+
+- mesa: Fix discard_framebuffer for fbo vs winsys
+
+Samuel Pitoiset (2):
+
+- radv,drirc: move RADV workarounds to 00-radv-defaults.conf
+- radv: disable DCC for Fable Anniversary, Dragons Dogma, GTA IV and more
+
+Timur Kristóf (1):
+
+- ac/nir/ngg: Fix mixed up primitive ID after culling.
+
+Xiaohui Gu (1):
+
+- iris: Mark a dirty update when vs_needs_sgvs_element value changed
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/docs/relnotes/21.3.9.rst
^
|
@@ -0,0 +1,119 @@
+Mesa 21.3.9 Release Notes / 2022-06-08
+======================================
+
+Mesa 21.3.9 is a bug fix release which fixes bugs found since the 21.3.8 release.
+
+Mesa 21.3.9 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 21.3.9 implements the Vulkan 1.2 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- [regression][bisected] MSVC: Build failure in libmesa_util when targeting x86 32-bit
+- A crash in radeonsi driver
+
+
+Changes
+-------
+
+Alyssa Rosenzweig (2):
+
+- panfrost: Emulate GL_CLAMP on Bifrost
+- pan/bi: Handle non-2D arrays
+
+Dave Airlie (2):
+
+- crocus: don't map scanout buffers as write-back
+- intel/perf: use a function to do common allocations
+
+Emma Anholt (2):
+
+- vulkan: Make sure we've loaded our connectors when querying plane props.
+- intel/perf: Move some static blocks of C code out of the python script.
+
+Eric Engestrom (3):
+
+- .pick_status.json: Update to 05d687723530ed3c5c9f7d0addb3b047138613a3
+- .pick_status.json: Update to fc381fa1e341bc9cf71f53a915dc47926a3f621f
+- Revert "glx: Fix build errors with --enable-mangling (v2)"
+
+Erik Faye-Lund (1):
+
+- Revert "ci: downgrade sphinx to v3.x"
+
+Georg Lehmann (1):
+
+- radv: Don't hash ycbcr sampler base object.
+
+Ian Romanick (1):
+
+- intel/fs: Force destination types on DP4A instructions
+
+Icecream95 (2):
+
+- panfrost: Don't initialise the trampolines array
+- panfrost: Optimise recalculation of max sampler view
+
+Jason Ekstrand (1):
+
+- anv: Allow MSAA resolve with different numbers of planes
+
+Jonathan Gray (1):
+
+- util: fix msvc build
+
+Lionel Landwerlin (4):
+
+- anv: fix variable shadowing
+- anv: zero-out anv_batch_bo
+- anv: emit timestamp & availability using the same part of CS
+- anv: flush tile cache with query copy command
+
+Matt Turner (8):
+
+- intel/perf: Don't print leading space from desc_units()
+- intel/perf: Deduplicate perf counters
+- intel/perf: Use a function to initialize perf counters
+- intel/perf: Use slimmer intel_perf_query_counter_data struct
+- intel/perf: Store indices to strings rather than pointers
+- intel/perf: Mark intel_perf_counter_* enums as PACKED
+- intel/perf: Fix mistake in description string
+- intel/perf: Destination array calculation into function
+
+Mike Blumenkrantz (9):
+
+- llvmpipe: fix occlusion queries with early depth test
+- anv: fix xfb usage with rasterizer discard
+- anv: fix CmdSetColorWriteEnableEXT for maximum rts
+- anv: fix some dynamic rasterization discard cases in pipeline construction
+- lavapipe: always clone shader nir for shader states
+- gallivm: fix oob image detection for cube/1dArray/2dArray/cubeArray
+- zink: flag sample locations for re-set on batch flush
+- zink: force-add usage when adding last-ref tracking
+- zink: only update usage on buffer rebind if rebinds occurred
+
+Pierre-Eric Pelloux-Prayer (1):
+
+- radeonsi: don't clear framebuffer.state before dcc decomp
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/meson.build
^
|
@@ -39,6 +39,13 @@
error('`mirror` is the only build directory layout supported')
endif
+amber = get_option('amber')
+if amber
+ package_version_suffix=' Amber'
+else
+ package_version_suffix=''
+endif
+
# Arguments for the preprocessor, put these in a separate array from the C and
# C++ (cpp in meson terminology) arguments since they need to be added to the
# default arguments for both C and C++.
@@ -46,13 +53,18 @@
'-D__STDC_CONSTANT_MACROS',
'-D__STDC_FORMAT_MACROS',
'-D__STDC_LIMIT_MACROS',
- '-DPACKAGE_VERSION="@0@"'.format(meson.project_version()),
+ '-DPACKAGE_VERSION="@0@@1@"'.format(meson.project_version(), package_version_suffix),
'-DPACKAGE_BUGREPORT="https://gitlab.freedesktop.org/mesa/mesa/-/issues"',
]
c_args = []
cpp_args = []
with_moltenvk_dir = get_option('moltenvk-dir')
+
+if amber
+ pre_args += '-DAMBER'
+endif
+
with_vulkan_icd_dir = get_option('vulkan-icd-dir')
with_tests = get_option('build-tests')
with_aco_tests = get_option('build-aco-tests')
@@ -199,7 +211,9 @@
gallium_drivers = get_option('gallium-drivers')
if gallium_drivers.contains('auto')
- if system_has_kms_drm
+ if amber
+ gallium_drivers = []
+ elif system_has_kms_drm
# TODO: PPC, Sparc
if ['x86', 'x86_64'].contains(host_machine.cpu_family())
gallium_drivers = [
@@ -262,7 +276,9 @@
_vulkan_drivers = get_option('vulkan-drivers')
if _vulkan_drivers.contains('auto')
if system_has_kms_drm
- if host_machine.cpu_family().startswith('x86')
+ if amber
+ _vulkan_drivers = []
+ elif host_machine.cpu_family().startswith('x86')
_vulkan_drivers = ['amd', 'intel', 'swrast']
elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
_vulkan_drivers = ['swrast']
@@ -531,8 +547,22 @@
endif
endif
-with_glvnd = get_option('glvnd')
+_glvnd = get_option('glvnd')
+if _glvnd == 'true' or _glvnd == 'enabled' or (amber and _glvnd == 'auto')
+ with_glvnd = true
+else
+ with_glvnd = false
+endif
+
glvnd_vendor_name = get_option('glvnd-vendor-name')
+if glvnd_vendor_name == 'auto'
+ if amber
+ glvnd_vendor_name = 'amber'
+ else
+ glvnd_vendor_name = 'mesa'
+ endif
+endif
+
if with_glvnd
if with_platform_windows
error('glvnd cannot be used on Windows')
@@ -1211,7 +1241,7 @@
endif
else
add_project_link_arguments(
- cc.get_supported_arguments(
+ cc.get_supported_link_arguments(
'-Wl,--nxcompat',
'-Wl,--dynamicbase',
'-static-libgcc',
@@ -1220,7 +1250,7 @@
language : ['c'],
)
add_project_link_arguments(
- cpp.get_supported_arguments(
+ cpp.get_supported_link_arguments(
'-Wl,--nxcompat',
'-Wl,--dynamicbase',
'-static-libgcc',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/meson_options.txt
^
|
@@ -302,8 +302,9 @@
)
option(
'glvnd',
- type : 'boolean',
- value : false,
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false', 'enabled', 'disabled'],
description : 'Enable GLVND support.'
)
option(
@@ -321,7 +322,7 @@
option(
'glvnd-vendor-name',
type : 'string',
- value : 'mesa',
+ value : 'auto',
description : 'Vendor name string to use for glvnd libraries'
)
option(
@@ -457,14 +458,16 @@
)
option(
'prefer-iris',
- type : 'boolean',
- value : true,
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false'],
description : 'Prefer new Intel iris driver over older i965 driver'
)
option(
'prefer-crocus',
- type : 'boolean',
- value : false,
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false'],
description : 'Prefer new crocus driver over older i965 driver for gen4-7'
)
option('egl-lib-suffix',
@@ -530,3 +533,9 @@
value : false,
description : 'Build gallium VMware/svga driver with mksGuestStats instrumentation.'
)
+option(
+ 'amber',
+ type : 'boolean',
+ value : false,
+ description : 'Configure LTS build to coexist with Mesa >= 22.0'
+)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.c
^
|
@@ -1080,6 +1080,8 @@
}
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS;
+ info->never_stop_sq_perf_counters = info->chip_class == GFX10 ||
+ info->chip_class == GFX10_3;
info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104;
if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) {
@@ -1168,6 +1170,7 @@
fprintf(f, " has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug);
fprintf(f, " has_32bit_predication = %i\n", info->has_32bit_predication);
fprintf(f, " has_3d_cube_border_color_mipmap = %i\n", info->has_3d_cube_border_color_mipmap);
+ fprintf(f, " never_stop_sq_perf_counters = %i\n", info->never_stop_sq_perf_counters);
fprintf(f, "Display features:\n");
fprintf(f, " use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.h
^
|
@@ -83,6 +83,7 @@
bool has_cs_regalloc_hang_bug;
bool has_32bit_predication;
bool has_3d_cube_border_color_mipmap;
+ bool never_stop_sq_perf_counters;
/* Display features. */
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_nir_lower_ngg.c
^
|
@@ -45,6 +45,7 @@
nir_variable *prim_exp_arg_var;
nir_variable *es_accepted_var;
nir_variable *gs_accepted_var;
+ nir_variable *gs_vtx_indices_vars[3];
struct u_vector saved_uniforms;
@@ -317,11 +318,16 @@
return arg;
}
-static nir_ssa_def *
-ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex)
+static void
+ngg_nogs_init_vertex_indices_vars(nir_builder *b, nir_function_impl *impl, lower_ngg_nogs_state *st)
{
- return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u),
- nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u));
+ for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v) {
+ st->gs_vtx_indices_vars[v] = nir_local_variable_create(impl, glsl_uint_type(), "gs_vtx_addr");
+
+ nir_ssa_def *vtx = nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = v / 2u),
+ nir_imm_int(b, (v & 1u) * 16u), nir_imm_int(b, 16u));
+ nir_store_var(b, st->gs_vtx_indices_vars[v], vtx, 0x1);
+ }
}
static nir_ssa_def *
@@ -333,13 +339,8 @@
} else {
nir_ssa_def *vtx_idx[3] = {0};
- vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0);
- vtx_idx[1] = st->num_vertices_per_primitives >= 2
- ? ngg_input_primitive_vertex_index(b, 1)
- : nir_imm_zero(b, 1, 32);
- vtx_idx[2] = st->num_vertices_per_primitives >= 3
- ? ngg_input_primitive_vertex_index(b, 2)
- : nir_imm_zero(b, 1, 32);
+ for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v)
+ vtx_idx[v] = nir_load_var(b, st->gs_vtx_indices_vars[v]);
return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL, st->use_edgeflags);
}
@@ -358,12 +359,20 @@
arg = emit_ngg_nogs_prim_exp_arg(b, st);
if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) {
- /* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */
- nir_ssa_def *prim_id = nir_build_load_primitive_id(b);
- nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx);
- nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
+ nir_ssa_def *prim_valid = nir_ieq_imm(b, nir_ushr_imm(b, arg, 31), 0);
+ nir_if *if_prim_valid = nir_push_if(b, prim_valid);
+ {
+ /* Copy Primitive IDs from GS threads to the LDS address
+ * corresponding to the ES thread of the provoking vertex.
+ * It will be exported as a per-vertex attribute.
+ */
+ nir_ssa_def *prim_id = nir_build_load_primitive_id(b);
+ nir_ssa_def *provoking_vtx_idx = nir_load_var(b, st->gs_vtx_indices_vars[st->provoking_vtx_idx]);
+ nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
- nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u);
+ nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u);
+ }
+ nir_pop_if(b, if_prim_valid);
}
nir_build_export_primitive_amd(b, arg);
@@ -747,6 +756,7 @@
nir_ssa_def *vtx_addr = nir_load_var(b, gs_vtxaddr_vars[v]);
nir_ssa_def *exporter_vtx_idx = nir_build_load_shared(b, 1, 8, vtx_addr, .base = lds_es_exporter_tid, .align_mul = 1u);
exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx);
+ nir_store_var(b, nogs_state->gs_vtx_indices_vars[v], exporter_vtx_indices[v], 0x1);
}
nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL, nogs_state->use_edgeflags);
@@ -1142,7 +1152,7 @@
/* Load vertex indices from input VGPRs */
nir_ssa_def *vtx_idx[3] = {0};
for (unsigned vertex = 0; vertex < 3; ++vertex)
- vtx_idx[vertex] = ngg_input_primitive_vertex_index(b, vertex);
+ vtx_idx[vertex] = nir_load_var(b, nogs_state->gs_vtx_indices_vars[vertex]);
nir_ssa_def *vtx_addr[3] = {0};
nir_ssa_def *pos[3][4] = {0};
@@ -1320,6 +1330,8 @@
nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body));
b->cursor = nir_before_cf_list(&impl->body);
+ ngg_nogs_init_vertex_indices_vars(b, impl, &state);
+
if (!can_cull) {
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/common/ac_surface.h
^
|
@@ -223,7 +223,7 @@
* The gfx10 HTILE equation is chip-specific, it requires 64KB_Z_X, and it varies with:
* - number of samples
*/
- uint16_t gfx10_bits[60];
+ uint16_t gfx10_bits[64];
} u;
};
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_assembler.cpp
^
|
@@ -625,6 +625,10 @@
encoding = 0;
if (instr->opcode == aco_opcode::v_interp_mov_f32) {
encoding = 0x3 & instr->operands[0].constantValue();
+ } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) {
+ encoding |= instr->operands[0].physReg() << 0;
+ encoding |= instr->operands[1].physReg() << 9;
+ /* Encoding src2 works fine with hardware but breaks some disassemblers. */
} else {
for (unsigned i = 0; i < instr->operands.size(); i++)
encoding |= instr->operands[i].physReg() << (i * 9);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_instruction_selection.cpp
^
|
@@ -3189,12 +3189,8 @@
case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
case nir_op_pack_half_2x16_split: {
if (dst.regClass() == v1) {
- nir_const_value* val = nir_src_as_const_value(instr->src[1].src);
- if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) {
- /* upper bits zero on GFX6-GFX9 */
- bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
- } else if (!ctx->block->fp_mode.care_about_round16_64 ||
- ctx->block->fp_mode.round16_64 == fp_round_tz) {
+ if (!ctx->block->fp_mode.care_about_round16_64 ||
+ ctx->block->fp_mode.round16_64 == fp_round_tz) {
if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
else
@@ -8459,146 +8455,106 @@
}
break;
}
- case nir_intrinsic_quad_broadcast: {
- Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!nir_dest_is_divergent(instr->dest)) {
- emit_uniform_subgroup(ctx, instr, src);
- } else {
- Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- unsigned lane = nir_src_as_const_value(instr->src[1])->u32;
- uint32_t dpp_ctrl = dpp_quad_perm(lane, lane, lane, lane);
-
- if (instr->dest.ssa.bit_size != 1)
- src = as_vgpr(ctx, src);
-
- if (instr->dest.ssa.bit_size == 1) {
- assert(src.regClass() == bld.lm);
- assert(dst.regClass() == bld.lm);
- uint32_t half_mask = 0x11111111u << lane;
- Temp mask_tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
- Operand::c32(half_mask), Operand::c32(half_mask));
- Temp tmp = bld.tmp(bld.lm);
- bld.sop1(Builder::s_wqm, Definition(tmp),
- bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp,
- bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src,
- Operand(exec, bld.lm))));
- emit_wqm(bld, tmp, dst);
- } else if (instr->dest.ssa.bit_size == 8) {
- Temp tmp = bld.tmp(v1);
- if (ctx->program->chip_class >= GFX8)
- emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
- else
- emit_wqm(bld,
- bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl),
- tmp);
- bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
- } else if (instr->dest.ssa.bit_size == 16) {
- Temp tmp = bld.tmp(v1);
- if (ctx->program->chip_class >= GFX8)
- emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
- else
- emit_wqm(bld,
- bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl),
- tmp);
- bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
- } else if (instr->dest.ssa.bit_size == 32) {
- if (ctx->program->chip_class >= GFX8)
- emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst);
- else
- emit_wqm(bld,
- bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl),
- dst);
- } else if (instr->dest.ssa.bit_size == 64) {
- Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
- bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
- if (ctx->program->chip_class >= GFX8) {
- lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
- hi = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl));
- } else {
- lo = emit_wqm(
- bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl));
- hi = emit_wqm(
- bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl));
- }
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
- emit_split_vector(ctx, dst, 2);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- }
- break;
- }
+ case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_quad_swizzle_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
+
if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src);
break;
}
+
+ /* Quad broadcast lane. */
+ unsigned lane = 0;
+ /* Use VALU for the bool instructions that don't have a SALU-only special case. */
+ bool bool_use_valu = instr->dest.ssa.bit_size == 1;
+
uint16_t dpp_ctrl = 0;
+
switch (instr->intrinsic) {
case nir_intrinsic_quad_swap_horizontal: dpp_ctrl = dpp_quad_perm(1, 0, 3, 2); break;
case nir_intrinsic_quad_swap_vertical: dpp_ctrl = dpp_quad_perm(2, 3, 0, 1); break;
case nir_intrinsic_quad_swap_diagonal: dpp_ctrl = dpp_quad_perm(3, 2, 1, 0); break;
case nir_intrinsic_quad_swizzle_amd: dpp_ctrl = nir_intrinsic_swizzle_mask(instr); break;
+ case nir_intrinsic_quad_broadcast:
+ lane = nir_src_as_const_value(instr->src[1])->u32;
+ dpp_ctrl = dpp_quad_perm(lane, lane, lane, lane);
+ bool_use_valu = false;
+ break;
default: break;
}
- if (ctx->program->chip_class < GFX8)
- dpp_ctrl |= (1 << 15);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
+ Temp tmp(dst);
- if (instr->dest.ssa.bit_size != 1)
- src = as_vgpr(ctx, src);
-
- if (instr->dest.ssa.bit_size == 1) {
- assert(src.regClass() == bld.lm);
+ /* Setup source. */
+ if (bool_use_valu)
src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
Operand::c32(-1), src);
+ else if (instr->dest.ssa.bit_size != 1)
+ src = as_vgpr(ctx, src);
+
+ /* Setup temporary destination. */
+ if (bool_use_valu)
+ tmp = bld.tmp(v1);
+ else if (ctx->program->stage == fragment_fs)
+ tmp = bld.tmp(dst.regClass());
+
+ if (instr->dest.ssa.bit_size == 1 && instr->intrinsic == nir_intrinsic_quad_broadcast) {
+ /* Special case for quad broadcast using SALU only. */
+ assert(src.regClass() == bld.lm && tmp.regClass() == bld.lm);
+
+ uint32_t half_mask = 0x11111111u << lane;
+ Operand mask_tmp = bld.lm.bytes() == 4
+ ? Operand::c32(half_mask)
+ : bld.pseudo(aco_opcode::p_create_vector, bld.def(bld.lm),
+ Operand::c32(half_mask), Operand::c32(half_mask));
+
+ src =
+ bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
+ src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src);
+ bld.sop1(Builder::s_wqm, Definition(tmp), src);
+ } else if (instr->dest.ssa.bit_size <= 32 || bool_use_valu) {
+ unsigned excess_bytes = bool_use_valu ? 0 : 4 - instr->dest.ssa.bit_size / 8;
+ Definition def = excess_bytes ? bld.def(v1) : Definition(tmp);
+
if (ctx->program->chip_class >= GFX8)
- src = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
- else
- src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl);
- Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
- emit_wqm(bld, tmp, dst);
- } else if (instr->dest.ssa.bit_size == 8) {
- Temp tmp = bld.tmp(v1);
- if (ctx->program->chip_class >= GFX8)
- emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
- else
- emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
- bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
- } else if (instr->dest.ssa.bit_size == 16) {
- Temp tmp = bld.tmp(v1);
- if (ctx->program->chip_class >= GFX8)
- emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
- else
- emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
- bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
- } else if (instr->dest.ssa.bit_size == 32) {
- Temp tmp;
- if (ctx->program->chip_class >= GFX8)
- tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
+ bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl);
else
- tmp = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl);
- emit_wqm(bld, tmp, dst);
+ bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) | dpp_ctrl);
+
+ if (excess_bytes)
+ bld.pseudo(aco_opcode::p_split_vector, Definition(tmp),
+ bld.def(RegClass::get(tmp.type(), excess_bytes)), def.getTemp());
} else if (instr->dest.ssa.bit_size == 64) {
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
+
if (ctx->program->chip_class >= GFX8) {
- lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_ir.cpp
^
|
@@ -187,7 +187,7 @@
if (!instr->isVALU())
return false;
- if (chip < GFX8 || instr->isDPP())
+ if (chip < GFX8 || instr->isDPP() || instr->isVOP3P())
return false;
if (instr->isSDWA())
@@ -318,6 +318,8 @@
return false;
if (instr->format == Format::VOP3)
return false;
+ if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr))
+ return false;
}
/* there are more cases but those all take 64-bit inputs */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer.cpp
^
|
@@ -3135,10 +3135,12 @@
* if 0 - pick selection from fneg->lo
* if 1 - pick selection from fneg->hi
*/
- bool opsel_lo = vop3p->opsel_lo & (1 << i);
- bool opsel_hi = vop3p->opsel_hi & (1 << i);
- vop3p->neg_lo[i] ^= true ^ (opsel_lo ? fneg->neg_hi[0] : fneg->neg_lo[0]);
- vop3p->neg_hi[i] ^= true ^ (opsel_hi ? fneg->neg_hi[0] : fneg->neg_lo[0]);
+ bool opsel_lo = (vop3p->opsel_lo >> i) & 1;
+ bool opsel_hi = (vop3p->opsel_hi >> i) & 1;
+ bool neg_lo = true ^ fneg->neg_lo[0] ^ fneg->neg_lo[1];
+ bool neg_hi = true ^ fneg->neg_hi[0] ^ fneg->neg_hi[1];
+ vop3p->neg_lo[i] ^= opsel_lo ? neg_hi : neg_lo;
+ vop3p->neg_hi[i] ^= opsel_hi ? neg_hi : neg_lo;
vop3p->opsel_lo ^= ((opsel_lo ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i;
vop3p->opsel_hi ^= ((opsel_hi ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i;
@@ -3324,12 +3326,16 @@
VOP3_instruction& new_mul = instr->vop3();
if (mul_instr->isVOP3()) {
VOP3_instruction& mul = mul_instr->vop3();
- new_mul.neg[0] = mul.neg[0] && !is_abs;
- new_mul.neg[1] = mul.neg[1] && !is_abs;
- new_mul.abs[0] = mul.abs[0] || is_abs;
- new_mul.abs[1] = mul.abs[1] || is_abs;
+ new_mul.neg[0] = mul.neg[0];
+ new_mul.neg[1] = mul.neg[1];
+ new_mul.abs[0] = mul.abs[0];
+ new_mul.abs[1] = mul.abs[1];
new_mul.omod = mul.omod;
}
+ if (is_abs) {
+ new_mul.neg[0] = new_mul.neg[1] = false;
+ new_mul.abs[0] = new_mul.abs[1] = true;
+ }
new_mul.neg[0] ^= true;
new_mul.clamp = false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer_postRA.cpp
^
|
@@ -50,18 +50,6 @@
Idx const_or_undef{UINT32_MAX, 2};
Idx written_by_multiple_instrs{UINT32_MAX, 3};
-bool
-is_instr_after(Idx second, Idx first)
-{
- if (first == not_written_in_block && second != not_written_in_block)
- return true;
-
- if (!first.found() || !second.found())
- return false;
-
- return second.block > first.block || (second.block == first.block && second.instr > first.instr);
-}
-
struct pr_opt_ctx {
Program* program;
Block* current_block;
@@ -151,6 +139,44 @@
return instr_idx;
}
+bool
+is_clobbered_since(pr_opt_ctx& ctx, PhysReg reg, RegClass rc, const Idx& idx)
+{
+ /* If we didn't find an instruction, assume that the register is clobbered. */
+ if (!idx.found())
+ return true;
+
+ /* TODO: We currently can't keep track of subdword registers. */
+ if (rc.is_subdword())
+ return true;
+
+ unsigned begin_reg = reg.reg();
+ unsigned end_reg = begin_reg + rc.size();
+ unsigned current_block_idx = ctx.current_block->index;
+
+ for (unsigned r = begin_reg; r < end_reg; ++r) {
+ Idx& i = ctx.instr_idx_by_regs[current_block_idx][r];
+ if (i == clobbered || i == written_by_multiple_instrs)
+ return true;
+ else if (i == not_written_in_block)
+ continue;
+
+ assert(i.found());
+
+ if (i.block > idx.block || (i.block == idx.block && i.instr > idx.instr))
+ return true;
+ }
+
+ return false;
+}
+
+template <typename T>
+bool
+is_clobbered_since(pr_opt_ctx& ctx, const T& t, const Idx& idx)
+{
+ return is_clobbered_since(ctx, t.physReg(), t.regClass(), idx);
+}
+
void
try_apply_branch_vcc(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
@@ -177,16 +203,19 @@
Idx op0_instr_idx = last_writer_idx(ctx, instr->operands[0]);
Idx last_vcc_wr_idx = last_writer_idx(ctx, vcc, ctx.program->lane_mask);
- Idx last_exec_wr_idx = last_writer_idx(ctx, exec, ctx.program->lane_mask);
/* We need to make sure:
+ * - the instructions that wrote the operand register and VCC are both found
* - the operand register used by the branch, and VCC were both written in the current block
- * - VCC was NOT written after the operand register
- * - EXEC is sane and was NOT written after the operand register
+ * - EXEC hasn't been clobbered since the last VCC write
+ * - VCC hasn't been clobbered since the operand register was written
+ * (i.e. the last VCC writer precedes the op0 writer)
*/
if (!op0_instr_idx.found() || !last_vcc_wr_idx.found() ||
- !is_instr_after(last_vcc_wr_idx, last_exec_wr_idx) ||
- !is_instr_after(op0_instr_idx, last_vcc_wr_idx))
+ op0_instr_idx.block != ctx.current_block->index ||
+ last_vcc_wr_idx.block != ctx.current_block->index ||
+ is_clobbered_since(ctx, exec, ctx.program->lane_mask, last_vcc_wr_idx) ||
+ is_clobbered_since(ctx, vcc, ctx.program->lane_mask, op0_instr_idx))
return;
Instruction* op0_instr = ctx.get(op0_instr_idx);
@@ -346,6 +375,17 @@
void
try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
+ /* We are looking for the following pattern:
+ *
+ * v_mov_dpp vA, vB, ... ; move instruction with DPP
+ * v_xxx vC, vA, ... ; current instr that uses the result from the move
+ *
+ * If possible, the above is optimized into:
+ *
+ * v_xxx_dpp vC, vB, ... ; current instr modified to use DPP directly
+ *
+ */
+
if (!instr->isVALU() || instr->isDPP() || !can_use_DPP(instr, false))
return;
@@ -365,8 +405,8 @@
(!mov->definitions[0].tempId() || ctx.uses[mov->definitions[0].tempId()] > 1))
continue;
- Idx mov_src_idx = last_writer_idx(ctx, mov->operands[0]);
- if (is_instr_after(mov_src_idx, op_instr_idx))
+ /* Don't propagate DPP if the source register is overwritten since the move. */
+ if (is_clobbered_since(ctx, mov->operands[0], op_instr_idx))
continue;
if (i && !can_swap_operands(instr, &instr->opcode))
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_print_asm.cpp
^
|
@@ -152,12 +152,6 @@
disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size,
size_t pos, char* outline, unsigned outline_size)
{
- /* mask out src2 on v_writelane_b32 */
- if (((chip == GFX8 || chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) ||
- (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7610000)) {
- binary[pos + 1] = binary[pos + 1] & 0xF803FFFF;
- }
-
size_t l =
LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
pos * 4, outline, outline_size);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_register_allocation.cpp
^
|
@@ -1898,10 +1898,9 @@
dst = operand.physReg();
} else {
+ /* clear the operand in case it's only a stride mismatch */
+ register_file.clear(src, operand.regClass());
dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
- update_renames(
- ctx, register_file, parallelcopy, instr,
- instr->opcode != aco_opcode::p_create_vector ? rename_not_killed_ops : (UpdateRenames)0);
}
Operand pc_op = operand;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_spill.cpp
^
|
@@ -1614,7 +1614,7 @@
continue;
bool can_destroy = true;
- for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[block.linear_preds[0]]) {
+ for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block.index]) {
if (ctx.interferences[pair.second].first.type() == RegType::sgpr &&
slots[pair.second] / ctx.wave_size == i) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/aco_validate.cpp
^
|
@@ -235,6 +235,16 @@
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
instr.get());
+ } else if (instr->isVOP3P()) {
+ VOP3P_instruction& vop3p = instr->vop3p();
+ for (unsigned i = 0; i < instr->operands.size(); i++) {
+ if (instr->operands[i].hasRegClass() &&
+ instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
+ check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
+ "Unexpected opsel for subdword operand", instr.get());
+ }
+ check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
+ instr.get());
}
/* check for undefs */
@@ -720,6 +730,9 @@
if (instr->isSDWA())
return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
byte % instr->sdwa().sel[index].size() == 0;
+ if (instr->isVOP3P())
+ return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
+ ((instr->vop3p().opsel_hi >> index) & 1) == (byte >> 1);
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
return true;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/compiler/tests/test_optimizer.cpp
^
|
@@ -954,13 +954,14 @@
END_TEST
BEGIN_TEST(optimizer.dpp)
- //>> v1: %a, v1: %b, s2: %c = p_startpgm
- if (!setup_cs("v1 v1 s2", GFX10_3))
+ //>> v1: %a, v1: %b, s2: %c, s1: %d = p_startpgm
+ if (!setup_cs("v1 v1 s2 s1", GFX10_3))
return;
Operand a(inputs[0]);
Operand b(inputs[1]);
Operand c(inputs[2]);
+ Operand d(inputs[3]);
/* basic optimization */
//! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1
@@ -1028,6 +1029,21 @@
Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c);
writeout(8, res8);
+ /* sgprs */
+ //! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1
+ //! v1: %res9 = v_add_f32 %tmp9, %d
+ //! p_unit_test 9, %res9
+ Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+ Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d);
+ writeout(9, res9);
+
+ //! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1
+ //! v1: %res10 = v_add_f32 %d, %tmp10
+ //! p_unit_test 10, %res10
+ Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+ Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), d, tmp10);
+ writeout(10, res10);
+
finish_opt_test();
END_TEST
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/00-radv-defaults.conf
^
|
@@ -0,0 +1,148 @@
+<?xml version="1.0" standalone="yes"?>
+<!--
+
+============================================
+Application bugs worked around in this file:
+============================================
+
+-->
+
+<!DOCTYPE driconf [
+ <!ELEMENT driconf (device+)>
+ <!ELEMENT device (application | engine)+>
+ <!ATTLIST device driver CDATA #IMPLIED
+ device CDATA #IMPLIED>
+ <!ELEMENT application (option+)>
+ <!ATTLIST application name CDATA #REQUIRED
+ executable CDATA #IMPLIED
+ executable_regexp CDATA #IMPLIED
+ sha1 CDATA #IMPLIED
+ application_name_match CDATA #IMPLIED
+ application_versions CDATA #IMPLIED>
+ <!ELEMENT engine (option+)>
+
+ <!-- engine_name_match: A regexp matching the engine name -->
+ <!-- engine_versions: A version in range format
+ (version 1 to 4 : "1:4") -->
+
+ <!ATTLIST engine engine_name_match CDATA #REQUIRED
+ engine_versions CDATA #IMPLIED>
+
+ <!ELEMENT option EMPTY>
+ <!ATTLIST option name CDATA #REQUIRED
+ value CDATA #REQUIRED>
+]>
+
+<driconf>
+ <device driver="radv">
+ <!-- Engine workarounds -->
+ <engine engine_name_match="vkd3d">
+ <option name="radv_zero_vram" value="true" />
+ </engine>
+
+ <engine engine_name_match="Quantic Dream Engine">
+ <option name="radv_zero_vram" value="true" />
+ <option name="radv_lower_discard_to_demote" value="true" />
+ <option name="radv_disable_tc_compat_htile_general" value="true" />
+ </engine>
+
+ <!-- Game workarounds -->
+ <application name="Shadow Of The Tomb Raider (Native)" application_name_match="ShadowOfTheTomb">
+ <option name="radv_report_llvm9_version_string" value="true" />
+ <option name="radv_invariant_geom" value="true" />
+ </application>
+
+ <application name="Shadow Of The Tomb Raider (DX11/DX12)" application_name_match="SOTTR.exe">
+ <option name="radv_invariant_geom" value="true" />
+ <option name="radv_split_fma" value="true" />
+ </application>
+
+ <application name="RAGE 2" executable="RAGE2.exe">
+ <option name="radv_enable_mrt_output_nan_fixup" value="true" />
+ </application>
+
+ <application name="Path of Exile (64-bit, Steam)" executable="PathOfExile_x64Steam.exe">
+ <option name="radv_no_dynamic_bounds" value="true" />
+ <option name="radv_absolute_depth_bias" value="true" />
+ </application>
+ <application name="Path of Exile (32-bit, Steam)" executable="PathOfExileSteam.exe">
+ <option name="radv_no_dynamic_bounds" value="true" />
+ <option name="radv_absolute_depth_bias" value="true" />
+ </application>
+ <application name="Path of Exile (64-bit)" executable="PathOfExile_x64.exe">
+ <option name="radv_no_dynamic_bounds" value="true" />
+ <option name="radv_absolute_depth_bias" value="true" />
+ </application>
+ <application name="Path of Exile (32-bit)" executable="PathOfExile.exe">
+ <option name="radv_no_dynamic_bounds" value="true" />
+ <option name="radv_absolute_depth_bias" value="true" />
+ </application>
+
+ <application name="The Surge 2" application_name_match="Fledge">
+ <option name="radv_disable_shrink_image_store" value="true" />
+ <option name="radv_zero_vram" value="true" />
+ </application>
+
+ <application name="World War Z (and World War Z: Aftermath)" application_name_match="WWZ|wwz">
+ <option name="radv_override_uniform_offset_alignment" value="16" />
+ <option name="radv_disable_shrink_image_store" value="true" />
+ <option name="radv_invariant_geom" value="true" />
+ </application>
+
+ <application name="DOOM VFR" application_name_match="DOOM_VFR">
+ <option name="radv_no_dynamic_bounds" value="true" />
+ </application>
+
+ <application name="DOOM Eternal" application_name_match="DOOMEternal">
+ <option name="radv_zero_vram" value="true" />
+ </application>
+
+ <application name="No Man's Sky" application_name_match="No Man's Sky">
+ <option name="radv_lower_discard_to_demote" value="true" />
+ </application>
+
+ <application name="Monster Hunter World" application_name_match="MonsterHunterWorld.exe">
+ <option name="radv_invariant_geom" value="true" />
+ </application>
+
+ <application name="DOOM (2016)" application_name_match="DOOM$">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="Wolfenstein II" application_name_match="Wolfenstein II The New Colossus">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="RDR2" application_name_match="Red Dead Redemption 2">
+ <option name="radv_report_apu_as_dgpu" value="true" />
+ </application>
+
+ <application name="Resident Evil Village" application_name_match="re8.exe">
+ <option name="radv_invariant_geom" value="true" />
+ </application>
+
+ <application name="F1 2021" application_name_match="F1_2021_dx12.exe">
+ <option name="radv_disable_htile_layers" value="true" />
+ </application>
+
+ <application name="Fable Anniversary" application_name_match="Fable Anniversary.exe">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="Dragon's Dogma Dark Arisen" application_name_match="DDDA.exe">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="Grand Theft Auto IV" application_name_match="GTAIV.exe">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="Star Wars: The Force Unleashed II" application_name_match="SWTFU2.exe">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+
+ <application name="Starcraft 2" application_name_match="SC2_x64.exe">
+ <option name="radv_disable_dcc" value="true" />
+ </application>
+ </device>
+</driconf>
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/layers/radv_metro_exodus.c
^
|
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+metro_exodus_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
+{
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
+
+ /* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/5119. */
+ if (semaphore == NULL) {
+ fprintf(stderr, "RADV: Ignoring vkGetSemaphoreCounterValue() with NULL semaphore (game bug)!\n");
+ return VK_SUCCESS;
+ }
+
+ return radv_GetSemaphoreCounterValue(_device, _semaphore, pValue);
+}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/meson.build
^
|
@@ -25,12 +25,13 @@
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'radv',
- '--device-prefix', 'sqtt',
+ '--device-prefix', 'sqtt', '--device-prefix', 'metro_exodus',
],
depend_files : vk_entrypoints_gen_depend_files,
)
libradv_files = files(
+ 'layers/radv_metro_exodus.c',
'layers/radv_sqtt_layer.c',
'winsys/null/radv_null_bo.c',
'winsys/null/radv_null_bo.h',
@@ -86,6 +87,10 @@
'vk_format.h',
)
+files_drirc = files('00-radv-defaults.conf')
+
+install_data(files_drirc, install_dir : join_paths(get_option('datadir'), 'drirc.d'))
+
if not with_platform_windows
libradv_files += files(
'winsys/amdgpu/radv_amdgpu_bo.c',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_cmd_buffer.c
^
|
@@ -2968,7 +2968,7 @@
}
static void
-radv_emit_vertex_state(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
+radv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_shader_variant *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX);
@@ -3059,8 +3059,8 @@
if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE)
radv_emit_color_write_enable(cmd_buffer);
- if (states & RADV_CMD_DIRTY_VERTEX_STATE)
- radv_emit_vertex_state(cmd_buffer, pipeline_is_dirty);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT)
+ radv_emit_vertex_input(cmd_buffer, pipeline_is_dirty);
cmd_buffer->state.dirty &= ~states;
}
@@ -4497,7 +4497,8 @@
return;
}
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_STATE;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
}
static uint32_t
@@ -4564,7 +4565,6 @@
radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
assert(set);
- assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
if (!cmd_buffer->device->use_global_bo_list) {
for (unsigned j = 0; j < set->header.buffer_count; ++j)
@@ -4602,7 +4602,7 @@
radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
}
- for (unsigned j = 0; j < set->header.layout->dynamic_offset_count; ++j, ++dyn_idx) {
+ for (unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) {
unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
assert(dyn_idx < dynamicOffsetCount);
@@ -4628,7 +4628,7 @@
}
}
- cmd_buffer->push_constant_stages |= set->header.layout->dynamic_shader_stages;
+ cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages;
}
}
}
@@ -5466,14 +5466,7 @@
cmd_buffer->state.vbo_misaligned_mask = 0;
- state->attribute_mask = 0;
- state->misaligned_mask = 0;
- state->possibly_misaligned_mask = 0;
- state->instance_rate_inputs = 0;
- state->nontrivial_divisors = 0;
- state->post_shuffle = 0;
- state->alpha_adjust_lo = 0;
- state->alpha_adjust_hi = 0;
+ memset(state, 0, sizeof(*state));
enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class;
for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
@@ -5529,7 +5522,8 @@
state->post_shuffle |= 1u << loc;
}
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_STATE;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
}
void
@@ -5560,6 +5554,11 @@
allow_ib2 = false;
}
+ if (secondary->queue_family_index == RADV_QUEUE_COMPUTE) {
+ /* IB2 packets are not supported on compute queues according to PAL. */
+ allow_ib2 = false;
+ }
+
primary->scratch_size_per_wave_needed =
MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed);
primary->scratch_waves_wanted =
@@ -6326,8 +6325,9 @@
/* Index, vertex and streamout buffers don't change context regs, and
* pipeline is already handled.
*/
- used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_STATE |
- RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE);
+ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | RADV_CMD_DIRTY_STREAMOUT_BUFFER |
+ RADV_CMD_DIRTY_PIPELINE);
if (cmd_buffer->state.dirty & used_states)
return true;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_descriptor_set.c
^
|
@@ -129,8 +129,8 @@
/* Store block of offsets first, followed by the conversion descriptors (padded to the struct
* alignment) */
size += num_bindings * sizeof(uint32_t);
- size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion));
- size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion);
+ size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion_state));
+ size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion_state);
}
set_layout =
@@ -145,7 +145,7 @@
/* We just allocate all the samplers at the end of the struct */
uint32_t *samplers = (uint32_t *)&set_layout->binding[num_bindings];
- struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
+ struct radv_sampler_ycbcr_conversion_state *ycbcr_samplers = NULL;
uint32_t *ycbcr_sampler_offsets = NULL;
if (ycbcr_sampler_count > 0) {
@@ -155,8 +155,8 @@
uintptr_t first_ycbcr_sampler_offset =
(uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
first_ycbcr_sampler_offset =
- ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion));
- ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)first_ycbcr_sampler_offset;
+ ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion_state));
+ ycbcr_samplers = (struct radv_sampler_ycbcr_conversion_state *)first_ycbcr_sampler_offset;
} else
set_layout->ycbcr_sampler_offsets_offset = 0;
@@ -198,7 +198,7 @@
if (conversion) {
has_ycbcr_sampler = true;
max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
- vk_format_get_plane_count(conversion->format));
+ vk_format_get_plane_count(conversion->state.format));
}
}
}
@@ -311,7 +311,7 @@
for (uint32_t i = 0; i < binding->descriptorCount; i++) {
if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
ycbcr_samplers[i] =
- *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
+ radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler->state;
else
ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
}
@@ -495,11 +495,16 @@
layout->set[set].layout = set_layout;
layout->set[set].dynamic_offset_start = dynamic_offset_count;
+ layout->set[set].dynamic_offset_count = 0;
+ layout->set[set].dynamic_offset_stages = 0;
for (uint32_t b = 0; b < set_layout->binding_count; b++) {
- dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
- dynamic_shader_stages |= set_layout->dynamic_shader_stages;
+ layout->set[set].dynamic_offset_count +=
+ set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
+ layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages;
}
+ dynamic_offset_count += layout->set[set].dynamic_offset_count;
+ dynamic_shader_stages |= layout->set[set].dynamic_offset_stages;
/* Hash the entire set layout except for the vk_object_base. The
* rest of the set layout is carefully constructed to not have
@@ -1517,13 +1522,13 @@
vk_object_base_init(&device->vk, &conversion->base, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);
- conversion->format = pCreateInfo->format;
- conversion->ycbcr_model = pCreateInfo->ycbcrModel;
- conversion->ycbcr_range = pCreateInfo->ycbcrRange;
- conversion->components = pCreateInfo->components;
- conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
- conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
- conversion->chroma_filter = pCreateInfo->chromaFilter;
+ conversion->state.format = pCreateInfo->format;
+ conversion->state.ycbcr_model = pCreateInfo->ycbcrModel;
+ conversion->state.ycbcr_range = pCreateInfo->ycbcrRange;
+ conversion->state.components = pCreateInfo->components;
+ conversion->state.chroma_offsets[0] = pCreateInfo->xChromaOffset;
+ conversion->state.chroma_offsets[1] = pCreateInfo->yChromaOffset;
+ conversion->state.chroma_filter = pCreateInfo->chromaFilter;
*pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
return VK_SUCCESS;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_descriptor_set.h
^
|
@@ -89,7 +89,9 @@
struct {
struct radv_descriptor_set_layout *layout;
uint32_t size;
- uint32_t dynamic_offset_start;
+ uint16_t dynamic_offset_start;
+ uint16_t dynamic_offset_count;
+ VkShaderStageFlags dynamic_offset_stages;
} set[MAX_SETS];
uint32_t num_sets;
@@ -114,7 +116,7 @@
return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
}
-static inline const struct radv_sampler_ycbcr_conversion *
+static inline const struct radv_sampler_ycbcr_conversion_state *
radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsigned binding_index)
{
if (!set->ycbcr_sampler_offsets_offset)
@@ -125,7 +127,7 @@
if (offsets[binding_index] == 0)
return NULL;
- return (const struct radv_sampler_ycbcr_conversion *)((const char *)set +
+ return (const struct radv_sampler_ycbcr_conversion_state *)((const char *)set +
offsets[binding_index]);
}
#endif /* RADV_DESCRIPTOR_SET_H */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_device.c
^
|
@@ -31,7 +31,11 @@
#ifdef __FreeBSD__
#include <sys/types.h>
-#elif !defined(_WIN32)
+#endif
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
@@ -909,6 +913,7 @@
DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
DRI_CONF_RADV_DISABLE_DCC(false)
DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
+ DRI_CONF_RADV_DISABLE_HTILE_LAYERS(false)
DRI_CONF_SECTION_END
};
// clang-format on
@@ -951,6 +956,9 @@
instance->report_apu_as_dgpu =
driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");
+
+ instance->disable_htile_layers =
+ driQueryOptionb(&instance->dri_options, "radv_disable_htile_layers");
}
VkResult
@@ -1787,8 +1795,8 @@
.lineWidthGranularity = (1.0 / 8.0),
.strictLines = false, /* FINISHME */
.standardSampleLocations = true,
- .optimalBufferCopyOffsetAlignment = 128,
- .optimalBufferCopyRowPitchAlignment = 128,
+ .optimalBufferCopyOffsetAlignment = 1,
+ .optimalBufferCopyRowPitchAlignment = 1,
.nonCoherentAtomSize = 64,
};
@@ -2965,7 +2973,15 @@
struct vk_device_dispatch_table dispatch_table;
- if (radv_thread_trace_enabled()) {
+ if (physical_device->instance->vk.app_info.app_name &&
+ !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) {
+ /* Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it
+ * crashes sometimes. Work around this game bug by enabling an internal layer. Remove this
+ * when the game is fixed.
+ */
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
+ } else if (radv_thread_trace_enabled()) {
vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
} else {
@@ -4683,7 +4699,7 @@
result =
radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
- &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);
+ &initial_flush_preamble_cs, &initial_preamble_cs, &continue_preamble_cs);
if (result != VK_SUCCESS)
goto fail;
@@ -4845,8 +4861,12 @@
points[syncobj_idx] = submission->wait_values[i];
++syncobj_idx;
}
- bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
- true, timeout);
+
+ bool success = true;
+ if (syncobj_idx > 0) {
+ success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
+ true, timeout);
+ }
free(points);
return success ? VK_SUCCESS : VK_TIMEOUT;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_image.c
^
|
@@ -352,6 +352,11 @@
bool use_htile_for_mips =
image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
+ /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
+ if (device->physical_device->rad_info.chip_class == GFX10 &&
+ image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
+ return false;
+
/* Do not enable HTILE for very small images because it seems less performant but make sure it's
* allowed with VRS attachments because we need HTILE.
*/
@@ -360,6 +365,9 @@
!device->attachment_vrs_enabled)
return false;
+ if (device->instance->disable_htile_layers && image->info.array_size > 1)
+ return false;
+
return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
^
|
@@ -33,7 +33,7 @@
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
nir_deref_instr *tex_deref;
- const struct radv_sampler_ycbcr_conversion *conversion;
+ const struct radv_sampler_ycbcr_conversion_state *conversion;
bool unnormalized_coordinates;
};
@@ -80,7 +80,7 @@
implicit_downsampled_coords(struct ycbcr_state *state, nir_ssa_def *old_coords)
{
nir_builder *b = state->builder;
- const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
+ const struct radv_sampler_ycbcr_conversion_state *conversion = state->conversion;
nir_ssa_def *image_size = NULL;
nir_ssa_def *comp[4] = {
NULL,
@@ -230,7 +230,7 @@
layout->set[var->data.descriptor_set].layout;
const struct radv_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
- const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
+ const struct radv_sampler_ycbcr_conversion_state *ycbcr_samplers =
radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
if (!ycbcr_samplers)
@@ -255,7 +255,7 @@
array_index = nir_src_as_uint(deref->arr.index);
array_index = MIN2(array_index, binding->array_size - 1);
}
- const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
+ const struct radv_sampler_ycbcr_conversion_state *ycbcr_sampler = ycbcr_samplers + array_index;
if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
return false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline.c
^
|
@@ -2711,6 +2711,7 @@
key.invariant_geom = true;
key.use_ngg = pipeline->device->physical_device->use_ngg;
+ key.adjust_frag_coord_z = pipeline->device->adjust_frag_coord_z;
return key;
}
@@ -4448,7 +4449,7 @@
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 | clip_dist_mask);
+ total_mask << 8 | clip_dist_mask);
if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
@@ -4568,7 +4569,7 @@
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 | clip_dist_mask);
+ total_mask << 8 | clip_dist_mask);
radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_private.h
^
|
@@ -320,6 +320,7 @@
bool disable_shrink_image_store;
bool absolute_depth_bias;
bool report_apu_as_dgpu;
+ bool disable_htile_layers;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -1046,7 +1047,6 @@
RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
- RADV_CMD_DIRTY_VERTEX_STATE = RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT,
};
enum radv_cmd_flush_bits {
@@ -2338,8 +2338,7 @@
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
-struct radv_sampler_ycbcr_conversion {
- struct vk_object_base base;
+struct radv_sampler_ycbcr_conversion_state {
VkFormat format;
VkSamplerYcbcrModelConversion ycbcr_model;
VkSamplerYcbcrRange ycbcr_range;
@@ -2348,6 +2347,12 @@
VkFilter chroma_filter;
};
+struct radv_sampler_ycbcr_conversion {
+ struct vk_object_base base;
+ /* The state is hashed for the descriptor set layout. */
+ struct radv_sampler_ycbcr_conversion_state state;
+};
+
struct radv_buffer_view {
struct vk_object_base base;
struct radeon_winsys_bo *bo;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_query.c
^
|
@@ -806,8 +806,8 @@
radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
- uint32_t dst_stride, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask,
- uint32_t avail_offset)
+ uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags,
+ uint32_t pipeline_stats_mask, uint32_t avail_offset)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
@@ -833,7 +833,7 @@
cmd_buffer->state.predicating = false;
uint64_t src_buffer_size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset);
- uint64_t dst_buffer_size = count == 1 ? src_stride : dst_stride * count;
+ uint64_t dst_buffer_size = dst_stride * (count - 1) + dst_size;
radv_buffer_init(&src_buffer, device, src_bo, src_buffer_size, src_offset);
radv_buffer_init(&dst_buffer, device, dst_bo, dst_buffer_size, dst_offset);
@@ -1184,6 +1184,29 @@
}
}
+static size_t
+radv_query_result_size(const struct radv_query_pool *pool, VkQueryResultFlags flags)
+{
+ unsigned values = (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0;
+ switch (pool->type) {
+ case VK_QUERY_TYPE_TIMESTAMP:
+ case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
+ case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
+ case VK_QUERY_TYPE_OCCLUSION:
+ values += 1;
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ values += util_bitcount(pool->pipeline_stats_mask);
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ values += 2;
+ break;
+ default:
+ unreachable("trying to get size of unhandled query type");
+ }
+ return values * ((flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4);
+}
+
void
radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
@@ -1195,6 +1218,7 @@
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
+ size_t dst_size = radv_query_result_size(pool, flags);
dest_va += dst_buffer->offset + dstOffset;
if (!queryCount)
@@ -1230,8 +1254,8 @@
}
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
- 0);
+ dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
+ flags, 0, 0);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@@ -1246,11 +1270,11 @@
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
}
}
- radv_query_shader(cmd_buffer,
- &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags,
- pool->pipeline_stats_mask, pool->availability_offset + 4 * firstQuery);
+ radv_query_shader(
+ cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset,
+ pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask,
+ pool->availability_offset + 4 * firstQuery);
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
@@ -1272,8 +1296,8 @@
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
- 0);
+ dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
+ flags, 0, 0);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@@ -1293,8 +1317,8 @@
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
- 0);
+ dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
+ flags, 0, 0);
break;
default:
unreachable("trying to get results of unhandled query type");
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.c
^
|
@@ -1762,7 +1762,7 @@
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
options->enable_mrt_output_nan_fixup =
module && !module->nir && options->key.ps.enable_mrt_output_nan_fixup;
- options->adjust_frag_coord_z = device->adjust_frag_coord_z;
+ options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.h
^
|
@@ -60,6 +60,7 @@
uint32_t optimisations_disabled : 1;
uint32_t invariant_geom : 1;
uint32_t use_ngg : 1;
+ uint32_t adjust_frag_coord_z : 1;
struct {
uint32_t instance_rate_inputs;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
^
|
@@ -207,6 +207,20 @@
if (ws) {
simple_mtx_unlock(&winsys_creation_mutex);
amdgpu_device_deinitialize(dev);
+
+ /* Check that options don't differ from the existing winsys. */
+ if (((debug_flags & RADV_DEBUG_ALL_BOS) && !ws->debug_all_bos) ||
+ ((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) ||
+ ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) ||
+ (perftest_flags != ws->perftest)) {
+ fprintf(stderr, "amdgpu: Found options that differ from the existing winsys.\n");
+ return NULL;
+ }
+
+ /* RADV_DEBUG_ZERO_VRAM is the only option that is allowed to be set again. */
+ if (debug_flags & RADV_DEBUG_ZERO_VRAM)
+ ws->zero_all_vram_allocs = true;
+
return &ws->base;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/null/radv_null_bo.c
^
|
@@ -65,6 +65,13 @@
{
}
+static VkResult
+radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
+ bool resident)
+{
+ return VK_SUCCESS;
+}
+
static void
radv_null_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
@@ -80,4 +87,5 @@
ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
ws->base.buffer_map = radv_null_winsys_bo_map;
ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
+ ws->base.buffer_make_resident = radv_null_winsys_bo_make_resident;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/amd/vulkan/winsys/null/radv_null_winsys.c
^
|
@@ -135,6 +135,19 @@
info->has_image_load_dcc_bug =
info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_VANGOGH;
+
+ info->has_accelerated_dot_product =
+ info->family == CHIP_ARCTURUS || info->family == CHIP_ALDEBARAN ||
+ info->family == CHIP_VEGA20 || info->family >= CHIP_NAVI12;
+
+ info->address32_hi = info->chip_class >= GFX9 ? 0xffff8000u : 0x0;
+
+ info->has_rbplus = info->family == CHIP_STONEY || info->chip_class >= GFX9;
+ info->rbplus_allowed =
+ info->has_rbplus &&
+ (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
+ info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10_3);
+
}
static void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/broadcom/compiler/qpu_schedule.c
^
|
@@ -492,7 +492,8 @@
int last_thrsw_tick;
int last_branch_tick;
int last_setmsf_tick;
- bool tlb_locked;
+ bool first_thrsw_emitted;
+ bool last_thrsw_emitted;
bool fixup_ldvary;
int ldvary_count;
};
@@ -576,10 +577,26 @@
}
static bool
-pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard,
+scoreboard_is_locked(struct choose_scoreboard *scoreboard,
+ bool lock_scoreboard_on_first_thrsw)
+{
+ if (lock_scoreboard_on_first_thrsw) {
+ return scoreboard->first_thrsw_emitted &&
+ scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
+ }
+
+ return scoreboard->last_thrsw_emitted &&
+ scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
+}
+
+static bool
+pixel_scoreboard_too_soon(struct v3d_compile *c,
+ struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst)
{
- return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
+ return qpu_inst_is_tlb(inst) &&
+ !scoreboard_is_locked(scoreboard,
+ c->lock_scoreboard_on_first_thrsw);
}
static bool
@@ -1053,12 +1070,12 @@
if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
continue;
- /* "A scoreboard wait must not occur in the first two
- * instructions of a fragment shader. This is either the
- * explicit Wait for Scoreboard signal or an implicit wait
- * with the first tile-buffer read or write instruction."
+ /* "Before doing a TLB access a scoreboard wait must have been
+ * done. This happens either on the first or last thread
+ * switch, depending on a setting (scb_wait_on_first_thrsw) in
+ * the shader state."
*/
- if (pixel_scoreboard_too_soon(scoreboard, inst))
+ if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue;
/* ldunif and ldvary both write r5, but ldunif does so a tick
@@ -1131,12 +1148,10 @@
continue;
}
- /* Don't merge in something that will lock the TLB.
- * Hopwefully what we have in inst will release some
- * other instructions, allowing us to delay the
- * TLB-locking instruction until later.
+ /* Don't merge TLB instructions before we have acquired
+ * the scoreboard lock.
*/
- if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst))
+ if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue;
/* When we successfully pair up an ldvary we then try
@@ -1273,9 +1288,6 @@
if (inst->sig.ldvary)
scoreboard->last_ldvary_tick = scoreboard->tick;
-
- if (qpu_inst_is_tlb(inst))
- scoreboard->tlb_locked = true;
}
static void
@@ -1490,6 +1502,11 @@
return false;
}
+ if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
+ !inst->sig_magic) {
+ return false;
+ }
+
if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
return false;
@@ -1747,6 +1764,8 @@
merge_inst = inst;
}
+ scoreboard->first_thrsw_emitted = true;
+
/* If we're emitting the last THRSW (other than program end), then
* signal that to the HW by emitting two THRSWs in a row.
*/
@@ -1758,6 +1777,7 @@
struct qinst *second_inst =
(struct qinst *)merge_inst->link.next;
second_inst->qpu.sig.thrsw = true;
+ scoreboard->last_thrsw_emitted = true;
}
/* Make sure the thread end executes within the program lifespan */
@@ -1981,6 +2001,17 @@
if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
return false;
+ /* The implicit ldvary destination may not be written to by a signal
+ * in the instruction following ldvary. Since we are planning to move
+ * ldvary to the previous instruction, this means we need to check if
+ * the current instruction has any other signal that could create this
+ * conflict. The only other signal that can write to the implicit
+ * ldvary destination that is compatible with ldvary in the same
+ * instruction is ldunif.
+ */
+ if (inst->sig.ldunif)
+ return false;
+
/* The previous instruction can't write to the same destination as the
* ldvary.
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/broadcom/vulkan/v3dv_device.c
^
|
@@ -75,23 +75,25 @@
return VK_SUCCESS;
}
-#define V3DV_HAS_SURFACE (VK_USE_PLATFORM_WIN32_KHR || \
- VK_USE_PLATFORM_WAYLAND_KHR || \
- VK_USE_PLATFORM_XCB_KHR || \
- VK_USE_PLATFORM_XLIB_KHR || \
- VK_USE_PLATFORM_DISPLAY_KHR)
+#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
+ defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
+ defined(VK_USE_PLATFORM_XCB_KHR) || \
+ defined(VK_USE_PLATFORM_XLIB_KHR) || \
+ defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#define V3DV_USE_WSI_PLATFORM
+#endif
static const struct vk_instance_extension_table instance_extensions = {
.KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
.KHR_display = true,
+ .KHR_get_display_properties2 = true,
#endif
.KHR_external_fence_capabilities = true,
.KHR_external_memory_capabilities = true,
.KHR_external_semaphore_capabilities = true,
- .KHR_get_display_properties2 = true,
.KHR_get_physical_device_properties2 = true,
-#ifdef V3DV_HAS_SURFACE
+#ifdef V3DV_USE_WSI_PLATFORM
.KHR_get_surface_capabilities2 = true,
.KHR_surface = true,
.KHR_surface_protected_capabilities = true,
@@ -135,7 +137,7 @@
.KHR_sampler_mirror_clamp_to_edge = true,
.KHR_storage_buffer_storage_class = true,
.KHR_uniform_buffer_standard_layout = true,
-#ifdef V3DV_HAS_SURFACE
+#ifdef V3DV_USE_WSI_PLATFORM
.KHR_swapchain = true,
.KHR_incremental_present = true,
#endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glcpp/glcpp-parse.y
^
|
@@ -1057,6 +1057,7 @@
token = linear_alloc_child(parser->linalloc, sizeof(token_t));
token->type = type;
token->value.str = str;
+ token->expanding = false;
return token;
}
@@ -1069,6 +1070,7 @@
token = linear_alloc_child(parser->linalloc, sizeof(token_t));
token->type = type;
token->value.ival = ival;
+ token->expanding = false;
return token;
}
@@ -1958,6 +1960,10 @@
struct hash_entry *entry;
macro_t *macro;
+ /* If token is already being expanded return to avoid an infinite loop */
+ if (token->expanding)
+ return NULL;
+
/* We only expand identifiers */
if (token->type != IDENTIFIER) {
return NULL;
@@ -1988,14 +1994,15 @@
/* Finally, don't expand this macro if we're already actively
* expanding it, (to avoid infinite recursion). */
if (_parser_active_list_contains (parser, identifier)) {
- /* We change the token type here from IDENTIFIER to OTHER to prevent any
+ /* We change the `expanding` bool to true to prevent any
* future expansion of this unexpanded token. */
char *str;
token_list_t *expansion;
token_t *final;
str = linear_strdup(parser->linalloc, token->value.str);
- final = _token_create_str(parser, OTHER, str);
+ final = _token_create_str(parser, token->type, str);
+ final->expanding = true;
expansion = _token_list_create(parser);
_token_list_append(parser, expansion, final);
return expansion;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glcpp/glcpp.h
^
|
@@ -103,6 +103,7 @@
} while (0)
struct token {
+ bool expanding;
int type;
YYSTYPE value;
YYLTYPE location;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glsl_parser.yy
^
|
@@ -2047,9 +2047,9 @@
* output from one shader stage will still match an input of a subsequent
* stage without the input being declared as invariant."
*
- * On the desktop side, this text first appears in GLSL 4.30.
+ * On the desktop side, this text first appears in GLSL 4.20.
*/
- if (state->is_version(430, 300) && $$.flags.q.in)
+ if (state->is_version(420, 300) && $$.flags.q.in)
_mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
}
| interpolation_qualifier type_qualifier
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/glsl_to_nir.cpp
^
|
@@ -39,6 +39,7 @@
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
+#include "main/context.h"
#include "util/u_math.h"
/*
@@ -2616,6 +2617,13 @@
glsl_float64_funcs_to_nir(struct gl_context *ctx,
const nir_shader_compiler_options *options)
{
+ /* It's not possible to use float64 on GLSL ES, so don't bother trying to
+ * build the support code. The support code depends on higher versions of
+ * desktop GLSL, so it will fail to compile (below) anyway.
+ */
+ if (!_mesa_is_desktop_gl(ctx) || ctx->Const.GLSLVersion < 400)
+ return NULL;
+
/* We pretend it's a vertex shader. Ultimately, the stage shouldn't
* matter because we're not optimizing anything here.
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/glsl/link_varyings.cpp
^
|
@@ -319,13 +319,13 @@
return;
}
- /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+ /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
*
* "As only outputs need be declared with invariant, an output from
* one shader stage will still match an input of a subsequent stage
* without the input being declared as invariant."
*
- * while GLSL 4.20 says:
+ * while GLSL 4.10 says:
*
* "For variables leaving one shader and coming into another shader,
* the invariant keyword has to be used in both shaders, or a link
@@ -337,7 +337,7 @@
* and fragment shaders must match."
*/
if (input->data.explicit_invariant != output->data.explicit_invariant &&
- prog->data->Version < (prog->IsES ? 300 : 430)) {
+ prog->data->Version < (prog->IsES ? 300 : 420)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/meson.build
^
|
@@ -20,6 +20,8 @@
nir_depends = files('nir_opcodes.py', 'nir_intrinsics.py')
+nir_algebraic_depends = files('nir_opcodes.py', 'nir_algebraic.py')
+
nir_builder_opcodes_h = custom_target(
'nir_builder_opcodes.h',
input : 'nir_builder_opcodes_h.py',
@@ -62,7 +64,7 @@
output : 'nir_opt_algebraic.c',
command : [prog_python, '@INPUT@'],
capture : true,
- depend_files : files('nir_algebraic.py'),
+ depend_files : nir_algebraic_depends,
)
nir_intrinsics_h = custom_target(
@@ -366,8 +368,6 @@
link_with : _libnir,
)
-nir_algebraic_py = files('nir_algebraic.py')
-
if with_tests
test(
'nir_builder',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir.c
^
|
@@ -154,6 +154,7 @@
reg->num_components = 0;
reg->bit_size = 32;
reg->num_array_elems = 0;
+ reg->divergent = false;
exec_list_push_tail(list, ®->node);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir.h
^
|
@@ -5532,6 +5532,7 @@
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
+bool nir_copy_prop_impl(nir_function_impl *impl);
bool nir_copy_prop(nir_shader *shader);
bool nir_opt_copy_prop_vars(nir_shader *shader);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_builtin_builder.c
^
|
@@ -223,7 +223,28 @@
tmp);
/* sign fixup */
- return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
+ nir_ssa_def *result = nir_fmul(b, tmp, nir_fsign(b, y_over_x));
+
+ /* The fmin and fmax above will filter out NaN values. This leads to
+ * non-NaN results for NaN inputs. Work around this by doing
+ *
+ * !isnan(y_over_x) ? ... : y_over_x;
+ */
+ if (b->exact ||
+ nir_is_float_control_signed_zero_inf_nan_preserve(b->shader->info.float_controls_execution_mode, bit_size)) {
+ const bool exact = b->exact;
+
+ b->exact = true;
+ nir_ssa_def *is_not_nan = nir_feq(b, y_over_x, y_over_x);
+ b->exact = exact;
+
+ /* The extra 1.0*y_over_x ensures that subnormal results are flushed to
+ * zero.
+ */
+ result = nir_bcsel(b, is_not_nan, result, nir_fmul_imm(b, y_over_x, 1.0));
+ }
+
+ return result;
}
nir_ssa_def *
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_deref.c
^
|
@@ -1052,6 +1052,10 @@
if (!glsl_type_is_struct(parent->type))
return false;
+ /* Empty struct */
+ if (glsl_get_length(parent->type) < 1)
+ return false;
+
if (glsl_get_struct_field_offset(parent->type, 0) != 0)
return false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_from_ssa.c
^
|
@@ -935,9 +935,10 @@
static void
place_phi_read(nir_builder *b, nir_register *reg,
- nir_ssa_def *def, nir_block *block, unsigned depth)
+ nir_ssa_def *def, nir_block *block, struct set *visited_blocks)
{
- if (block != def->parent_instr->block) {
+ /* Search already visited blocks to avoid back edges in tree */
+ if (_mesa_set_search(visited_blocks, block) == NULL) {
/* Try to go up the single-successor tree */
bool all_single_successors = true;
set_foreach(block->predecessors, entry) {
@@ -948,22 +949,16 @@
}
}
- if (all_single_successors && depth < 32) {
+ if (all_single_successors) {
/* All predecessors of this block have exactly one successor and it
* is this block so they must eventually lead here without
* intersecting each other. Place the reads in the predecessors
* instead of this block.
- *
- * We only let this function recurse 32 times because it can recurse
- * indefinitely in the presence of infinite loops. Because we're
- * crawling a single-successor chain, it doesn't matter where we
- * place it so it's ok to stop at an arbitrary distance.
- *
- * TODO: One day, we could detect back edges and avoid the recursion
- * that way.
*/
+ _mesa_set_add(visited_blocks, block);
+
set_foreach(block->predecessors, entry) {
- place_phi_read(b, reg, def, (nir_block *)entry->key, depth + 1);
+ place_phi_read(b, reg, def, (nir_block *)entry->key, visited_blocks);
}
return;
}
@@ -992,6 +987,8 @@
{
nir_builder b;
nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node));
+ struct set *visited_blocks = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
bool progress = false;
nir_foreach_instr_safe(instr, block) {
@@ -1010,7 +1007,9 @@
nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
- place_phi_read(&b, reg, src->src.ssa, src->pred, 0);
+ _mesa_set_add(visited_blocks, src->src.ssa->parent_instr->block);
+ place_phi_read(&b, reg, src->src.ssa, src->pred, visited_blocks);
+ _mesa_set_clear(visited_blocks, NULL);
}
nir_instr_remove(&phi->instr);
@@ -1018,6 +1017,8 @@
progress = true;
}
+ _mesa_set_destroy(visited_blocks, NULL);
+
return progress;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_instr_set.c
^
|
@@ -272,6 +272,7 @@
hash = HASH(hash, instr->is_array);
hash = HASH(hash, instr->is_shadow);
hash = HASH(hash, instr->is_new_style_shadow);
+ hash = HASH(hash, instr->is_sparse);
unsigned component = instr->component;
hash = HASH(hash, component);
for (unsigned i = 0; i < 4; ++i)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_frexp.c
^
|
@@ -35,7 +35,6 @@
nir_ssa_def *abs_x = nir_fabs(b, x);
nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
nir_ssa_def *sign_mantissa_mask, *exponent_value;
- nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero);
switch (x->bit_size) {
case 16:
@@ -89,18 +88,31 @@
* 32 bits using nir_unpack_64_2x32_split_y.
*/
nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x);
- nir_ssa_def *zero32 = nir_imm_int(b, 0);
+ /* If x is ±0, ±Inf, or NaN, return x unmodified. */
nir_ssa_def *new_upper =
- nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask),
- nir_bcsel(b, is_not_zero, exponent_value, zero32));
+ nir_bcsel(b,
+ nir_iand(b,
+ nir_flt(b, zero, abs_x),
+ nir_fisfinite(b, x)),
+ nir_ior(b,
+ nir_iand(b, upper_x, sign_mantissa_mask),
+ exponent_value),
+ upper_x);
nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x);
return nir_pack_64_2x32_split(b, lower_x, new_upper);
} else {
- return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
- nir_bcsel(b, is_not_zero, exponent_value, zero));
+ /* If x is ±0, ±Inf, or NaN, return x unmodified. */
+ return nir_bcsel(b,
+ nir_iand(b,
+ nir_flt(b, zero, abs_x),
+ nir_fisfinite(b, x)),
+ nir_ior(b,
+ nir_iand(b, x, sign_mantissa_mask),
+ exponent_value),
+ x);
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_io.c
^
|
@@ -306,6 +306,9 @@
nir_intrinsic_set_range(load,
state->type_size(var->type, var->data.bindless));
+ if (nir_intrinsic_has_access(load))
+ nir_intrinsic_set_access(load, var->data.access);
+
nir_intrinsic_set_dest_type(load, dest_type);
if (load->intrinsic != nir_intrinsic_load_uniform) {
@@ -412,6 +415,9 @@
nir_intrinsic_set_write_mask(store, write_mask);
+ if (nir_intrinsic_has_access(store))
+ nir_intrinsic_set_access(store, var->data.access);
+
if (array_index)
store->src[1] = nir_src_for_ssa(array_index);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_memcpy.c
^
|
@@ -111,11 +111,14 @@
uint64_t size = nir_src_as_uint(cpy->src[2]);
uint64_t offset = 0;
while (offset < size) {
- uint64_t remaining = offset - size;
- /* For our chunk size, we choose the largest power-of-two that
- * divides size with a maximum of 16B (a vec4).
+ uint64_t remaining = size - offset;
+ /* Find the largest chunk size power-of-two (MSB in remaining)
+ * and limit our chunk to 16B (a vec4). It's important to do as
+ * many 16B chunks as possible first so that the index
+ * computation is correct for
+ * memcpy_(load|store)_deref_elem_imm.
*/
- unsigned copy_size = 1u << MIN2(ffsll(remaining) - 1, 4);
+ unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4);
const struct glsl_type *copy_type =
copy_type_for_byte_size(copy_size);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opcodes.py
^
|
@@ -830,10 +830,10 @@
# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively
-binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
-binop("seq", tfloat32, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
-binop("sne", tfloat32, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
# SPIRV shifts are undefined for shift-operands >= bitsize,
# but SM5 shifts are defined to use only the least significant bits.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_algebraic.py
^
|
@@ -445,8 +445,6 @@
# (a + #b) * #c => (a * #c) + (#b * #c)
(('imul', ('iadd(is_used_once)', a, '#b'), '#c'), ('iadd', ('imul', a, c), ('imul', b, c))),
- (('~fmul', ('fadd(is_used_once)', a, '#b'), '#c'), ('fadd', ('fmul', a, c), ('fmul', b, c)),
- '!options->avoid_ternary_with_two_constants'),
# ((a + #b) + c) * #d => ((a + c) * #d) + (#b * #d)
(('imul', ('iadd(is_used_once)', ('iadd(is_used_once)', a, '#b'), c), '#d'),
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_copy_propagate.c
^
|
@@ -162,7 +162,7 @@
return progress;
}
-static bool
+bool
nir_copy_prop_impl(nir_function_impl *impl)
{
bool progress = false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_dce.c
^
|
@@ -166,18 +166,21 @@
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(cf_node);
+ struct loop_state inner_state;
+ inner_state.preheader = nir_cf_node_as_block(nir_cf_node_prev(cf_node));
+ inner_state.header_phis_changed = false;
+
/* Fast path if the loop has no continues: we can remove instructions
* as we mark the others live.
*/
- if (nir_loop_first_block(loop)->predecessors->entries == 1) {
+ struct set *predecessors = nir_loop_first_block(loop)->predecessors;
+ if (predecessors->entries == 1 &&
+ _mesa_set_next_entry(predecessors, NULL)->key == inner_state.preheader) {
progress |= dce_cf_list(&loop->body, defs_live, parent_loop);
break;
}
/* Mark instructions as live until there is no more progress. */
- struct loop_state inner_state;
- inner_state.preheader = nir_cf_node_as_block(nir_cf_node_prev(cf_node));
- inner_state.header_phis_changed = false;
do {
/* dce_cf_list() resets inner_state.header_phis_changed itself, so
* it doesn't have to be done here.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/nir/nir_range_analysis.c
^
|
@@ -1465,7 +1465,6 @@
case nir_op_b32csel:
case nir_op_ubfe:
case nir_op_bfm:
- case nir_op_f2u32:
case nir_op_fmul:
case nir_op_extract_u8:
case nir_op_extract_i8:
@@ -1476,6 +1475,7 @@
case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
+ case nir_op_f2u32:
if (nir_ssa_scalar_chase_alu_src(scalar, 0).def->bit_size > 32) {
/* If src is >32 bits, return max */
return max;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/spirv/vtn_cfg.c
^
|
@@ -1387,6 +1387,8 @@
vtn_foreach_instruction(b, func->start_block->label, func->end,
vtn_handle_phi_second_pass);
+ if (func->nir_func->impl->structured)
+ nir_copy_prop_impl(impl);
nir_rematerialize_derefs_in_use_blocks_impl(impl);
/*
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/compiler/spirv/vtn_glsl450.c
^
|
@@ -332,9 +332,22 @@
break;
case GLSLstd450Modf: {
+ nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
+ nir_ssa_def *sign_bit =
+ nir_imm_intN_t(&b->nb, (uint64_t)1 << (src[0]->bit_size - 1),
+ src[0]->bit_size);
nir_ssa_def *sign = nir_fsign(nb, src[0]);
nir_ssa_def *abs = nir_fabs(nb, src[0]);
- dest->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+
+ /* NaN input should produce a NaN result, and ±Inf input should produce
+ * ±0 result. The fmul(sign(x), ffract(x)) calculation will already
+ * produce the expected NaN. To get ±0, directly compare for equality
+ * with Inf instead of using fisfinite (which is false for NaN).
+ */
+ dest->def = nir_bcsel(nb,
+ nir_ieq(nb, abs, inf),
+ nir_iand(nb, src[0], sign_bit),
+ nir_fmul(nb, sign, nir_ffract(nb, abs)));
struct vtn_pointer *i_ptr = vtn_value(b, w[6], vtn_value_type_pointer)->pointer;
struct vtn_ssa_value *whole = vtn_create_ssa_value(b, i_ptr->type->type);
@@ -344,17 +357,45 @@
}
case GLSLstd450ModfStruct: {
+ nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
+ nir_ssa_def *sign_bit =
+ nir_imm_intN_t(&b->nb, (uint64_t)1 << (src[0]->bit_size - 1),
+ src[0]->bit_size);
nir_ssa_def *sign = nir_fsign(nb, src[0]);
nir_ssa_def *abs = nir_fabs(nb, src[0]);
vtn_assert(glsl_type_is_struct_or_ifc(dest_type));
- dest->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+
+ /* See GLSLstd450Modf for explanation of the Inf and NaN handling. */
+ dest->elems[0]->def = nir_bcsel(nb,
+ nir_ieq(nb, abs, inf),
+ nir_iand(nb, src[0], sign_bit),
+ nir_fmul(nb, sign, nir_ffract(nb, abs)));
dest->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
break;
}
- case GLSLstd450Step:
- dest->def = nir_sge(nb, src[1], src[0]);
+ case GLSLstd450Step: {
+ /* The SPIR-V Extended Instructions for GLSL spec says:
+ *
+ * Result is 0.0 if x < edge; otherwise result is 1.0.
+ *
+ * Here src[1] is x, and src[0] is edge. The direct implementation is
+ *
+ * bcsel(src[1] < src[0], 0.0, 1.0)
+ *
+ * This is effectively b2f(!(src1 < src0)). Previously this was
+ * implemented using sge(src1, src0), but that produces incorrect
+ * results for NaN. Instead, we use the identity b2f(!x) = 1 - b2f(x).
+ */
+ const bool exact = nb->exact;
+ nb->exact = true;
+
+ nir_ssa_def *cmp = nir_slt(nb, src[1], src[0]);
+
+ nb->exact = exact;
+ dest->def = nir_fsub(nb, nir_imm_floatN_t(nb, 1.0f, cmp->bit_size), cmp);
break;
+ }
case GLSLstd450Length:
dest->def = nir_fast_length(nb, src[0]);
@@ -479,11 +520,35 @@
nir_ssa_def *x = nir_fclamp(nb, src[0],
nir_imm_floatN_t(nb, -clamped_x, bit_size),
nir_imm_floatN_t(nb, clamped_x, bit_size));
- dest->def =
- nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x),
- nir_fexp(nb, nir_fneg(nb, x))),
- nir_fadd(nb, nir_fexp(nb, x),
- nir_fexp(nb, nir_fneg(nb, x))));
+
+ /* The clamping will filter out NaN values causing an incorrect result.
+ * The comparison is carefully structured to get NaN result for NaN and
+ * get -0 for -0.
+ *
+ * result = abs(s) > 0.0 ? ... : s;
+ */
+ const bool exact = nb->exact;
+
+ nb->exact = true;
+ nir_ssa_def *is_regular = nir_flt(nb,
+ nir_imm_floatN_t(nb, 0, bit_size),
+ nir_fabs(nb, src[0]));
+
+ /* The extra 1.0*s ensures that subnormal inputs are flushed to zero
+ * when that is selected by the shader.
+ */
+ nir_ssa_def *flushed = nir_fmul(nb,
+ src[0],
+ nir_imm_floatN_t(nb, 1.0, bit_size));
+ nb->exact = exact;
+
+ dest->def = nir_bcsel(nb,
+ is_regular,
+ nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x),
+ nir_fexp(nb, nir_fneg(nb, x))),
+ nir_fadd(nb, nir_fexp(nb, x),
+ nir_fexp(nb, nir_fneg(nb, x)))),
+ flushed);
break;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/egl/main/egldefines.h
^
|
@@ -46,7 +46,11 @@
#define _EGL_MAX_PBUFFER_WIDTH 4096
#define _EGL_MAX_PBUFFER_HEIGHT 4096
+#ifdef AMBER
+#define _EGL_VENDOR_STRING "Mesa Project (Amber)"
+#else
#define _EGL_VENDOR_STRING "Mesa Project"
+#endif
#ifdef __cplusplus
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/egl/meson.build
^
|
@@ -165,7 +165,7 @@
files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c]
files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c')
glvnd_config = configuration_data()
- glvnd_config.set('glvnd_vendor_name', get_option('glvnd-vendor-name'))
+ glvnd_config.set('glvnd_vendor_name', glvnd_vendor_name)
configure_file(
configuration: glvnd_config,
input : 'main/50_mesa.json',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/etnaviv/drm/etnaviv_bo.c
^
|
@@ -86,7 +86,11 @@
bo = etna_bo_ref(entry->data);
/* don't break the bucket if this bo was found in one */
- list_delinit(&bo->list);
+ if (list_is_linked(&bo->list)) {
+ VG_BO_OBTAIN(bo);
+ etna_device_ref(bo->dev);
+ list_delinit(&bo->list);
+ }
}
return bo;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c
^
|
@@ -56,13 +56,13 @@
void *buffer;
/*
- * Increase the command buffer size by 1 kiB. Here we pick 1 kiB
+ * Increase the command buffer size by 4 kiB. Here we pick 4 kiB
* increment to prevent it from growing too much too quickly.
*/
size = ALIGN(stream->size + n, 1024);
/* Command buffer is too big for older kernel versions */
- if (size >= 32768)
+ if (size > 0x4000)
goto error;
buffer = realloc(stream->buffer, size * 4);
@@ -75,7 +75,7 @@
return;
error:
- WARN_MSG("command buffer too long, forcing flush.");
+ DEBUG_MSG("command buffer too long, forcing flush.");
etna_cmd_stream_force_flush(stream);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/afuc/meson.build
^
|
@@ -18,6 +18,10 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+if with_tests
+ diff = find_program('diff')
+endif
+
afuc_parser = custom_target(
'parser.[ch]',
input: 'parser.y',
@@ -55,50 +59,7 @@
build_by_default : with_tools.contains('freedreno'),
install: install_fd_decode_tools,
)
-
-disasm = executable(
- 'afuc-disasm',
- [
- 'disasm.c',
- 'emu.c',
- 'emu.h',
- 'emu-ds.c',
- 'emu-regs.c',
- 'emu-ui.c',
- 'util.c',
- 'util.h',
- ],
- include_directories: [
- inc_freedreno,
- inc_freedreno_rnn,
- inc_include,
- inc_src,
- inc_util,
- ],
- link_with: [
- libfreedreno_rnn,
- ],
- dependencies: [
- ],
- build_by_default : with_tools.contains('freedreno'),
- install: install_fd_decode_tools,
-)
-
if with_tests
- diff = find_program('diff')
-
- disasm_fw = custom_target('afuc_test.asm',
- output: 'afuc_test.asm',
- command: [disasm, '-u', files('../.gitlab-ci/reference/afuc_test.fw'), '-g', '630'],
- capture: true
- )
- test('afuc-disasm',
- diff,
- args: ['-u', files('../.gitlab-ci/reference/afuc_test.asm'), disasm_fw],
- suite: 'freedreno',
- workdir: meson.source_root()
- )
-
asm_fw = custom_target('afuc_test.fw',
output: 'afuc_test.fw',
command: [asm, '-g', '6', files('../.gitlab-ci/traces/afuc_test.asm'), '@OUTPUT@'],
@@ -110,3 +71,48 @@
workdir: meson.source_root()
)
endif
+
+# Disasm requires mmapping >4GB, so only build it where size_t is wider than 4 bytes
+if cc.sizeof('size_t') > 4
+ disasm = executable(
+ 'afuc-disasm',
+ [
+ 'disasm.c',
+ 'emu.c',
+ 'emu.h',
+ 'emu-ds.c',
+ 'emu-regs.c',
+ 'emu-ui.c',
+ 'util.c',
+ 'util.h',
+ ],
+ include_directories: [
+ inc_freedreno,
+ inc_freedreno_rnn,
+ inc_include,
+ inc_src,
+ inc_util,
+ ],
+ link_with: [
+ libfreedreno_rnn,
+ ],
+ dependencies: [
+ ],
+ build_by_default : with_tools.contains('freedreno'),
+ install: install_fd_decode_tools,
+ )
+
+ if with_tests
+ disasm_fw = custom_target('afuc_test.asm',
+ output: 'afuc_test.asm',
+ command: [disasm, '-u', files('../.gitlab-ci/reference/afuc_test.fw'), '-g', '630'],
+ capture: true
+ )
+ test('afuc-disasm',
+ diff,
+ args: ['-u', files('../.gitlab-ci/reference/afuc_test.asm'), disasm_fw],
+ suite: 'freedreno',
+ workdir: meson.source_root()
+ )
+ endif
+endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a630-fails.txt
^
|
@@ -262,8 +262,6 @@
spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8I- swizzled- border color only,Fail
spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8UI- swizzled- border color only,Fail
-spec@arb_texture_view@rendering-layers-image,Fail
-spec@arb_texture_view@rendering-layers-image@layers rendering of image1DArray,Fail
spec@arb_timer_query@timestamp-get,Fail
spec@arb_transform_feedback3@arb_transform_feedback3-ext_interleaved_two_bufs_vs,Fail
spec@arb_transform_feedback3@gl_skipcomponents1-1,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ci/traces-freedreno.yml
^
|
@@ -45,9 +45,9 @@
# checksum: 4b707f385256b380c936186db8c251cb
# 1 minute
- device: freedreno-a530
- checksum: 130dbeac42683b46fed4b268c5aad984
+ checksum: a71d62bb2c0fabeca41468628777b441
- device: freedreno-a630
- checksum: 139861e52f9425b4adb7c0b90b885f91
+ checksum: 339dce29ae08569652438116829510c7
- path: xonotic/xonotic-keybench-high.trace
expectations:
# Skipped since it's long on a530.
@@ -327,9 +327,9 @@
#- device: freedreno-a306
# checksum: 0c57ccc3989b75a940b28ea1cc09cb0d
- device: freedreno-a530
- checksum: 4715d72a7958f2fd5a387c16b3a01579
+ checksum: bc19f0f58935fdb348f401396e6845e1
- device: freedreno-a630
- checksum: 1e397c5c34c9c50350a8db1a060a6bbb
+ checksum: f546f840e916ab0f11f8df0e4eee584d
- path: glmark2/shading:shading=blinn-phong-inf.trace
expectations:
- device: freedreno-a306
@@ -422,7 +422,7 @@
- path: gputest/gimark.trace
expectations:
- device: freedreno-a630
- checksum: dd8fb768033d09f6edc98b4cfff02c6f
+ checksum: e58167bd8eeb8952facbc00ff0449135
- path: gputest/pixmark-julia-fp32.trace
expectations:
- device: freedreno-a630
@@ -452,11 +452,11 @@
- path: gputest/plot3d.trace
expectations:
- device: freedreno-a306
- checksum: 302943895dbdd7730958fb0175f23b7f
+ checksum: f6ecd9b8afc692b0cdb459b9b30db8d4
- device: freedreno-a530
- checksum: 755aa5b521237ddf9fea3181d2ba2b75
+ checksum: 4faafe5fab0d8ec6d7b549c94f663c92
- device: freedreno-a630
- checksum: 302aec1ced68e22182460b617b0f2aef
+ checksum: 0a6a16c394a413f02ec2ebcc3251e366
# Note: Requires GL4 for tess.
- path: gputest/tessmark.trace
expectations:
@@ -473,9 +473,9 @@
- path: humus/AmbientAperture.trace
expectations:
- device: freedreno-a306
- checksum: 3d9243cbd0659cb58b16cade2be3f2c2
+ checksum: 8d4c52f0af9c09710d358f24c73fae3c
- device: freedreno-a530
- checksum: c55c1ba5683306980956b5f89563f343
+ checksum: aab5c853e383e1cda56663d65f6925ad
- device: freedreno-a630
checksum: 83fd7bce0fc1e1f30bd143b7d30ca890
- path: humus/CelShading.trace
@@ -536,7 +536,7 @@
expectations:
# a306/a630 would need higher GL version to run
- device: freedreno-a630
- checksum: e93cf9682c9ca5ed6a6effe5b7fdd386
+ checksum: 0e32ca8fc815a7250f38a07faeafb21b
- path: pathfinder/canvas_text_v2.trace
expectations:
# a306/a630 would need higher GL version to run
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3.c
^
|
@@ -402,9 +402,9 @@
{
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
if (block->physical_predecessors[i] == pred) {
- if (i < block->predecessors_count - 1) {
+ if (i < block->physical_predecessors_count - 1) {
block->physical_predecessors[i] =
- block->physical_predecessors[block->predecessors_count - 1];
+ block->physical_predecessors[block->physical_predecessors_count - 1];
}
block->physical_predecessors_count--;
@@ -490,6 +490,11 @@
*new_reg = *reg;
}
+ if (instr->address) {
+ assert(instr->srcs_count > 0);
+ new_instr->address = new_instr->srcs[instr->srcs_count - 1];
+ }
+
return new_instr;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_compiler_nir.c
^
|
@@ -1130,10 +1130,10 @@
struct ir3_block *b = ctx->block;
struct tex_src_info info = {0};
nir_intrinsic_instr *bindless_tex = ir3_bindless_resource(intr->src[0]);
- ctx->so->bindless_tex = true;
if (bindless_tex) {
/* Bindless case */
+ ctx->so->bindless_tex = true;
info.flags |= IR3_INSTR_B;
/* Gather information required to determine which encoding to
@@ -1235,11 +1235,20 @@
}
info.flags |= flags;
- for (unsigned i = 0; i < ncoords; i++)
- coords[i] = src0[i];
-
- if (ncoords == 1)
- coords[ncoords++] = create_immed(b, 0);
+ /* hw doesn't do 1d, so we treat it as 2d with height of 1, and patch up the
+ * y coord. Note that the array index must come after the fake y coord.
+ */
+ enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
+ if (dim == GLSL_SAMPLER_DIM_1D || dim == GLSL_SAMPLER_DIM_BUF) {
+ coords[0] = src0[0];
+ coords[1] = create_immed(b, 0);
+ for (unsigned i = 1; i < ncoords; i++)
+ coords[i + 1] = src0[i];
+ ncoords++;
+ } else {
+ for (unsigned i = 0; i < ncoords; i++)
+ coords[i] = src0[i];
+ }
sam = emit_sam(ctx, OPC_ISAM, info, type, 0b1111,
ir3_create_collect(b, coords, ncoords), NULL);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_cp.c
^
|
@@ -303,6 +303,22 @@
return valid_swap;
}
+/* Values that are uniform inside a loop can become divergent outside
+ * it if the loop has a divergent trip count. This means that we can't
+ * propagate a copy of a shared to non-shared register if it would
+ * make the shared reg's live range extend outside of its loop. Users
+ * outside the loop would see the value for the thread(s) that last
+ * exited the loop, rather than for their own thread.
+ */
+static bool
+is_valid_shared_copy(struct ir3_instruction *dst_instr,
+ struct ir3_instruction *src_instr,
+ struct ir3_register *src_reg)
+{
+ return !(src_reg->flags & IR3_REG_SHARED) ||
+ dst_instr->block->loop_id == src_instr->block->loop_id;
+}
+
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immedate/const (which replace the src
@@ -316,22 +332,14 @@
{
struct ir3_instruction *src = ssa(reg);
- /* Values that are uniform inside a loop can become divergent outside
- * it if the loop has a divergent trip count. This means that we can't
- * propagate a copy of a shared to non-shared register if it would
- * make the shared reg's live range extend outside of its loop. Users
- * outside the loop would see the value for the thread(s) that last
- * exited the loop, rather than for their own thread.
- */
- if ((src->dsts[0]->flags & IR3_REG_SHARED) &&
- src->block->loop_id != instr->block->loop_id)
- return false;
-
if (is_eligible_mov(src, instr, true)) {
/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
struct ir3_register *src_reg = src->srcs[0];
unsigned new_flags = reg->flags;
+ if (!is_valid_shared_copy(instr, src, src_reg))
+ return false;
+
combine_flags(&new_flags, src);
if (ir3_valid_flags(instr, n, new_flags)) {
@@ -357,6 +365,9 @@
struct ir3_register *src_reg = src->srcs[0];
unsigned new_flags = reg->flags;
+ if (!is_valid_shared_copy(instr, src, src_reg))
+ return false;
+
if (src_reg->flags & IR3_REG_ARRAY)
return false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_delay.c
^
|
@@ -98,7 +98,7 @@
*/
bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) !=
(consumer->srcs[n]->flags & IR3_REG_HALF);
- unsigned penalty = mismatched_half ? 2 : 0;
+ unsigned penalty = mismatched_half ? 3 : 0;
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1 + penalty;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_lower_parallelcopy.c
^
|
@@ -109,11 +109,18 @@
.flags = entry->flags & ~IR3_REG_HALF,
});
+ /* If src and dst are within the same full register, then swapping src
+ * with tmp above will also move dst to tmp. Account for that here.
+ */
+ unsigned dst =
+ (entry->src.reg & ~1u) == (entry->dst & ~1u) ?
+ tmp + (entry->dst & 1u) : entry->dst;
+
/* Do the original swap with src replaced with tmp */
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = tmp + (entry->src.reg & 1)},
- .dst = entry->dst,
+ .dst = dst,
.flags = entry->flags,
});
@@ -192,9 +199,16 @@
.flags = entry->flags & ~IR3_REG_HALF,
});
+ /* Similar to in do_swap(), account for src being swapped with tmp if
+ * src and dst are in the same register.
+ */
+ struct copy_src src = entry->src;
+ if (!src.flags && (src.reg & ~1u) == (entry->dst & ~1u))
+ src.reg = tmp + (src.reg & 1u);
+
do_copy(compiler, instr,
&(struct copy_entry){
- .src = entry->src,
+ .src = src,
.dst = tmp + (entry->dst & 1),
.flags = entry->flags,
});
@@ -223,12 +237,12 @@
cov->cat1.src_type = TYPE_U32;
ir3_instr_move_before(cov, instr);
} else {
- /* shr.b dst, src, h(16) */
+ /* shr.b dst, src, (16) */
struct ir3_instruction *shr =
ir3_instr_create(instr->block, OPC_SHR_B, 1, 2);
ir3_dst_create(shr, dst_num, entry->flags);
ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF);
- ir3_src_create(shr, 0, entry->flags | IR3_REG_IMMED)->uim_val = 16;
+ ir3_src_create(shr, 0, IR3_REG_IMMED)->uim_val = 16;
ir3_instr_move_before(shr, instr);
}
return;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_nir.c
^
|
@@ -210,6 +210,7 @@
progress |= OPT(s, nir_lower_phis_to_scalar, false);
progress |= OPT(s, nir_copy_prop);
+ progress |= OPT(s, nir_opt_deref);
progress |= OPT(s, nir_opt_dce);
progress |= OPT(s, nir_opt_cse);
static int gcm = -1;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_parser.y
^
|
@@ -674,7 +674,7 @@
}
buf_header_addr_reg:
-| '(' T_CONSTANT ')' {
+ '(' T_CONSTANT ')' {
assert(($2 & 0x1) == 0); /* half-reg not allowed */
unsigned reg = $2 >> 1;
@@ -682,6 +682,7 @@
/* reserve space in immediates for the actual value to be plugged in later: */
add_const($2, 0, 0, 0, 0);
}
+|
buf_header: T_A_BUF const_val {
int idx = info->num_bufs++;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_ra.c
^
|
@@ -780,10 +780,12 @@
return false;
}
+ unsigned conflicting_file_size =
+ reg_file_size(file, conflicting->interval.reg);
unsigned avail_start, avail_end;
bool evicted = false;
BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict,
- reg_file_size(file, conflicting->interval.reg)) {
+ conflicting_file_size) {
unsigned size = avail_end - avail_start;
/* non-half registers must be aligned */
@@ -820,6 +822,10 @@
conflicting->physreg_end - conflicting->physreg_start)
continue;
+ if (killed->physreg_end > conflicting_file_size ||
+ conflicting->physreg_end > reg_file_size(file, killed->interval.reg))
+ continue;
+
/* We can't swap the killed range if it partially/fully overlaps the
* space we're trying to allocate or (in speculative mode) if it's
* already been swapped and will overlap when we actually evict.
@@ -962,9 +968,9 @@
assert(!interval->frozen);
/* Killed sources don't count because they go at the end and can
- * overlap the register we're trying to add.
+ * overlap the register we're trying to add, unless that register is itself a source.
*/
- if (!interval->is_killed && !is_source) {
+ if (!interval->is_killed || is_source) {
removed_size += interval->physreg_end - interval->physreg_start;
if (interval->interval.reg->flags & IR3_REG_HALF) {
removed_half_size += interval->physreg_end -
@@ -1322,7 +1328,8 @@
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
ir3_dst_create(pcopy, INVALID_REG,
- entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+ entry->interval->interval.reg->flags &
+ (IR3_REG_HALF | IR3_REG_ARRAY));
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
@@ -1332,7 +1339,8 @@
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
ir3_src_create(pcopy, INVALID_REG,
- entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+ entry->interval->interval.reg->flags &
+ (IR3_REG_HALF | IR3_REG_ARRAY));
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
@@ -1768,8 +1776,9 @@
pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
}
- struct ir3_register *dst_reg =
- ir3_dst_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+ unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
+
+ struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
dst_reg->wrmask = reg->wrmask;
dst_reg->size = reg->size;
assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
@@ -1778,8 +1787,7 @@
pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
}
- struct ir3_register *src_reg =
- ir3_src_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+ struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
src_reg->wrmask = reg->wrmask;
src_reg->size = reg->size;
assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_spill.c
^
|
@@ -1783,15 +1783,31 @@
return true;
}
+static struct ir3_register *
+simplify_phi_def(struct ir3_register *def)
+{
+ if (def->instr->opc == OPC_META_PHI) {
+ struct ir3_instruction *phi = def->instr;
+
+ /* Note: this function is always called at least once after visiting the
+ * phi, so either there has been a simplified phi in the meantime, in
+ * which case we will set progress=true and visit the definition again, or
+ * phi->data already has the most up-to-date value. Therefore we don't
+ * have to recursively check phi->data.
+ */
+ if (phi->data)
+ return phi->data;
+ }
+
+ return def;
+}
+
static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
foreach_src (src, instr) {
- if (src->def && src->def->instr->opc == OPC_META_PHI) {
- struct ir3_instruction *phi = src->def->instr;
- if (phi->data)
- src->def = phi->data;
- }
+ if (src->def)
+ src->def = simplify_phi_def(src->def);
}
}
@@ -1821,6 +1837,10 @@
simplify_phi_srcs(instr);
}
+ /* Visit phi nodes in the successors to make sure that phi sources are
+ * always visited at least once after visiting the definition they
+ * point to. See note in simplify_phi_def() for why this is necessary.
+ */
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->successors[i];
if (!succ)
@@ -1828,11 +1848,13 @@
foreach_instr (instr, &succ->instr_list) {
if (instr->opc != OPC_META_PHI)
break;
- if (instr->flags & IR3_INSTR_UNUSED)
- continue;
-
- simplify_phi_srcs(instr);
- progress |= simplify_phi_node(instr);
+ if (instr->flags & IR3_INSTR_UNUSED) {
+ if (instr->data)
+ instr->data = simplify_phi_def(instr->data);
+ } else {
+ simplify_phi_srcs(instr);
+ progress |= simplify_phi_node(instr);
+ }
}
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/ir3/meson.build
^
|
@@ -27,7 +27,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
ir3_nir_imul_c = custom_target(
@@ -39,7 +39,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
ir3_parser = custom_target(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
^
|
@@ -293,7 +293,7 @@
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
* for advanced_blend in sysmem mode if a feedback loop is detected.
*/
- if (subpass->feedback) {
+ if (subpass->feedback_loop_color || subpass->feedback_loop_ds) {
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
@@ -477,7 +477,6 @@
tu6_apply_depth_bounds_workaround(struct tu_device *device,
uint32_t *rb_depth_cntl)
{
- return;
if (!device->physical_device->info->a6xx.depth_bounds_require_depth_test_quirk)
return;
@@ -3832,7 +3831,8 @@
bool depth_write = tu6_writes_depth(cmd, depth_test_enable);
bool stencil_write = tu6_writes_stencil(cmd);
- if (cmd->state.pipeline->lrz.fs_has_kill &&
+ if ((cmd->state.pipeline->lrz.fs_has_kill ||
+ cmd->state.pipeline->subpass_feedback_loop_ds) &&
(depth_write || stencil_write)) {
zmode = cmd->state.lrz.valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_formats.c
^
|
@@ -484,7 +484,7 @@
const struct tu_physical_device *physical_device,
const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
VkExternalMemoryHandleTypeFlagBits handleType,
- VkExternalMemoryProperties *external_properties)
+ VkExternalImageFormatProperties *external_properties)
{
VkExternalMemoryFeatureFlagBits flags = 0;
VkExternalMemoryHandleTypeFlags export_flags = 0;
@@ -526,11 +526,14 @@
handleType);
}
- *external_properties = (VkExternalMemoryProperties) {
- .externalMemoryFeatures = flags,
- .exportFromImportedHandleTypes = export_flags,
- .compatibleHandleTypes = compat_flags,
- };
+ if (external_properties) {
+ external_properties->externalMemoryProperties =
+ (VkExternalMemoryProperties) {
+ .externalMemoryFeatures = flags,
+ .exportFromImportedHandleTypes = export_flags,
+ .compatibleHandleTypes = compat_flags,
+ };
+ }
return VK_SUCCESS;
}
@@ -597,7 +600,7 @@
if (external_info && external_info->handleType != 0) {
result = tu_get_external_image_format_properties(
physical_device, base_info, external_info->handleType,
- &external_props->externalMemoryProperties);
+ external_props);
if (result != VK_SUCCESS)
goto fail;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_pass.c
^
|
@@ -448,7 +448,7 @@
continue;
for (unsigned k = 0; k < subpass->input_count; k++) {
if (subpass->input_attachments[k].attachment == a) {
- subpass->feedback = true;
+ subpass->feedback_loop_color = true;
break;
}
}
@@ -458,7 +458,7 @@
for (unsigned k = 0; k < subpass->input_count; k++) {
if (subpass->input_attachments[k].attachment ==
subpass->depth_stencil_attachment.attachment) {
- subpass->feedback = true;
+ subpass->feedback_loop_ds = true;
break;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_pipeline.c
^
|
@@ -273,6 +273,8 @@
VkFormat depth_attachment_format;
uint32_t render_components;
uint32_t multiview_mask;
+
+ bool subpass_feedback_loop_ds;
};
static bool
@@ -3077,6 +3079,7 @@
return VK_ERROR_OUT_OF_HOST_MEMORY;
(*pipeline)->layout = builder->layout;
+ (*pipeline)->subpass_feedback_loop_ds = builder->subpass_feedback_loop_ds;
(*pipeline)->executables_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
@@ -3190,6 +3193,8 @@
const struct tu_subpass *subpass =
&pass->subpasses[create_info->subpass];
+ builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
+
builder->multiview_mask = subpass->multiview_mask;
builder->rasterizer_discard =
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_private.h
^
|
@@ -1262,6 +1262,8 @@
struct tu_lrz_pipeline lrz;
+ bool subpass_feedback_loop_ds;
+
void *executables_mem_ctx;
/* tu_pipeline_executable */
struct util_dynarray executables;
@@ -1610,8 +1612,8 @@
uint32_t resolve_count;
bool resolve_depth_stencil;
- /* True if there is any feedback loop at all. */
- bool feedback;
+ bool feedback_loop_color;
+ bool feedback_loop_ds;
/* True if we must invalidate UCHE thanks to a feedback loop. */
bool feedback_invalidate;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/freedreno/vulkan/tu_query.c
^
|
@@ -117,7 +117,7 @@
query_iova(struct occlusion_query_slot, pool, query, field)
#define pipeline_stat_query_iova(pool, query, field) \
- pool->bo.iova + pool->stride * query + \
+ pool->bo.iova + pool->stride * (query) + \
offsetof(struct pipeline_stat_query_slot, field)
#define primitive_query_iova(pool, query, field, i) \
@@ -125,9 +125,9 @@
offsetof(struct primitive_slot_value, values[i])
#define perf_query_iova(pool, query, field, i) \
- pool->bo.iova + pool->stride * query + \
+ pool->bo.iova + pool->stride * (query) + \
sizeof(struct query_slot) + \
- sizeof(struct perfcntr_query_slot) * i + \
+ sizeof(struct perfcntr_query_slot) * (i) + \
offsetof(struct perfcntr_query_slot, field)
#define query_available_iova(pool, query) \
@@ -135,11 +135,11 @@
#define query_result_iova(pool, query, type, i) \
pool->bo.iova + pool->stride * (query) + \
- sizeof(struct query_slot) + sizeof(type) * i
+ sizeof(struct query_slot) + sizeof(type) * (i)
#define query_result_addr(pool, query, type, i) \
- pool->bo.map + pool->stride * query + \
- sizeof(struct query_slot) + sizeof(type) * i
+ pool->bo.map + pool->stride * (query) + \
+ sizeof(struct query_slot) + sizeof(type) * (i)
#define query_is_available(slot) slot->available
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/draw/draw_llvm.c
^
|
@@ -3515,8 +3515,9 @@
LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
for (i = 0; i < vector_length; i++) {
- LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), lp_build_const_int32(gallivm, i), "");
- invocvec = LLVMBuildInsertElement(builder, invocvec, idx, idx, "");
+ LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
+ LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
+ invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
}
system_values.invocation_id = invocvec;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
^
|
@@ -316,7 +316,7 @@
draw->rasterizer->clip_halfz,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
- draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
+ draw_pt_so_emit_prepare( fpme->so_emit, (gs == NULL && tes == NULL));
if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit, out_prim,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/driver_trace/tr_context.c
^
|
@@ -2191,7 +2191,7 @@
if (!trace_enabled())
goto error1;
- tr_ctx = ralloc(NULL, struct trace_context);
+ tr_ctx = rzalloc(NULL, struct trace_context);
if (!tr_ctx)
goto error1;
@@ -2271,6 +2271,8 @@
TR_CTX_INIT(create_stream_output_target);
TR_CTX_INIT(stream_output_target_destroy);
TR_CTX_INIT(set_stream_output_targets);
+ /* this is lavapipe-only and can't be traced */
+ tr_ctx->base.stream_output_target_offset = pipe->stream_output_target_offset;
TR_CTX_INIT(resource_copy_region);
TR_CTX_INIT(blit);
TR_CTX_INIT(flush_resource);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h
^
|
@@ -132,8 +132,10 @@
return 1;
case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
- case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
return lp_has_fp16();
+ //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform
+ case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
+ return 0;
case PIPE_SHADER_CAP_INT64_ATOMICS:
return 0;
case PIPE_SHADER_CAP_INT16:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
^
|
@@ -732,8 +732,7 @@
break;
}
case nir_op_fisfinite32:
- result = lp_build_isfinite(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
- break;
+ unreachable("Should have been lowered in nir_opt_algebraic_late.");
case nir_op_flog2:
result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
@@ -2458,7 +2457,6 @@
NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
} while (progress);
- nir_lower_bool_to_int32(nir);
do {
progress = false;
@@ -2469,4 +2467,9 @@
NIR_PASS_V(nir, nir_opt_cse);
}
} while (progress);
+
+ if (nir_lower_bool_to_int32(nir)) {
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_dce);
+ }
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
^
|
@@ -1438,6 +1438,7 @@
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
params->type = bld_base->base.type;
params->context_ptr = bld->context_ptr;
@@ -1491,10 +1492,25 @@
return;
}
- if (params->texture_index_offset)
- params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
- params->texture_index_offset,
- lp_build_const_int32(bld_base->base.gallivm, 0), "");
+ if (params->texture_index_offset) {
+ struct lp_build_loop_state loop_state;
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
+ LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+ LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
+
+ struct lp_build_if_state ifthen;
+ lp_build_if(&ifthen, gallivm, if_cond);
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
+ loop_state.counter, "");
+ LLVMBuildStore(builder, value_ptr, res_store);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
+ NULL, LLVMIntUGE);
+ LLVMValueRef idx_val = LLVMBuildLoad(builder, res_store, "");
+ params->texture_index_offset = idx_val;
+ }
params->type = bld_base->base.type;
bld->sampler->emit_tex_sample(bld->sampler,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c
^
|
@@ -1726,7 +1726,10 @@
/* ima = +0.5 / abs(coord); */
LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_div(coord_bld, posHalf, absCoord);
+ /* avoid div by zero */
+ LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
+ LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
+ LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
return ima;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
^
|
@@ -4660,7 +4660,7 @@
out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
}
- if (dims >= 3) {
+ if (dims >= 3 || layer_coord) {
out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c
^
|
@@ -2106,7 +2106,7 @@
switch (instr->op) {
case nir_texop_tex:
if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) >
- instr->coord_components + instr->is_shadow)
+ MAX2(instr->coord_components, 2) + instr->is_shadow)
tex_opcode = TGSI_OPCODE_TXP;
else
tex_opcode = TGSI_OPCODE_TEX;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_box.h
^
|
@@ -3,6 +3,7 @@
#include "pipe/p_state.h"
#include "util/u_math.h"
+#include "util/format/u_format.h"
static inline void
u_box_1d(unsigned x, unsigned w, struct pipe_box *box)
@@ -239,4 +240,22 @@
dst->depth = MAX2(src->depth >> l, 1);
}
+/* Converts a box specified in pixels to an equivalent box specified
+ * in blocks, where the boxes represent a region-of-interest of an image with
+ * the given format. This is trivial (a copy) for uncompressed formats.
+ */
+static inline void
+u_box_pixels_to_blocks(struct pipe_box *blocks,
+ const struct pipe_box *pixels, enum pipe_format format)
+{
+ u_box_3d(
+ pixels->x / util_format_get_blockwidth(format),
+ pixels->y / util_format_get_blockheight(format),
+ pixels->z,
+ DIV_ROUND_UP(pixels->width, util_format_get_blockwidth(format)),
+ DIV_ROUND_UP(pixels->height, util_format_get_blockheight(format)),
+ pixels->depth,
+ blocks);
+}
+
#endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_threaded_context.c
^
|
@@ -2148,11 +2148,9 @@
* only get resource_copy_region.
*/
if (usage & PIPE_MAP_DISCARD_RANGE) {
- struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
+ struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
uint8_t *map;
- ttrans->staging = NULL;
-
u_upload_alloc(tc->base.stream_uploader, 0,
box->width + (box->x % tc->map_buffer_alignment),
tc->map_buffer_alignment, &ttrans->b.offset,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/auxiliary/util/u_vbuf.c
^
|
@@ -491,9 +491,10 @@
* themselves, meaning that if stride < element_size, the mapped size will
* be too small and conversion will overrun the map buffer
*
- * instead, add the size of the largest possible attribute to ensure the map is large enough
+ * instead, add the size of the largest possible attribute to the final attribute's offset
+ * in order to ensure the map is large enough
*/
- unsigned last_offset = offset + size - vb->stride;
+ unsigned last_offset = size - vb->stride;
size = MAX2(size, last_offset + sizeof(double)*4);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_batch.c
^
|
@@ -264,21 +264,30 @@
crocus_batch_reset(batch);
}
-static struct drm_i915_gem_exec_object2 *
-find_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo)
+static int
+find_exec_index(struct crocus_batch *batch, struct crocus_bo *bo)
{
unsigned index = READ_ONCE(bo->index);
if (index < batch->exec_count && batch->exec_bos[index] == bo)
- return &batch->validation_list[index];
+ return index;
/* May have been shared between multiple active batches */
for (index = 0; index < batch->exec_count; index++) {
if (batch->exec_bos[index] == bo)
- return &batch->validation_list[index];
+ return index;
}
+ return -1;
+}
+
+static struct drm_i915_gem_exec_object2 *
+find_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo)
+{
+ int index = find_exec_index(batch, bo);
- return NULL;
+ if (index == -1)
+ return NULL;
+ return &batch->validation_list[index];
}
static void
@@ -410,7 +419,7 @@
(struct drm_i915_gem_relocation_entry) {
.offset = offset,
.delta = target_offset,
- .target_handle = target->index,
+ .target_handle = find_exec_index(batch, target),
.presumed_offset = entry->offset,
};
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_blit.c
^
|
@@ -433,6 +433,7 @@
info->src.level,
&info->src.box, NULL);
+ pipe_surface_release(ctx, &dst_view);
}
return;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_bufmgr.c
^
|
@@ -430,6 +430,9 @@
bo->index = -1;
bo->kflags = 0;
+ if (flags & BO_ALLOC_SCANOUT)
+ bo->scanout = 1;
+
if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
struct drm_i915_gem_caching arg = {
.handle = bo->gem_handle,
@@ -610,6 +613,16 @@
entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
_mesa_hash_table_remove(bufmgr->handle_table, entry);
+
+ list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) {
+ struct drm_gem_close close = { .handle = export->gem_handle };
+ intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);
+
+ list_del(&export->link);
+ free(export);
+ }
+ } else {
+ assert(list_is_empty(&bo->exports));
}
/* Close this object */
@@ -1001,6 +1014,9 @@
static bool
can_map_cpu(struct crocus_bo *bo, unsigned flags)
{
+ if (bo->scanout)
+ return false;
+
if (bo->cache_coherent)
return true;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_bufmgr.h
^
|
@@ -141,12 +141,18 @@
*/
bool userptr;
+ /**
+ * Whether this buffer object is used for scanout.
+ */
+ bool scanout;
+
/** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
uint32_t hash;
};
#define BO_ALLOC_ZEROED (1 << 0)
#define BO_ALLOC_COHERENT (1 << 1)
+#define BO_ALLOC_SCANOUT (1 << 2)
/**
* Allocate a buffer object.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_resource.c
^
|
@@ -688,9 +688,10 @@
devinfo->ver < 6)
return NULL;
- UNUSED const bool isl_surf_created_successfully =
+ const bool isl_surf_created_successfully =
crocus_resource_configure_main(screen, res, templ, modifier, 0);
- assert(isl_surf_created_successfully);
+ if (!isl_surf_created_successfully)
+ return NULL;
const char *name = "miptree";
@@ -698,6 +699,10 @@
if (templ->usage == PIPE_USAGE_STAGING)
flags |= BO_ALLOC_COHERENT;
+ /* Scanout buffers need to be WC. */
+ if (templ->bind & PIPE_BIND_SCANOUT)
+ flags |= BO_ALLOC_SCANOUT;
+
uint64_t aux_size = 0;
uint32_t aux_preferred_alloc_flags;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/crocus/crocus_state.c
^
|
@@ -1983,9 +1983,9 @@
* "Grid Intersection Quantization" rules as specified by the
* "Zero-Width (Cosmetic) Line Rasterization" section of the docs.
*/
- line_width = 0.0f;
+ /* hack around this for gfx4/5 fps counters in hud. */
+ line_width = GFX_VER < 6 ? 1.5f : 0.0f;
}
-
return line_width;
}
@@ -4750,6 +4750,22 @@
key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
}
+static inline GLenum
+compare_func_to_gl(enum pipe_compare_func pipe_func)
+{
+ static const unsigned map[] = {
+ [PIPE_FUNC_NEVER] = GL_NEVER,
+ [PIPE_FUNC_LESS] = GL_LESS,
+ [PIPE_FUNC_EQUAL] = GL_EQUAL,
+ [PIPE_FUNC_LEQUAL] = GL_LEQUAL,
+ [PIPE_FUNC_GREATER] = GL_GREATER,
+ [PIPE_FUNC_NOTEQUAL] = GL_NOTEQUAL,
+ [PIPE_FUNC_GEQUAL] = GL_GEQUAL,
+ [PIPE_FUNC_ALWAYS] = GL_ALWAYS,
+ };
+ return map[pipe_func];
+}
+
/**
* Populate FS program key fields based on the current state.
*/
@@ -4836,7 +4852,7 @@
#if GFX_VER <= 5
if (fb->nr_cbufs > 1 && zsa->cso.alpha_enabled) {
- key->alpha_test_func = zsa->cso.alpha_func;
+ key->alpha_test_func = compare_func_to_gl(zsa->cso.alpha_func);
key->alpha_test_ref = zsa->cso.alpha_ref_value;
}
#endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_context.c
^
|
@@ -407,6 +407,7 @@
{
struct etna_cmd_stream *stream = ctx->stream;
struct etna_screen *screen = ctx->screen;
+ uint32_t dummy_attribs[VIVS_NFE_GENERIC_ATTRIB__LEN] = { 0 };
etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENGL);
etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x00000001);
@@ -467,6 +468,20 @@
VIVS_VS_ICACHE_INVALIDATE_UNK4);
}
+ /* It seems that some GPUs (at least some GC400 have shown this behavior)
+ * come out of reset with random vertex attributes enabled and also don't
+ * disable them on the write to the first config register as normal. Enabling
+ * all attributes seems to provide the GPU with the required edge to actually
+ * disable the unused attributes on the next draw.
+ */
+ if (screen->specs.halti >= 5) {
+ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
+ VIVS_NFE_GENERIC_ATTRIB__LEN, dummy_attribs);
+ } else {
+ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
+ screen->specs.halti >= 0 ? 16 : 12, dummy_attribs);
+ }
+
ctx->dirty = ~0L;
ctx->dirty_sampler_views = ~0L;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c
^
|
@@ -454,7 +454,7 @@
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key) in_dt
+ const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
^
|
@@ -560,7 +560,7 @@
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key) in_dt
+ const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
^
|
@@ -91,6 +91,7 @@
.vastc_srgb = fd5_ctx->vastc_srgb,
.fastc_srgb = fd5_ctx->fastc_srgb,
},
+ .clip_plane_enable = ctx->rasterizer->clip_plane_enable,
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
^
|
@@ -658,7 +658,7 @@
OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG)
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE))
fd5_program_emit(ctx, ring, emit);
if (dirty & FD_DIRTY_RASTERIZER) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c
^
|
@@ -250,9 +250,12 @@
setup_stages(emit, s);
bool do_streamout = (s[VS].v->shader->stream_output.num_outputs > 0);
- uint8_t clip_mask = s[VS].v->clip_mask, cull_mask = s[VS].v->cull_mask;
+ uint8_t clip_mask = s[VS].v->clip_mask,
+ cull_mask = s[VS].v->cull_mask;
uint8_t clip_cull_mask = clip_mask | cull_mask;
+ clip_mask &= ctx->rasterizer->clip_plane_enable;
+
fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS;
pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
@@ -711,7 +714,7 @@
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key) in_dt
+ const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
struct fd5_program_state *state = CALLOC_STRUCT(fd5_program_state);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h
^
|
@@ -52,7 +52,7 @@
const struct ir3_shader_variant *so);
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit);
+ struct fd5_emit *emit) in_dt;
void fd5_prog_init(struct pipe_context *pctx);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_context.c
^
|
@@ -151,7 +151,8 @@
BIT(FD6_GROUP_ZSA));
fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG,
BIT(FD6_GROUP_LRZ) | BIT(FD6_GROUP_LRZ_BINNING));
- fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_PROG));
+ fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE,
+ BIT(FD6_GROUP_PROG));
fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER));
fd_context_add_map(ctx,
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
^
|
@@ -156,6 +156,7 @@
.sample_shading = (ctx->min_samples > 1),
.msaa = (ctx->framebuffer.samples > 1),
},
+ .clip_plane_enable = ctx->rasterizer->clip_plane_enable,
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
@@ -195,7 +196,7 @@
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
- if (!(ctx->dirty & FD_DIRTY_PROG)) {
+ if (!(ctx->gen_dirty & BIT(FD6_GROUP_PROG))) {
emit.prog = fd6_ctx->prog;
} else {
fd6_ctx->prog = fd6_emit_get_prog(&emit);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_program.c
^
|
@@ -351,8 +351,10 @@
static void
setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd6_program_state *state,
- const struct ir3_shader_key *key, bool binning_pass) assert_dt
+ const struct ir3_cache_key *cache_key,
+ bool binning_pass) assert_dt
{
+ const struct ir3_shader_key *key = &cache_key->key;
uint32_t pos_regid, psize_regid, color_regid[8], posz_regid;
uint32_t clip0_regid, clip1_regid;
uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
@@ -536,6 +538,8 @@
cull_mask = last_shader->cull_mask;
uint8_t clip_cull_mask = clip_mask | cull_mask;
+ clip_mask &= cache_key->clip_plane_enable;
+
/* If we have streamout, link against the real FS, rather than the
* dummy FS used for binning pass state, to ensure the OUTLOC's
* match. Depending on whether we end up doing sysmem or gmem,
@@ -1184,7 +1188,7 @@
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key) in_dt
+ const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_context.h
^
|
@@ -172,8 +172,9 @@
* from hw perspective:
*/
FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
- FD_DIRTY_BLEND_DUAL = BIT(25),
-#define NUM_DIRTY_BITS 26
+ FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
+ FD_DIRTY_BLEND_DUAL = BIT(26),
+#define NUM_DIRTY_BITS 27
/* additional flag for state requires updated resource tracking: */
FD_DIRTY_RESOURCE = BIT(31),
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_state.c
^
|
@@ -38,6 +38,8 @@
#include "freedreno_texture.h"
#include "freedreno_util.h"
+#define get_safe(ptr, field) ((ptr) ? (ptr)->field : 0)
+
/* All the generic state handling.. In case of CSO's that are specific
* to the GPU version, when the bind and the delete are common they can
* go in here.
@@ -434,7 +436,8 @@
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_scissor_state *old_scissor = fd_context_get_scissor(ctx);
- bool discard = ctx->rasterizer && ctx->rasterizer->rasterizer_discard;
+ bool discard = get_safe(ctx->rasterizer, rasterizer_discard);
+ unsigned clip_plane_enable = get_safe(ctx->rasterizer, clip_plane_enable);
ctx->rasterizer = hwcso;
fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
@@ -453,8 +456,11 @@
if (old_scissor != fd_context_get_scissor(ctx))
fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
- if (ctx->rasterizer && (discard != ctx->rasterizer->rasterizer_discard))
+ if (discard != get_safe(ctx->rasterizer, rasterizer_discard))
fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_DISCARD);
+
+ if (clip_plane_enable != get_safe(ctx->rasterizer, clip_plane_enable))
+ fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE);
}
static void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_util.h
^
|
@@ -106,12 +106,14 @@
#include <unistd.h>
#include <sys/types.h>
+#include <sys/syscall.h>
#define DBG(fmt, ...) \
do { \
if (FD_DBG(MSGS)) \
- mesa_logi("%5d: %s:%d: " fmt, gettid(), __FUNCTION__, __LINE__, \
- ##__VA_ARGS__); \
+ mesa_logi("%5d: %s:%d: " fmt, ((pid_t)syscall(SYS_gettid)), \
+ __FUNCTION__, __LINE__, \
+ ##__VA_ARGS__); \
} while (0)
#define perf_debug_message(debug, type, ...) \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c
^
|
@@ -157,7 +157,7 @@
cache->data, bs, variants[MESA_SHADER_VERTEX],
variants[MESA_SHADER_TESS_CTRL], variants[MESA_SHADER_TESS_EVAL],
variants[MESA_SHADER_GEOMETRY], variants[MESA_SHADER_FRAGMENT],
- &key->key);
+ key);
state->key = *key;
/* NOTE: uses copy of key in state obj, because pointer passed by caller
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h
^
|
@@ -27,6 +27,8 @@
#ifndef IR3_CACHE_H_
#define IR3_CACHE_H_
+#include "pipe/p_state.h"
+
#include "ir3/ir3_shader.h"
/*
@@ -39,6 +41,11 @@
struct ir3_cache_key {
struct ir3_shader_state *vs, *hs, *ds, *gs, *fs; // 5 pointers
struct ir3_shader_key key; // 7 dwords
+
+ /* Additional state that affects the cached program state, but
+ * not the compiled shader:
+ */
+ unsigned clip_plane_enable : PIPE_MAX_CLIP_PLANES;
};
/* per-gen backend program state object should subclass this for its
@@ -54,7 +61,7 @@
void *data, struct ir3_shader_variant *bs, /* binning pass vs */
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs, const struct ir3_shader_key *key);
+ struct ir3_shader_variant *fs, const struct ir3_cache_key *key);
void (*destroy_state)(void *data, struct ir3_program_state *state);
};
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_screen.c
^
|
@@ -283,7 +283,13 @@
*/
return 0;
+ /* i915 can't do these, and even if gallivm NIR can we call nir_to_tgsi
+ * manually and TGSI can't.
+ */
case PIPE_SHADER_CAP_INT16:
+ case PIPE_SHADER_CAP_FP16:
+ case PIPE_SHADER_CAP_FP16_DERIVATIVES:
+ case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
return 0;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
@@ -343,9 +349,6 @@
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
case PIPE_SHADER_CAP_INT64_ATOMICS:
- case PIPE_SHADER_CAP_FP16:
- case PIPE_SHADER_CAP_FP16_DERIVATIVES:
- case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
return 0;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_state.c
^
|
@@ -608,7 +608,7 @@
{
struct i915_context *i915 = i915_context(pipe);
- struct pipe_shader_state from_nir;
+ struct pipe_shader_state from_nir = { PIPE_SHADER_IR_TGSI };
if (templ->type == PIPE_SHADER_IR_NIR) {
nir_shader *s = templ->ir.nir;
@@ -619,7 +619,6 @@
* per-stage, and i915 FS can't do native integers. So, convert to TGSI,
* where the draw path *does* support non-native-integers.
*/
- from_nir.type = PIPE_SHADER_IR_TGSI;
from_nir.tokens = nir_to_tgsi(s, pipe->screen);
templ = &from_nir;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_batch.c
^
|
@@ -290,6 +290,42 @@
MAX2(batch->max_gem_handle, iris_get_backing_bo(bo)->gem_handle);
}
+static void
+flush_for_cross_batch_dependencies(struct iris_batch *batch,
+ struct iris_bo *bo,
+ bool writable)
+{
+ if (batch->measure && bo == batch->measure->bo)
+ return;
+
+ /* When a batch uses a buffer for the first time, or newly writes a buffer
+ * it had already referenced, we may need to flush other batches in order
+ * to correctly synchronize them.
+ */
+ for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
+ struct iris_batch *other_batch = batch->other_batches[b];
+ int other_index = find_exec_index(other_batch, bo);
+
+ /* If the buffer is referenced by another batch, and either batch
+ * intends to write it, then flush the other batch and synchronize.
+ *
+ * Consider these cases:
+ *
+ * 1. They read, we read => No synchronization required.
+ * 2. They read, we write => Synchronize (they need the old value)
+ * 3. They write, we read => Synchronize (we need their new value)
+ * 4. They write, we write => Synchronize (order writes)
+ *
+ * The read/read case is very common, as multiple batches usually
+ * share a streaming state buffer or shader assembly buffer, and
+ * we want to avoid synchronizing in this case.
+ */
+ if (other_index != -1 &&
+ (writable || BITSET_TEST(other_batch->bos_written, other_index)))
+ iris_batch_flush(other_batch);
+ }
+}
+
/**
* Add a buffer to the current batch's validation list.
*
@@ -320,44 +356,17 @@
int existing_index = find_exec_index(batch, bo);
- if (existing_index != -1) {
- /* The BO is already in the list; mark it writable */
- if (writable)
- BITSET_SET(batch->bos_written, existing_index);
+ if (existing_index == -1) {
+ flush_for_cross_batch_dependencies(batch, bo, writable);
- return;
- }
+ ensure_exec_obj_space(batch, 1);
+ add_bo_to_batch(batch, bo, writable);
+ } else if (writable && !BITSET_TEST(batch->bos_written, existing_index)) {
+ flush_for_cross_batch_dependencies(batch, bo, writable);
- if (!batch->measure || bo != batch->measure->bo) {
- /* This is the first time our batch has seen this BO. Before we use it,
- * we may need to flush and synchronize with other batches.
- */
- for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
- struct iris_batch *other_batch = batch->other_batches[b];
- int other_index = find_exec_index(other_batch, bo);
-
- /* If the buffer is referenced by another batch, and either batch
- * intends to write it, then flush the other batch and synchronize.
- *
- * Consider these cases:
- *
- * 1. They read, we read => No synchronization required.
- * 2. They read, we write => Synchronize (they need the old value)
- * 3. They write, we read => Synchronize (we need their new value)
- * 4. They write, we write => Synchronize (order writes)
- *
- * The read/read case is very common, as multiple batches usually
- * share a streaming state buffer or shader assembly buffer, and
- * we want to avoid synchronizing in this case.
- */
- if (other_index != -1 &&
- (writable || BITSET_TEST(other_batch->bos_written, other_index)))
- iris_batch_flush(other_batch);
- }
+ /* The BO is already in the list; mark it writable */
+ BITSET_SET(batch->bos_written, existing_index);
}
-
- ensure_exec_obj_space(batch, 1);
- add_bo_to_batch(batch, bo, writable);
}
static void
@@ -708,6 +717,12 @@
move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx],
I915_EXEC_FENCE_WAIT);
+ /* If it's being written by our screen, wait on it too. This is relevant
+ * when there are multiple contexts on the same screen. */
+ if (deps->write_syncobjs[batch_idx])
+ move_syncobj_to_batch(batch, &deps->write_syncobjs[batch_idx],
+ I915_EXEC_FENCE_WAIT);
+
struct iris_syncobj *batch_syncobj = iris_batch_get_signal_syncobj(batch);
if (write) {
@@ -720,6 +735,8 @@
move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx],
I915_EXEC_FENCE_WAIT);
+ move_syncobj_to_batch(batch, &deps->read_syncobjs[batch_idx],
+ I915_EXEC_FENCE_WAIT);
} else {
/* If we're reading, replace the other read from our batch index. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_blit.c
^
|
@@ -337,6 +337,16 @@
}
}
+static bool
+clear_color_is_fully_zero(const struct iris_resource *res)
+{
+ return !res->aux.clear_color_unknown &&
+ res->aux.clear_color.u32[0] == 0 &&
+ res->aux.clear_color.u32[1] == 0 &&
+ res->aux.clear_color.u32[2] == 0 &&
+ res->aux.clear_color.u32[3] == 0;
+}
+
/**
* The pipe->blit() driver hook.
*
@@ -590,10 +600,7 @@
* original format (e.g. A8_UNORM/R8_UINT).
*/
*out_clear_supported = (devinfo->ver >= 11 && !is_render_target) ||
- (res->aux.clear_color.u32[0] == 0 &&
- res->aux.clear_color.u32[1] == 0 &&
- res->aux.clear_color.u32[2] == 0 &&
- res->aux.clear_color.u32[3] == 0);
+ clear_color_is_fully_zero(res);
break;
default:
*out_aux_usage = ISL_AUX_USAGE_NONE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_bufmgr.c
^
|
@@ -594,19 +594,26 @@
assert(!slab->bo->aux_map_address);
- if (aux_map_ctx) {
- /* Since we're freeing the whole slab, all buffers allocated out of it
- * must be reclaimable. We require buffers to be idle to be reclaimed
- * (see iris_can_reclaim_slab()), so we know all entries must be idle.
- * Therefore, we can safely unmap their aux table entries.
- */
- for (unsigned i = 0; i < pslab->num_entries; i++) {
- struct iris_bo *bo = &slab->entries[i];
- if (bo->aux_map_address) {
- intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
- bo->aux_map_address = 0;
+ /* Since we're freeing the whole slab, all buffers allocated out of it
+ * must be reclaimable. We require buffers to be idle to be reclaimed
+ * (see iris_can_reclaim_slab()), so we know all entries must be idle.
+ * Therefore, we can safely unmap their aux table entries.
+ */
+ for (unsigned i = 0; i < pslab->num_entries; i++) {
+ struct iris_bo *bo = &slab->entries[i];
+ if (aux_map_ctx && bo->aux_map_address) {
+ intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
+ bo->aux_map_address = 0;
+ }
+
+ /* Unref read/write dependency syncobjs and free the array. */
+ for (int d = 0; d < bo->deps_size; d++) {
+ for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
+ iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
+ iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
}
}
+ free(bo->deps);
}
iris_bo_unreference(slab->bo);
@@ -1659,6 +1666,16 @@
list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
list_del(&bo->head);
+
+ bo_free(bo);
+ }
+ }
+
+ for (int i = 0; i < bufmgr->num_local_buckets; i++) {
+ struct bo_cache_bucket *bucket = &bufmgr->local_cache_bucket[i];
+
+ list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
+ list_del(&bo->head);
bo_free(bo);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_clear.c
^
|
@@ -321,7 +321,8 @@
iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
p_res, res->aux.usage, level, true);
- blorp_fast_clear(&blorp_batch, &surf, format, ISL_SWIZZLE_IDENTITY,
+ blorp_fast_clear(&blorp_batch, &surf, res->surf.format,
+ ISL_SWIZZLE_IDENTITY,
level, box->z, box->depth,
box->x, box->y, box->x + box->width,
box->y + box->height);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_program.c
^
|
@@ -2820,7 +2820,8 @@
if (ice->state.vs_uses_draw_params != uses_draw_params ||
ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
- ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) {
+ ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
+ ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
IRIS_DIRTY_VERTEX_ELEMENTS;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_state.c
^
|
@@ -2934,6 +2934,7 @@
pipe_resource_reference(&surf->surface_state.ref.res, NULL);
pipe_resource_reference(&surf->surface_state_read.ref.res, NULL);
free(surf->surface_state.cpu);
+ free(surf->surface_state_read.cpu);
free(surf);
}
@@ -6921,10 +6922,9 @@
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total - 1;
- if (prog_data->total_scratch > 0) {
- cfe.ScratchSpaceBuffer =
- iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
- }
+ uint32_t scratch_addr = pin_scratch_space(ice, batch, prog_data,
+ MESA_SHADER_COMPUTE);
+ cfe.ScratchSpaceBuffer = scratch_addr >> 4;
}
}
@@ -7861,6 +7861,13 @@
pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
pc.StateCacheInvalidationEnable =
flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+#if GFX_VER >= 12
+ /* Invalidates the L3 cache part in which index & vertex data is loaded
+ * when VERTEX_BUFFER_STATE::L3BypassDisable is set.
+ */
+ pc.L3ReadOnlyCacheInvalidationEnable =
+ flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+#endif
pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
pc.ConstantCacheInvalidationEnable =
flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/ci/gitlab-ci.yml
^
|
@@ -1,4 +1,4 @@
-lima-mali450-test:arm64:
+.lima-mali450-test:arm64:
extends:
- .lava-test:arm64
- .lima-rules
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/meson.build
^
|
@@ -92,7 +92,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
liblima = static_library(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/lima/standalone/lima_disasm.c
^
|
@@ -166,7 +166,7 @@
}
char *filename = NULL;
- filename = argv[n];
+ filename = argv[argc - 1];
uint32_t size = 0;
uint32_t *prog = extract_shader_binary(filename, &size, &is_frag);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-quick_shader.txt
^
|
@@ -188,7 +188,6 @@
spec/glsl-4.00/execution/conversion/vert-conversion-explicit-dvec3-vec3: fail
spec/glsl-4.00/execution/conversion/vert-conversion-explicit-dvec4-vec4: fail
spec/glsl-4.50/execution/ssbo-atomiccompswap-int: fail
-spec/glsl-es-1.00/linker/glsl-mismatched-uniform-precision-unused: fail
spec/intel_shader_atomic_float_minmax/execution/shared-atomiccompswap-float: skip
spec/intel_shader_atomic_float_minmax/execution/shared-atomicexchange-float: skip
spec/intel_shader_atomic_float_minmax/execution/shared-atomicmax-float: skip
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml
^
|
@@ -37,7 +37,7 @@
- path: gputest/pixmark-piano.trace
expectations:
- device: gl-vmware-llvmpipe
- checksum: 4262587e893cf98c61a8467a15677181
+ checksum: b580ae01560380461a103975cab77393
- path: gputest/triangle.trace
expectations:
- device: gl-vmware-llvmpipe
@@ -169,7 +169,7 @@
- path: bgfx/39-assao.rdc
expectations:
- device: gl-vmware-llvmpipe
- checksum: bc6f44e63010db07e7ba588b216e38b1
+ checksum: 5d9c6dd6399db34ac81951cd7152ec1c
- path: bgfx/40-svt.rdc
expectations:
- device: gl-vmware-llvmpipe
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
^
|
@@ -560,6 +560,18 @@
return 0;
}
+static void
+llvmpipe_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ memset(uuid, 0, PIPE_UUID_SIZE);
+}
+
+static void
+llvmpipe_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ memset(uuid, 0, PIPE_UUID_SIZE);
+}
+
static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_scmp = true,
.lower_flrp32 = true,
@@ -1040,6 +1052,9 @@
screen->base.get_timestamp = llvmpipe_get_timestamp;
+ screen->base.get_driver_uuid = llvmpipe_get_driver_uuid;
+ screen->base.get_device_uuid = llvmpipe_get_device_uuid;
+
screen->base.finalize_nir = llvmpipe_finalize_nir;
screen->base.get_disk_shader_cache = lp_get_disk_shader_cache;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
^
|
@@ -304,6 +304,7 @@
#define LATE_DEPTH_TEST 0x2
#define EARLY_DEPTH_WRITE 0x4
#define LATE_DEPTH_WRITE 0x8
+#define EARLY_DEPTH_TEST_INFERRED 0x10 //only with EARLY_DEPTH_TEST
static int
find_output_by_semantic( const struct tgsi_shader_info *info,
@@ -647,10 +648,10 @@
key->stencil[1].writemask)))
depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
else
- depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE | EARLY_DEPTH_TEST_INFERRED;
}
else
- depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE | EARLY_DEPTH_TEST_INFERRED;
}
else {
depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
@@ -1141,8 +1142,10 @@
if (key->min_samples == 1)
s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
- /* if the shader writes sample mask use that */
- if (shader->info.base.writes_samplemask) {
+ /* if the shader writes sample mask use that,
+ * but only if this isn't genuine early-depth to avoid breaking occlusion query */
+ if (shader->info.base.writes_samplemask &&
+ (!(depth_mode & EARLY_DEPTH_TEST) || (depth_mode & (EARLY_DEPTH_TEST_INFERRED)))) {
LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx);
LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, "");
@@ -1258,6 +1261,23 @@
key->multisample ? s_mask : lp_build_mask_value(&mask), counter);
}
+ /* if this is genuine early-depth in the shader, write samplemask now
+ * after occlusion count has been updated
+ */
+ if (key->multisample && shader->info.base.writes_samplemask &&
+ (depth_mode & (EARLY_DEPTH_TEST_INFERRED | EARLY_DEPTH_TEST)) == EARLY_DEPTH_TEST) {
+ /* if the shader writes sample mask use that */
+ LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+ out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx);
+ LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, "");
+ LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, "");
+ LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), "");
+ smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, "");
+
+ s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+ }
+
+
if (key->multisample) {
/* store the sample mask for this loop */
LLVMBuildStore(builder, s_mask, s_mask_ptr);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c
^
|
@@ -67,13 +67,13 @@
const struct util_format_description *depth_desc =
util_format_description(depth_format);
- if (lp->framebuffer.zsbuf && lp->framebuffer.zsbuf->context != pipe) {
+ if (fb->zsbuf && fb->zsbuf->context != pipe) {
debug_printf("Illegal setting of fb state with zsbuf created in "
"another context\n");
}
for (i = 0; i < fb->nr_cbufs; i++) {
- if (lp->framebuffer.cbufs[i] &&
- lp->framebuffer.cbufs[i]->context != pipe) {
+ if (fb->cbufs[i] &&
+ fb->cbufs[i]->context != pipe) {
debug_printf("Illegal setting of fb state with cbuf %d created in "
"another context\n", i);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
^
|
@@ -311,6 +311,9 @@
if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
return;
+ width = MIN2(width, dst->texture->width0 - dstx);
+ height = MIN2(height, dst->texture->height0 - dsty);
+
if (dst->texture->nr_samples > 1) {
struct pipe_box box;
u_box_2d(dstx, dsty, width, height, &box);
@@ -379,6 +382,9 @@
if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
return;
+ width = MIN2(width, dst->texture->width0 - dstx);
+ height = MIN2(height, dst->texture->height0 - dsty);
+
if (dst->texture->nr_samples > 1) {
uint64_t zstencil = util_pack64_z_stencil(dst->format, depth, stencil);
struct pipe_box box;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_cmdstream.c
^
|
@@ -120,31 +120,27 @@
static unsigned
translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
{
- /* Bifrost doesn't support the GL_CLAMP wrap mode, so instead use
- * CLAMP_TO_EDGE and CLAMP_TO_BORDER. On Midgard, CLAMP is broken for
- * nearest filtering, so use CLAMP_TO_EDGE in that case. */
+ /* CLAMP is only supported on Midgard, where it is broken for nearest
+ * filtering. Use CLAMP_TO_EDGE in that case.
+ */
switch (w) {
case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
- case PIPE_TEX_WRAP_CLAMP:
- return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_CLAMP;
-#else
- MALI_WRAP_MODE_CLAMP_TO_BORDER;
-#endif
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
+#if PAN_ARCH <= 5
+ case PIPE_TEX_WRAP_CLAMP:
+ return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
+ MALI_WRAP_MODE_CLAMP;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_MIRRORED_CLAMP;
-#else
- MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+ MALI_WRAP_MODE_MIRRORED_CLAMP;
#endif
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
default: unreachable("Invalid wrap");
}
}
@@ -1367,6 +1363,12 @@
for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
+
+ if (!view) {
+ memset(&out[i], 0, sizeof(out[i]));
+ continue;
+ }
+
struct pipe_sampler_view *pview = &view->base;
struct panfrost_resource *rsrc = pan_resource(pview->texture);
@@ -1384,6 +1386,11 @@
for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
+ if (!view) {
+ trampolines[i] = 0;
+ continue;
+ }
+
panfrost_update_sampler_view(view, &ctx->base);
trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
@@ -1411,8 +1418,11 @@
SAMPLER);
struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu;
- for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
- out[i] = ctx->samplers[stage][i]->hw;
+ for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) {
+ struct panfrost_sampler_state *st = ctx->samplers[stage][i];
+
+ out[i] = st ? st->hw : (struct mali_sampler_packed){0};
+ }
return T.gpu;
}
@@ -2715,7 +2725,8 @@
}
}
- bool points = info->mode == PIPE_PRIM_POINTS;
+ enum pipe_prim_type prim = u_reduced_prim(info->mode);
+ bool polygon = (prim == PIPE_PRIM_TRIANGLES);
void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
#if PAN_ARCH >= 6
@@ -2731,8 +2742,17 @@
cfg.four_components_per_vertex = true;
cfg.draw_descriptor_is_64b = true;
cfg.front_face_ccw = rast->front_ccw;
- cfg.cull_front_face = rast->cull_face & PIPE_FACE_FRONT;
- cfg.cull_back_face = rast->cull_face & PIPE_FACE_BACK;
+
+ /*
+ * From the Gallium documentation,
+ * pipe_rasterizer_state::cull_face "indicates which faces of
+ * polygons to cull". Points and lines are not considered
+ * polygons and should be drawn even if all faces are culled.
+ * The hardware does not take primitive type into account when
+ * culling, so we need to do that check ourselves.
+ */
+ cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
+ cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
cfg.position = pos;
cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
@@ -2746,9 +2766,7 @@
* be set to 0 and the provoking vertex is selected with the
* PRIMITIVE.first_provoking_vertex field.
*/
- if (info->mode == PIPE_PRIM_LINES ||
- info->mode == PIPE_PRIM_LINE_LOOP ||
- info->mode == PIPE_PRIM_LINE_STRIP) {
+ if (prim == PIPE_PRIM_LINES) {
/* The logic is inverted across arches. */
cfg.flat_shading_vertex = rast->flatshade_first
^ (PAN_ARCH <= 5);
@@ -2769,7 +2787,7 @@
}
}
- panfrost_emit_primitive_size(ctx, points, psiz, prim_size);
+ panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size);
}
static void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_context.c
^
|
@@ -631,28 +631,43 @@
unsigned new_nr = 0;
unsigned i;
- assert(start_slot == 0);
+ for (i = 0; i < num_views; ++i) {
+ struct pipe_sampler_view *view = views ? views[i] : NULL;
+ unsigned p = i + start_slot;
- if (!views)
- num_views = 0;
+ if (view)
+ new_nr = p + 1;
- for (i = 0; i < num_views; ++i) {
- if (views[i])
- new_nr = i + 1;
if (take_ownership) {
- pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p],
NULL);
- ctx->sampler_views[shader][i] = (struct panfrost_sampler_view *)views[i];
+ ctx->sampler_views[shader][p] = (struct panfrost_sampler_view *)view;
} else {
- pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
- views[i]);
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p],
+ view);
}
}
- for (; i < ctx->sampler_view_count[shader]; i++) {
- pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
+ for (; i < num_views + unbind_num_trailing_slots; i++) {
+ unsigned p = i + start_slot;
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p],
NULL);
}
+
+ /* If the sampler view count is higher than the greatest sampler view
+ * we touch, it can't change */
+ if (ctx->sampler_view_count[shader] > start_slot + num_views + unbind_num_trailing_slots)
+ return;
+
+ /* If we haven't set any sampler views here, search lower numbers for
+ * set sampler views */
+ if (new_nr == 0) {
+ for (i = 0; i < start_slot; ++i) {
+ if (ctx->sampler_views[shader][i])
+ new_nr = i + 1;
+ }
+ }
+
ctx->sampler_view_count[shader] = new_nr;
}
@@ -668,6 +683,8 @@
util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader],
buffers, start, count);
+
+ ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_SSBO;
}
static void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.c
^
|
@@ -153,31 +153,15 @@
return true;
}
} else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
- if (scanout) {
- struct drm_prime_handle args = {
- .handle = scanout->handle,
- .flags = DRM_CLOEXEC,
- };
-
- int ret = drmIoctl(dev->ro->kms_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
- if (ret == -1)
- return false;
+ int fd = panfrost_bo_export(rsrc->image.data.bo);
- handle->stride = scanout->stride;
- handle->handle = args.fd;
-
- return true;
- } else {
- int fd = panfrost_bo_export(rsrc->image.data.bo);
-
- if (fd < 0)
- return false;
+ if (fd < 0)
+ return false;
- handle->handle = fd;
- handle->stride = rsrc->image.layout.slices[0].line_stride;
- handle->offset = rsrc->image.layout.slices[0].offset;
- return true;
- }
+ handle->handle = fd;
+ handle->stride = rsrc->image.layout.slices[0].line_stride;
+ handle->offset = rsrc->image.layout.slices[0].offset;
+ return true;
}
return false;
@@ -839,7 +823,8 @@
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_device *dev = pan_device(pctx->screen);
struct panfrost_resource *rsrc = pan_resource(resource);
- int bytes_per_pixel = util_format_get_blocksize(rsrc->image.layout.format);
+ enum pipe_format format = rsrc->image.layout.format;
+ int bytes_per_block = util_format_get_blocksize(format);
struct panfrost_bo *bo = rsrc->image.data.bo;
/* Can't map tiled/compressed directly */
@@ -916,6 +901,9 @@
}
if (create_new_bo) {
+ /* Make sure we re-emit any descriptors using this resource */
+ panfrost_dirty_state_all(ctx);
+
/* If the BO is used by one of the pending batches or if it's
* not ready yet (still accessed by one of the already flushed
* batches), we try to allocate a new one to avoid waiting.
@@ -942,6 +930,12 @@
panfrost_bo_unreference(bo);
rsrc->image.data.bo = newbo;
+ /* Swapping out the BO will invalidate batches
+ * accessing this resource, flush them but do
+ * not wait for them.
+ */
+ panfrost_flush_batches_accessing_rsrc(ctx, rsrc, "Resource shadowing");
+
if (!copy_resource &&
drm_is_afbc(rsrc->image.layout.modifier))
panfrost_resource_init_afbc_headers(rsrc);
@@ -970,9 +964,17 @@
}
}
+ /* For access to compressed textures, we want the (x, y, w, h)
+ * region-of-interest in blocks, not pixels. Then we compute the stride
+ * between rows of blocks as the width in blocks times the width per
+ * block, etc.
+ */
+ struct pipe_box box_blocks;
+ u_box_pixels_to_blocks(&box_blocks, box, format);
+
if (rsrc->image.layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) {
- transfer->base.stride = box->width * bytes_per_pixel;
- transfer->base.layer_stride = transfer->base.stride * box->height;
+ transfer->base.stride = box_blocks.width * bytes_per_block;
+ transfer->base.layer_stride = transfer->base.stride * box_blocks.height;
transfer->map = ralloc_size(transfer, transfer->base.layer_stride * box->depth);
assert(box->depth == 1);
@@ -1013,9 +1015,9 @@
return bo->ptr.cpu
+ rsrc->image.layout.slices[level].offset
- + transfer->base.box.z * transfer->base.layer_stride
- + transfer->base.box.y * rsrc->image.layout.slices[level].line_stride
- + transfer->base.box.x * bytes_per_pixel;
+ + box->z * transfer->base.layer_stride
+ + box_blocks.y * rsrc->image.layout.slices[level].line_stride
+ + box_blocks.x * bytes_per_block;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_screen.c
^
|
@@ -121,6 +121,7 @@
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -153,7 +154,6 @@
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -284,6 +284,8 @@
return MAX_VARYING;
/* Removed in v6 (Bifrost) */
+ case PIPE_CAP_GL_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_ALPHA_TEST:
return dev->arch <= 5;
@@ -841,8 +843,6 @@
if (dev->debug & PAN_DBG_NO_AFBC)
dev->has_afbc = false;
- dev->ro = ro;
-
/* Check if we're loading against a supported GPU model. */
switch (dev->gpu_id) {
@@ -862,6 +862,8 @@
return NULL;
}
+ dev->ro = ro;
+
screen->base.destroy = panfrost_destroy_screen;
screen->base.get_name = panfrost_get_name;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
^
|
@@ -120,7 +120,6 @@
/* This transformation needs to be done before any of the IF
* instructions are modified. */
{"transform KILP", 1, 1, rc_transform_KILL, NULL},
- {"unroll loops", 1, is_r500, rc_unroll_loops, NULL},
{"transform loops", 1, !is_r500, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one},
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
^
|
@@ -22,6 +22,7 @@
#include "radeon_compiler.h"
+#include <stdbool.h>
#include <stdio.h>
#include "r300_reg.h"
@@ -559,15 +560,33 @@
struct rc_instruction * LastRead;
};
+static int get_reg(struct radeon_compiler *c, struct temporary_allocation *ta, bool *hwtemps,
+ unsigned int orig)
+{
+ if (!ta[orig].Allocated) {
+ int j;
+ for (j = 0; j < c->max_temp_regs; ++j)
+ {
+ if (!hwtemps[j])
+ break;
+ }
+ ta[orig].Allocated = 1;
+ ta[orig].HwTemp = j;
+ hwtemps[ta[orig].HwTemp] = true;
+ }
+
+ return ta[orig].HwTemp;
+}
+
static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
{
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
struct rc_instruction *inst;
struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
- char hwtemps[RC_REGISTER_MAX_INDEX];
+ bool hwtemps[RC_REGISTER_MAX_INDEX];
struct temporary_allocation * ta;
- unsigned int i, j;
+ unsigned int i;
memset(hwtemps, 0, sizeof(hwtemps));
@@ -638,28 +657,17 @@
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
unsigned int orig = inst->U.I.SrcReg[i].Index;
- inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+ inst->U.I.SrcReg[i].Index = get_reg(c, ta, hwtemps, orig);
if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = 0;
+ hwtemps[ta[orig].HwTemp] = false;
}
}
if (opcode->HasDstReg) {
if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
unsigned int orig = inst->U.I.DstReg.Index;
-
- if (!ta[orig].Allocated) {
- for(j = 0; j < c->max_temp_regs; ++j) {
- if (!hwtemps[j])
- break;
- }
- ta[orig].Allocated = 1;
- ta[orig].HwTemp = j;
- hwtemps[ta[orig].HwTemp] = 1;
- }
-
- inst->U.I.DstReg.Index = ta[orig].HwTemp;
+ inst->U.I.DstReg.Index = get_reg(c, ta, hwtemps, orig);
}
}
}
@@ -695,10 +703,10 @@
new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
- memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[i].Index = temp;
inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[i].RelAddr = 0;
}
}
return 1;
@@ -724,10 +732,13 @@
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = tmpreg;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst_mov->U.I.SrcReg[0].Negate = 0;
+ inst_mov->U.I.SrcReg[0].Abs = 0;
- reset_srcreg(&inst->U.I.SrcReg[2]);
inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[2].Index = tmpreg;
+ inst->U.I.SrcReg[2].RelAddr = false;
}
}
@@ -739,10 +750,13 @@
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = tmpreg;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst_mov->U.I.SrcReg[0].Negate = 0;
+ inst_mov->U.I.SrcReg[0].Abs = 0;
- reset_srcreg(&inst->U.I.SrcReg[1]);
inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[1].Index = tmpreg;
+ inst->U.I.SrcReg[1].RelAddr = false;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/r500_fragprog.c
^
|
@@ -44,16 +44,15 @@
struct rc_instruction * inst_if,
void *data)
{
+ if (inst_if->U.I.Opcode != RC_OPCODE_IF)
+ return 0;
+
struct rc_variable * writer;
struct rc_list * writer_list, * list_ptr;
struct rc_list * var_list = rc_get_variables(c);
unsigned int generic_if = 0;
unsigned int alu_chan;
- if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
- return 0;
- }
-
writer_list = rc_variable_list_get_writers(
var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
if (!writer_list) {
@@ -220,8 +219,6 @@
return 1;
return 0;
- } else if (reg.File == RC_FILE_INLINE) {
- return 1;
} else {
/* ALU instructions support almost everything */
relevant = 0;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
^
|
@@ -253,8 +253,13 @@
if(opcode->HasDstReg){
int src = 0;
unsigned int srcmasks[3];
- rc_compute_sources_for_writemask(ptr,
- ptr->U.I.DstReg.WriteMask, srcmasks);
+ unsigned int writemask = ptr->U.I.DstReg.WriteMask;
+ if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
+ writemask |= RC_MASK_X;
+ else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
+ writemask |= RC_MASK_W;
+
+ rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
for(src=0; src < opcode->NumSrcRegs; src++){
mark_used(&s,
ptr->U.I.SrcReg[src].File,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
^
|
@@ -499,22 +499,6 @@
}
}
-void rc_unroll_loops(struct radeon_compiler *c, void *user)
-{
- struct rc_instruction * inst;
- struct loop_info loop;
-
- for(inst = c->Program.Instructions.Next;
- inst != &c->Program.Instructions; inst = inst->Next) {
-
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
- if (build_loop_info(c, &loop, inst)) {
- try_unroll_loop(c, &loop);
- }
- }
- }
-}
-
void rc_emulate_loops(struct radeon_compiler *c, void *user)
{
struct emulate_loop_state * s = &c->loop_state;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
^
|
@@ -50,8 +50,6 @@
void rc_transform_loops(struct radeon_compiler *c, void *user);
-void rc_unroll_loops(struct radeon_compiler * c, void *user);
-
void rc_emulate_loops(struct radeon_compiler * c, void *user);
#endif /* RADEON_EMULATE_LOOPS_H */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_optimize.c
^
|
@@ -26,6 +26,8 @@
*
*/
+#include "util/u_math.h"
+
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
@@ -653,11 +655,12 @@
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
for(i = 0; i < 4; i++ ) {
+ if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
+ continue;
+
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
- if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
- && swz != RC_SWIZZLE_ONE) {
+ if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
return 0;
- }
}
/* Check src1. */
@@ -832,8 +835,15 @@
return 0;
}
- /* Rewrite the instructions */
writemask_sum = rc_variable_writemask_sum(writer_list->Item);
+
+ /* rc_normal_rewrite_writemask can't expand a previous writemask to store
+ * more channels replicated.
+ */
+ if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask))
+ return 0;
+
+ /* Rewrite the instructions */
for (var = writer_list->Item; var; var = var->Friend) {
struct rc_variable * writer = var;
unsigned conversion_swizzle = rc_make_conversion_swizzle(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.c
^
|
@@ -1215,12 +1215,6 @@
*
* === OR ===
*
- * IF Temp[0].x -\
- * KILL - > KIL -abs(Temp[0].x)
- * ENDIF -/
- *
- * === OR ===
- *
* IF Temp[0].x -> IF Temp[0].x
* ... -> ...
* ELSE -> ELSE
@@ -1265,21 +1259,6 @@
* block, because -0.0 is considered negative. */
inst->U.I.SrcReg[0] =
negate(absolute(if_inst->U.I.SrcReg[0]));
-
- if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
- && inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
-
- /* Optimize the special case:
- * IF Temp[0].x
- * KILP
- * ENDIF
- */
-
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
- }
}
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
^
|
@@ -257,10 +257,10 @@
if (fc_state.BranchDepth != 0
|| fc_state.LoopDepth != 1) {
lower_endloop(inst, &fc_state);
+ /* Skip the new PRED_RESTORE */
+ inst = inst->Next;
}
fc_state.LoopDepth--;
- /* Skip PRED_RESTORE */
- inst = inst->Next;
break;
case RC_OPCODE_IF:
lower_if(inst, &fc_state);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_vs_draw.c
^
|
@@ -312,15 +312,17 @@
struct r300_vertex_shader *vs)
{
struct draw_context *draw = r300->draw;
- struct pipe_shader_state new_vs;
struct tgsi_shader_info info;
struct vs_transform_context transform;
const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
+ struct pipe_shader_state new_vs = {
+ .type = PIPE_SHADER_IR_TGSI,
+ .tokens = tgsi_alloc_tokens(newLen)
+ };
unsigned i;
tgsi_scan_shader(vs->state.tokens, &info);
- new_vs.tokens = tgsi_alloc_tokens(newLen);
if (new_vs.tokens == NULL)
return;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/navi10-piglit-quick-fail.csv
^
|
@@ -115,10 +115,6 @@
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
spec@egl_ext_protected_content@conformance,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
-spec@ext_framebuffer_multisample@turn-on-off 2,Fail
-spec@ext_framebuffer_multisample@turn-on-off 4,Fail
-spec@ext_framebuffer_multisample@turn-on-off 6,Fail
-spec@ext_framebuffer_multisample@turn-on-off 8,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
spec@ext_texture_integer@fbo-integer,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/radeonsi-stoney-fails.txt
^
|
@@ -54,10 +54,6 @@
spec@egl_khr_surfaceless_context@viewport,Fail
spec@egl_mesa_configless_context@basic,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
-spec@ext_framebuffer_multisample@turn-on-off 2,Fail
-spec@ext_framebuffer_multisample@turn-on-off 4,Fail
-spec@ext_framebuffer_multisample@turn-on-off 6,Fail
-spec@ext_framebuffer_multisample@turn-on-off 8,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/raven-piglit-quick-fail.csv
^
|
@@ -201,10 +201,6 @@
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
spec@egl_ext_protected_content@conformance,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
-spec@ext_framebuffer_multisample@turn-on-off 2,Fail
-spec@ext_framebuffer_multisample@turn-on-off 4,Fail
-spec@ext_framebuffer_multisample@turn-on-off 6,Fail
-spec@ext_framebuffer_multisample@turn-on-off 8,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
spec@ext_texture_integer@fbo-integer,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/sienna_cichlid-piglit-quick-fail.csv
^
|
@@ -116,10 +116,6 @@
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
spec@egl_ext_protected_content@conformance,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
-spec@ext_framebuffer_multisample@turn-on-off 2,Fail
-spec@ext_framebuffer_multisample@turn-on-off 4,Fail
-spec@ext_framebuffer_multisample@turn-on-off 6,Fail
-spec@ext_framebuffer_multisample@turn-on-off 8,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
spec@ext_texture_integer@fbo-integer,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/ci/traces-radeonsi.yml
^
|
@@ -5,27 +5,27 @@
- path: glmark2/desktop:windows=4:effect=blur:blur-radius=5:passes=1:separable=true.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: d8c9bf6295525e65e318adeff02520e2
+ checksum: 740fa8f8e9a9d815cf160b1893370755
- path: glmark2/jellyfish.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: f68bf374e535ad4a43a08786b0d536d8
+ checksum: 5bc7d5c250b7d568313c4afd064082f6
- path: glxgears/glxgears-2.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: eb9b3d497be567f02a6e039fa32f2b13
+ checksum: ef3653f50d4853d3e9cb3244c799565a
- path: 0ad/0ad.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 8fb8cd54f1ff908952fe0b6dd9f28999
+ checksum: 1a089d8584a9e68e7ab08eada954741b
- path: pathfinder/demo.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 8ff636268dfa0d54b6f15d70d15e354d
+ checksum: c81c85f9b247dd1b06c3dd5b669cc283
- path: pathfinder/canvas_moire.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 505b9cad6e65c13463a0786944f8b679
+ checksum: 78dd2357ad6e5ffc049a75bfb11c5497
- path: pathfinder/canvas_text_v2.trace
expectations:
- device: gl-radeonsi-stoney
@@ -33,11 +33,11 @@
- path: gputest/furmark.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 84c499203944cdc59e70450c324bb8df
+ checksum: 4ceea12000bb5995b915228d2d4b49c7
- path: gputest/pixmark-piano.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 58a86d233d03e2a174cb79c16028f916
+ checksum: 86ebe6ff8038975de8724fa9536edb7e
- path: gputest/triangle.trace
expectations:
- device: gl-radeonsi-stoney
@@ -45,47 +45,47 @@
- path: humus/Portals.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: fc7d00efe380cacbd4e9ef9b231aea2f
+ checksum: 5b96333495b794691e4ed071ae92ff19
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 5af6e31cc78320cb3f9db483c7a426e0
+ checksum: 5db05161041946e8971f39f12bbd847c
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=true.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 6e9dc5a7dc5a9cbb2b396bfce88a2084
+ checksum: e2154c522fcdb4f43b31b31c17adda74
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=subdata:interleave=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 35e384f833f37411f15bf8ef80ca1914
+ checksum: 70298e48479147af2d848a5441fb5f47
- path: glmark2/bump:bump-render=height.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 526cf3805b9b64bb8edea1b7d86b9cae
+ checksum: f5129b06e401a5fefa18a9895b18deec
- path: glmark2/bump:bump-render=high-poly.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: e026d36eaa71ecd957b47c7e6a5a100b
+ checksum: 3fc1adf0caa289b3296a80c2c13834ca
- path: glmark2/bump:bump-render=normals.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 4508a1be8c33a63fbfa695b141edf48b
+ checksum: 596fd7a084d3a7a6b61b322593385f45
- path: glmark2/conditionals:vertex-steps=0:fragment-steps=0.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: d12ecac5894705295e4fa076d77a72ab
+ checksum: fb2eda378ace8ca8b81d73d20cbfbbf7
- path: glmark2/conditionals:vertex-steps=0:fragment-steps=5.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 8999ff7eda7d7cf25440b96ab0efd4ee
+ checksum: b8575de0e043f540b12f13209054d000
- path: glmark2/conditionals:vertex-steps=5:fragment-steps=0.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: e65fdae9fe7bbd95c5cc0fb0c3eb7bf4
+ checksum: f70625a1f9bd9d2c211e330032b86f85
- path: glmark2/desktop:windows=4:effect=shadow.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 2aff87605464dd3f61aefd4e1dc0bffd
+ checksum: 384015de55daf7dd406c9463576018e9
- path: glmark2/effect2d:kernel=0,1,0;1,-4,1;0,1,0;.trace
expectations:
- device: gl-radeonsi-stoney
@@ -97,87 +97,87 @@
- path: glmark2/function:fragment-steps=5:fragment-complexity=low.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 13149880306d2183703a5c327f4d750a
+ checksum: 9efd8bb5df15f9483a18a00f9650caa9
- path: glmark2/function:fragment-steps=5:fragment-complexity=medium.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: c6983ffb3a74e234f84e5d817f876f54
+ checksum: 9bdd506c0404cb11a7148cb08b429d1b
- path: glmark2/build:use-vbo=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 203a0205580b2c39ed8dcbed57b18f3c
+ checksum: 506b1910317b04e5d32aacf2bd70bd0d
- path: glmark2/build:use-vbo=true.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 437034f264c469757683e51e3e25beca
+ checksum: 793dc29115ae442b279276adb89d0999
- path: glmark2/ideas:speed=10000.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 381d973b00b46fcc15f72808eabb6237
+ checksum: 1ae057093620f868aad846167f04c6e0
- path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-loop=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 5a32f7917c130581fae23e58b71fd740
+ checksum: 6fb2f9bce414879e3751bb51d1a8d481
- path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 20586c936a7051ce63503df6f9785d01
+ checksum: 27fabda45ca2a989c21b4ec386a2e8f6
- path: glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=true.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 255e412701afdc4a7b62654e93b92cc9
+ checksum: 05ac8be6e2e0c03ea1caec85f037cddd
- path: glmark2/pulsar:quads=5:texture=false:light=false.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 42f913c6119a685da4450ea116060614
+ checksum: 0b62b9c04e4c00f44eba64b366c47783
- path: glmark2/refract.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 41d105bdd10a354f6d161c67f715b7f9
+ checksum: c711f3a07f6aa9e0f19c544c6d7c2000
- path: glmark2/shading:shading=blinn-phong-inf.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 4a2cf8a13b248f470e58f785d0a9207d
+ checksum: 429c6bbdf99d573cc4eaaee3c0471257
- path: glmark2/shading:shading=cel.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 8325ce4073135c03eec241087e51a584
+ checksum: 330d9a8375970e42ba5ddc3142dc6477
- path: glmark2/shading:shading=gouraud.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: fbe5b7d038866f6cd4fc801b062e4ce5
+ checksum: e22908309d41af8c9753c5c7cae73b29
- path: glmark2/shading:shading=phong.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 65f9468a37d683b4c1f38d34f09a97db
+ checksum: 2ac9f1b6ba39f8924b374d18181edeeb
- path: glmark2/shadow.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: abd705b0ae76cf6f19905bfea1d3db76
+ checksum: 9215a1525dfe5b12999652b3a3ba05d8
- path: glmark2/terrain.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 59690f1438a1e44fc655d16ce8bb348b
+ checksum: 80a1bba6ff969c9a82c68de0306f2b61
- path: glmark2/texture:texture-filter=linear.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 54bf32b499f3ebfe0e727e5716e54b1a
+ checksum: 928479421abda4823a673393cd59ff81
- path: glmark2/texture:texture-filter=mipmap.trace
expectations:
- device: gl-radeonsi-stoney
- checksum: 3a3abce164eef2be10f58604b22583f2
+ checksum: cb94bca58ed8f41c5f6f6dda3fb15600
- path: glmark2/texture:texture-filter=nearest.trace
expectations:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_blit.c
^
|
@@ -395,11 +395,12 @@
si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, false, true /* no DCC */);
}
-static void si_decompress_sampler_depth_textures(struct si_context *sctx,
+static bool si_decompress_sampler_depth_textures(struct si_context *sctx,
struct si_samplers *textures)
{
unsigned i;
unsigned mask = textures->needs_depth_decompress_mask;
+ bool need_flush = false;
while (mask) {
struct pipe_sampler_view *view;
@@ -418,7 +419,14 @@
si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
view->u.tex.first_level, view->u.tex.last_level, 0,
util_max_layer(&tex->buffer.b.b, view->u.tex.first_level));
+
+ if (tex->need_flush_after_depth_decompression) {
+ need_flush = true;
+ tex->need_flush_after_depth_decompression = false;
+ }
}
+
+ return need_flush;
}
static void si_blit_decompress_color(struct si_context *sctx, struct si_texture *tex,
@@ -757,6 +765,7 @@
void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
unsigned compressed_colortex_counter, mask;
+ bool need_flush = false;
if (sctx->blitter_running)
return;
@@ -774,7 +783,7 @@
unsigned i = u_bit_scan(&mask);
if (sctx->samplers[i].needs_depth_decompress_mask) {
- si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
+ need_flush |= si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
}
if (sctx->samplers[i].needs_color_decompress_mask) {
si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
@@ -784,6 +793,16 @@
}
}
+ if (sctx->chip_class == GFX10_3 && need_flush) {
+ /* This fixes a corruption with the following sequence:
+ * - fast clear depth
+ * - decompress depth
+ * - draw
+ * (see https://gitlab.freedesktop.org/drm/amd/-/issues/1810#note_1170171)
+ */
+ sctx->b.flush(&sctx->b, NULL, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW);
+ }
+
if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) {
if (sctx->uses_bindless_samplers)
si_decompress_resident_textures(sctx);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_clear.c
^
|
@@ -352,7 +352,7 @@
return false;
dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset;
- clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size * num_layers;
+ clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size;
}
si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value);
@@ -829,6 +829,8 @@
clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f;
}
+ zstex->need_flush_after_depth_decompression = sctx->chip_class == GFX10_3;
+
assert(num_clears < ARRAY_SIZE(info));
si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b,
zstex->surface.meta_offset, zstex->surface.meta_size, clear_value);
@@ -934,6 +936,8 @@
}
}
+ zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->chip_class == GFX10_3;
+
/* Update DB_DEPTH_CLEAR. */
if (update_db_depth_clear &&
zstex->depth_clear_value[level] != (float)depth) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_perfcounter.c
^
|
@@ -158,7 +158,10 @@
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
radeon_set_uconfig_reg(
R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
+ S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ?
+ V_036020_CP_PERFMON_STATE_START_COUNTING :
+ V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
+ S_036020_PERFMON_SAMPLE_ENABLE(1));
radeon_end();
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pipe.h
^
|
@@ -374,6 +374,7 @@
bool db_compatible : 1;
bool can_sample_z : 1;
bool can_sample_s : 1;
+ bool need_flush_after_depth_decompression: 1;
/* We need to track DCC dirtiness, because st/dri usually calls
* flush_resource twice per frame (not a bug) and we don't wanna
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.c
^
|
@@ -137,7 +137,7 @@
* added to the buffer list on the next draw call.
*/
for (unsigned i = 0; i < SI_NUM_STATES; i++) {
- struct si_pm4_state *state = sctx->emitted.array[i];
+ struct si_pm4_state *state = sctx->queued.array[i];
if (state && state->is_shader) {
sctx->emitted.array[i] = NULL;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c
^
|
@@ -115,7 +115,7 @@
info->input[loc].semantic = semantic + i;
- if (semantic == SYSTEM_VALUE_PRIMITIVE_ID)
+ if (semantic == VARYING_SLOT_PRIMITIVE_ID)
info->input[loc].interpolate = INTERP_MODE_FLAT;
else
info->input[loc].interpolate = interp;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state.c
^
|
@@ -2753,7 +2753,6 @@
bool old_has_stencil =
old_has_zsbuf &&
((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
- bool unbound = false;
int i;
/* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
@@ -2778,16 +2777,6 @@
if (!surf->dcc_incompatible)
continue;
- /* Since the DCC decompression calls back into set_framebuffer-
- * _state, we need to unbind the framebuffer, so that
- * vi_separate_dcc_stop_query isn't called twice with the same
- * color buffer.
- */
- if (!unbound) {
- util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
- unbound = true;
- }
-
if (vi_dcc_enabled(tex, surf->base.u.tex.level))
if (!si_texture_disable_dcc(sctx, tex))
si_decompress_dcc(sctx, tex);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_draw.cpp
^
|
@@ -1175,7 +1175,9 @@
min_vertex_count);
/* Draw state. */
- if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
+ if (ia_multi_vgt_param != sctx->last_multi_vgt_param ||
+ /* Workaround for SpecviewPerf13 Catia hang on GFX9. */
+ (GFX_VERSION == GFX9 && prim != sctx->last_prim)) {
radeon_begin(cs);
if (GFX_VERSION == GFX9)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c
^
|
@@ -366,7 +366,7 @@
S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
radeon_opt_set_context_reg(
ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
- S_028BE4_PIX_CENTER(rs->half_pixel_center) |
+ S_028BE4_PIX_CENTER(rs->half_pixel_center) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode));
radeon_end_update_context_roll(ctx);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_texture.c
^
|
@@ -123,7 +123,14 @@
unsigned *layer_stride)
{
if (sscreen->info.chip_class >= GFX9) {
- *stride = tex->surface.u.gfx9.surf_pitch * tex->surface.bpe;
+ unsigned pitch;
+ if (tex->surface.is_linear) {
+ pitch = tex->surface.u.gfx9.pitch[level];
+ } else {
+ pitch = tex->surface.u.gfx9.surf_pitch;
+ }
+
+ *stride = pitch * tex->surface.bpe;
*layer_stride = tex->surface.u.gfx9.surf_slice_size;
if (!box)
@@ -133,9 +140,8 @@
* of mipmap levels. */
return tex->surface.u.gfx9.surf_offset + box->z * tex->surface.u.gfx9.surf_slice_size +
tex->surface.u.gfx9.offset[level] +
- (box->y / tex->surface.blk_h * tex->surface.u.gfx9.surf_pitch +
- box->x / tex->surface.blk_w) *
- tex->surface.bpe;
+ (box->y / tex->surface.blk_h * pitch + box->x / tex->surface.blk_w) *
+ tex->surface.bpe;
} else {
*stride = tex->surface.u.legacy.level[level].nblk_x * tex->surface.bpe;
assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
@@ -1647,6 +1653,7 @@
resource.array_size = texture->array_size;
resource.last_level = texture->last_level;
resource.nr_samples = texture->nr_samples;
+ resource.nr_storage_samples = texture->nr_storage_samples;
resource.usage = PIPE_USAGE_DEFAULT;
resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/ci/softpipe-fails.txt
^
|
@@ -440,7 +440,6 @@
dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.read_pixels_fbo_format_mismatch,Fail
dEQP-GLES31.functional.debug.negative_coverage.log.buffer.read_pixels_fbo_format_mismatch,Fail
dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.line_loop.instanced_attributes,Fail
-dEQP-GLES31.functional.draw_buffers_indexed.overwrite_indexed.common_color_mask_buffer_color_mask,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.0,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.1,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.10,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/ci/softpipe-flakes.txt
^
|
@@ -1,3 +1,2 @@
-dEQP-GLES31.functional.draw_buffers_indexed.overwrite_indexed.common_color_mask_buffer_color_mask
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.geometry.isampler2darray
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.geometry.isampler3d
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/sp_quad_blend.c
^
|
@@ -1005,7 +1005,7 @@
rebase_colors(bqs->base_format[cbuf], quadColor);
if (blend->rt[blend_buf].colormask != 0xf)
- colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
+ colormask_quad( blend->rt[blend_buf].colormask, quadColor, dest);
/* Output color values
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
^
|
@@ -1450,7 +1450,7 @@
need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
unsigned index)
{
- if (!(emit->info.indirect_files && (1u << TGSI_FILE_TEMPORARY))
+ if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
&& emit->current_loop_depth == 0) {
if (!emit->temp_map[index].initialized &&
emit->temp_map[index].index < emit->num_shader_temps) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/swr/swr_draw.cpp
^
|
@@ -62,7 +62,7 @@
if (!indirect &&
!info->primitive_restart &&
- !u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count))
+ !u_trim_pipe_prim((enum pipe_prim_type)info->mode, (unsigned*)&draws[0].count))
return;
if (!swr_check_render_cond(pipe))
@@ -102,7 +102,7 @@
STREAMOUT_COMPILE_STATE state = {0};
struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
- state.numVertsPerPrim = u_vertices_per_prim(info->mode);
+ state.numVertsPerPrim = u_vertices_per_prim((enum pipe_prim_type)info->mode);
uint32_t offsets[MAX_SO_STREAMS] = {0};
uint32_t num = 0;
@@ -221,7 +221,7 @@
if (ctx->gs)
topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
else
- topology = info->mode;
+ topology = (enum pipe_prim_type)info->mode;
switch (topology) {
case PIPE_PRIM_TRIANGLE_FAN:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/swr/swr_state.cpp
^
|
@@ -1731,7 +1731,7 @@
/* Has to be before fragment shader, since it sets SWR_NEW_FS */
if (p_draw_info) {
bool new_prim_is_poly =
- (u_reduced_prim(p_draw_info->mode) == PIPE_PRIM_TRIANGLES) &&
+ (u_reduced_prim((enum pipe_prim_type)p_draw_info->mode) == PIPE_PRIM_TRIANGLES) &&
(ctx->derived.rastState.fillMode == SWR_FILLMODE_SOLID);
if (new_prim_is_poly != ctx->poly_stipple.prim_is_poly) {
ctx->dirty |= SWR_NEW_FS;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_context.c
^
|
@@ -567,10 +567,22 @@
{
struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
struct tegra_context *context = to_tegra_context(pcontext);
+ struct tegra_sampler_view *view;
unsigned i;
- for (i = 0; i < num_views; i++)
+ for (i = 0; i < num_views; i++) {
+ /* adjust private reference count */
+ view = to_tegra_sampler_view(pviews[i]);
+ if (view) {
+ view->refcount--;
+ if (!view->refcount) {
+ view->refcount = 100000000;
+ p_atomic_add(&view->gpu->reference.count, view->refcount);
+ }
+ }
+
views[i] = tegra_sampler_view_unwrap(pviews[i]);
+ }
context->gpu->set_sampler_views(context->gpu, shader, start_slot,
num_views, unbind_num_trailing_slots,
@@ -836,15 +848,19 @@
if (!view)
return NULL;
- view->gpu = context->gpu->create_sampler_view(context->gpu, resource->gpu,
- template);
- memcpy(&view->base, view->gpu, sizeof(*view->gpu));
+ view->base = *template;
+ view->base.context = pcontext;
/* overwrite to prevent reference from being released */
view->base.texture = NULL;
-
pipe_reference_init(&view->base.reference, 1);
pipe_resource_reference(&view->base.texture, presource);
- view->base.context = pcontext;
+
+ view->gpu = context->gpu->create_sampler_view(context->gpu, resource->gpu,
+ template);
+
+ /* use private reference count */
+ view->gpu->reference.count += 100000000;
+ view->refcount = 100000000;
return &view->base;
}
@@ -856,6 +872,8 @@
struct tegra_sampler_view *view = to_tegra_sampler_view(pview);
pipe_resource_reference(&view->base.texture, NULL);
+ /* adjust private reference count */
+ p_atomic_add(&view->gpu->reference.count, -view->refcount);
pipe_sampler_view_reference(&view->gpu, NULL);
free(view);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_context.h
^
|
@@ -47,6 +47,7 @@
struct tegra_sampler_view {
struct pipe_sampler_view base;
struct pipe_sampler_view *gpu;
+ unsigned int refcount;
};
static inline struct tegra_sampler_view *
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_resource.h
^
|
@@ -31,6 +31,7 @@
struct tegra_resource {
struct pipe_resource base;
struct pipe_resource *gpu;
+ unsigned int refcount;
uint64_t modifier;
uint32_t stride;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/tegra/tegra_screen.c
^
|
@@ -245,6 +245,10 @@
pipe_reference_init(&resource->base.reference, 1);
resource->base.screen = &screen->base;
+ /* use private reference count for wrapped resources */
+ resource->gpu->reference.count += 100000000;
+ resource->refcount = 100000000;
+
return &resource->base;
destroy:
@@ -352,6 +356,8 @@
{
struct tegra_resource *resource = to_tegra_resource(presource);
+ /* adjust private reference count */
+ p_atomic_add(&resource->gpu->reference.count, -resource->refcount);
pipe_resource_reference(&resource->gpu, NULL);
free(resource);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/virgl/ci/traces-virgl.yml
^
|
@@ -17,11 +17,11 @@
- path: gputest/furmark.trace
expectations:
- device: gl-virgl
- checksum: d5682aaa762a4849f0cae1692623bdcb
+ checksum: a38d4c123d13c5ccd3a86f0663fe1aab
- path: gputest/pixmark-piano.trace
expectations:
- device: gl-virgl
- checksum: 1bcded27a6ba04fe0f76ff997b98dbc3
+ checksum: b580ae01560380461a103975cab77393
- path: gputest/triangle.trace
expectations:
- device: gl-virgl
@@ -121,7 +121,7 @@
- path: glmark2/refract.trace
expectations:
- device: gl-virgl
- checksum: b1332df324d0fc1db22b362231d3ed01
+ checksum: cdadfee0518b964433d80c01329ec191
- path: glmark2/shading:shading=blinn-phong-inf.trace
expectations:
- device: gl-virgl
@@ -178,7 +178,7 @@
- path: gputest/plot3d.trace
expectations:
- device: gl-virgl
- checksum: a1af286874f7060171cb3ca2e765c448
+ checksum: 7e818a6070005056700e5ef8590a3f8e
# Times out
# - path: gputest/tessmark.trace
# expectations:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
^
|
@@ -16,17 +16,11 @@
dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail
dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail
dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail
-dEQP-GLES3.functional.multisample.fbo_4_samples.proportionality_sample_coverage,Fail
-dEQP-GLES3.functional.multisample.fbo_4_samples.sample_coverage_invert,Fail
-dEQP-GLES3.functional.multisample.fbo_max_samples.proportionality_sample_coverage,Fail
-dEQP-GLES3.functional.multisample.fbo_max_samples.sample_coverage_invert,Fail
-KHR-GL32.transform_feedback.capture_geometry_separate_test,Fail
KHR-GL32.transform_feedback.capture_vertex_interleaved_test,Fail
KHR-GL32.transform_feedback.capture_vertex_separate_test,Fail
KHR-GL32.transform_feedback.discard_vertex_test,Fail
KHR-GL32.transform_feedback.draw_xfb_instanced_test,Crash
KHR-GL32.transform_feedback.draw_xfb_stream_instanced_test,Crash
-KHR-GL32.transform_feedback.query_geometry_separate_test,Fail
KHR-GL32.transform_feedback.query_vertex_interleaved_test,Fail
KHR-GL32.transform_feedback.query_vertex_separate_test,Fail
dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
@@ -162,7 +156,6 @@
spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail
spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail
spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail
-spec@arb_texture_rg@multisample-fast-clear gl_arb_texture_rg-int,Fail
spec@arb_texture_view@rendering-formats,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail
@@ -177,16 +170,12 @@
spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail
@@ -225,9 +214,6 @@
spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail
spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail
spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail
@@ -254,20 +240,15 @@
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail
@@ -307,13 +288,9 @@
spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16,Fail
@@ -337,12 +314,9 @@
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16F,Fail
@@ -353,9 +327,6 @@
spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail
@@ -367,9 +338,6 @@
spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8UI,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail
@@ -389,11 +357,8 @@
spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail
@@ -423,16 +388,11 @@
spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail
spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail
@@ -470,8 +430,6 @@
spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
-spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Fail
-spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Fail
spec@ext_framebuffer_object@fbo-blending-formats,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY12,Fail
@@ -511,7 +469,6 @@
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail
spec@ext_packed_float@query-rgba-signed-components,Fail
-spec@ext_texture_integer@multisample-fast-clear gl_ext_texture_integer,Fail
spec@ext_texture_snorm@fbo-blending-formats,Fail
spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY16_SNORM,Fail
spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY8_SNORM,Fail
@@ -591,16 +548,12 @@
spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail
spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail
@@ -639,9 +592,6 @@
spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail
spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail
spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail
spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail
spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail
spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail
@@ -668,20 +618,15 @@
spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail
spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail
spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/meson.build
^
|
@@ -71,7 +71,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
zink_c_args = []
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
^
|
@@ -1408,16 +1408,16 @@
emit_atomic(struct ntv_context *ctx, SpvId op, SpvId type, SpvId src0, SpvId src1, SpvId src2)
{
if (op == SpvOpAtomicLoad)
- return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0));
if (op == SpvOpAtomicCompareExchange)
- return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0),
emit_uint_const(ctx, 32, 0),
/* these params are intentionally swapped */
src2, src1);
- return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0), src1);
}
@@ -1742,7 +1742,13 @@
BUILTIN_UNOP(nir_op_ufind_msb, GLSLstd450FindUMsb)
BUILTIN_UNOP(nir_op_find_lsb, GLSLstd450FindILsb)
BUILTIN_UNOP(nir_op_ifind_msb, GLSLstd450FindSMsb)
- BUILTIN_UNOPF(nir_op_pack_half_2x16, GLSLstd450PackHalf2x16)
+
+ case nir_op_pack_half_2x16:
+ assert(nir_op_infos[alu->op].num_inputs == 1);
+ result = emit_builtin_unop(ctx, GLSLstd450PackHalf2x16, get_dest_type(ctx, &alu->dest.dest, nir_type_uint), src[0]);
+ force_float = true;
+ break;
+
BUILTIN_UNOPF(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16)
BUILTIN_UNOPF(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32)
#undef BUILTIN_UNOP
@@ -2481,12 +2487,12 @@
}
static void
-handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2)
+handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2, nir_alu_type type)
{
- SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32);
+ SpvId dest_type = get_dest_type(ctx, &intr->dest, type);
SpvId result = emit_atomic(ctx, get_atomic_op(intr->intrinsic), dest_type, ptr, param, param2);
assert(result);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ store_dest(ctx, &intr->dest, result, type);
}
static void
@@ -2525,7 +2531,7 @@
if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
param2 = get_src(ctx, &intr->src[3]);
- handle_atomic_op(ctx, intr, ptr, param, param2);
+ handle_atomic_op(ctx, intr, ptr, param, param2, nir_type_uint32);
}
static void
@@ -2546,7 +2552,7 @@
if (intr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
param2 = get_src(ctx, &intr->src[2]);
- handle_atomic_op(ctx, intr, ptr, param, param2);
+ handle_atomic_op(ctx, intr, ptr, param, param2, nir_type_uint32);
}
static void
@@ -2681,13 +2687,24 @@
type_to_dim(glsl_get_sampler_dim(type), &is_ms);
SpvId sample = is_ms ? get_src(ctx, &intr->src[2]) : emit_uint_const(ctx, 32, 0);
SpvId coord = get_image_coords(ctx, type, &intr->src[1]);
- SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type));
+ enum glsl_base_type glsl_type = glsl_get_sampler_result_type(type);
+ SpvId base_type = get_glsl_basetype(ctx, glsl_type);
SpvId texel = spirv_builder_emit_image_texel_pointer(&ctx->builder, base_type, img_var, coord, sample);
SpvId param2 = 0;
- if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
+ /* The type of Value must be the same as Result Type.
+ * The type of the value pointed to by Pointer must be the same as Result Type.
+ */
+ nir_alu_type ntype = nir_get_nir_type_for_glsl_base_type(glsl_type);
+ SpvId cast_type = get_dest_type(ctx, &intr->dest, ntype);
+ param = emit_bitcast(ctx, cast_type, param);
+
+ if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
param2 = get_src(ctx, &intr->src[4]);
- handle_atomic_op(ctx, intr, texel, param, param2);
+ param2 = emit_bitcast(ctx, cast_type, param2);
+ }
+
+ handle_atomic_op(ctx, intr, texel, param, param2, ntype);
}
static void
@@ -2869,7 +2886,7 @@
case nir_intrinsic_memory_barrier:
spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup,
SpvMemorySemanticsImageMemoryMask | SpvMemorySemanticsUniformMemoryMask |
- SpvMemorySemanticsMakeVisibleMask | SpvMemorySemanticsAcquireReleaseMask);
+ SpvMemorySemanticsAcquireReleaseMask);
break;
case nir_intrinsic_memory_barrier_image:
@@ -3249,13 +3266,16 @@
lod = emit_float_const(ctx, 32, 0.0);
if (tex->op == nir_texop_txs) {
SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load);
- /* Additionally, if its Dim is 1D, 2D, 3D, or Cube,
+ /* Its Dim operand must be one of 1D, 2D, 3D, or Cube
+ * - OpImageQuerySizeLod specification
+ *
+ * Additionally, if its Dim is 1D, 2D, 3D, or Cube,
* it must also have either an MS of 1 or a Sampled of 0 or 2.
* - OpImageQuerySize specification
*
* all spirv samplers use these types
*/
- if (tex->sampler_dim != GLSL_SAMPLER_DIM_MS && !lod)
+ if (!lod && tex_instr_is_lod_allowed(tex))
lod = emit_uint_const(ctx, 32, 0);
SpvId result = spirv_builder_emit_image_query_size(&ctx->builder,
dest_type, image,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_blit.c
^
|
@@ -357,12 +357,18 @@
zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height)
{
struct u_rect intersect = {0, width, 0, height};
+ struct u_rect r = {
+ MIN2(region.x0, region.x1),
+ MAX2(region.x0, region.x1),
+ MIN2(region.y0, region.y1),
+ MAX2(region.y0, region.y1),
+ };
- if (!u_rect_test_intersection(&region, &intersect))
+ if (!u_rect_test_intersection(&r, &intersect))
/* is this even a thing? */
return false;
- u_rect_find_intersection(&region, &intersect);
+ u_rect_find_intersection(&r, &intersect);
if (intersect.x0 != 0 || intersect.y0 != 0 ||
intersect.x1 != width || intersect.y1 != height)
return false;
@@ -373,11 +379,23 @@
bool
zink_blit_region_covers(struct u_rect region, struct u_rect covers)
{
+ struct u_rect r = {
+ MIN2(region.x0, region.x1),
+ MAX2(region.x0, region.x1),
+ MIN2(region.y0, region.y1),
+ MAX2(region.y0, region.y1),
+ };
+ struct u_rect c = {
+ MIN2(covers.x0, covers.x1),
+ MAX2(covers.x0, covers.x1),
+ MIN2(covers.y0, covers.y1),
+ MAX2(covers.y0, covers.y1),
+ };
struct u_rect intersect;
- if (!u_rect_test_intersection(&region, &covers))
+ if (!u_rect_test_intersection(&r, &c))
return false;
- u_rect_union(&intersect, &region, &covers);
- return intersect.x0 == covers.x0 && intersect.y0 == covers.y0 &&
- intersect.x1 == covers.x1 && intersect.y1 == covers.y1;
+ u_rect_union(&intersect, &r, &c);
+ return intersect.x0 == c.x0 && intersect.y0 == c.y0 &&
+ intersect.x1 == c.x1 && intersect.y1 == c.y1;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_bo.c
^
|
@@ -259,7 +259,7 @@
}
/* all non-suballocated bo can cache */
- init_pb_cache = true;
+ init_pb_cache = !pNext;
bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
if (!bo) {
@@ -278,7 +278,7 @@
simple_mtx_init(&bo->lock, mtx_plain);
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment_log2 = util_logbase2(alignment);
- bo->base.size = size;
+ bo->base.size = mai.allocationSize;
bo->base.vtbl = &bo_vtbl;
bo->base.placement = vk_domain_from_heap(heap);
bo->base.usage = flags;
@@ -347,7 +347,7 @@
size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
- bo->base.placement, ZINK_ALLOC_NO_SUBALLOC, NULL);
+ ZINK_HEAP_DEVICE_LOCAL, ZINK_ALLOC_NO_SUBALLOC, NULL);
if (!buf) {
FREE(best_backing->chunks);
FREE(best_backing);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_compiler.c
^
|
@@ -379,7 +379,7 @@
.lower_unpack_32_2x16_split = true,
.lower_vector_cmp = true,
.lower_int64_options = 0,
- .lower_doubles_options = ~nir_lower_fp64_full_software,
+ .lower_doubles_options = 0,
.lower_uniforms_to_ubo = true,
.has_fsub = true,
.has_isub = true,
@@ -397,6 +397,21 @@
screen->nir_options.lower_flrp64 = true;
screen->nir_options.lower_ffma64 = true;
}
+
+ /*
+ The OpFRem and OpFMod instructions use cheap approximations of remainder,
+ and the error can be large due to the discontinuity in trunc() and floor().
+ This can produce mathematically unexpected results in some cases, such as
+ FMod(x,x) computing x rather than 0, and can also cause the result to have
+ a different sign than the infinitely precise result.
+
+ -Table 84. Precision of core SPIR-V Instructions
+ * for drivers that are known to have imprecise fmod for doubles, lower dmod
+ */
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
+ screen->nir_options.lower_doubles_options = nir_lower_dmod;
}
const void *
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.c
^
|
@@ -73,8 +73,18 @@
ALWAYS_INLINE static void
check_resource_for_batch_ref(struct zink_context *ctx, struct zink_resource *res)
{
- if (!zink_resource_has_binds(res))
- zink_batch_reference_resource(&ctx->batch, res);
+ if (!zink_resource_has_binds(res)) {
+ /* avoid desync between usage and tracking:
+ * - if usage exists, it must be removed before the context is destroyed
+ * - having usage does not imply having tracking
+ * - if tracking will be added here, also reapply usage to avoid dangling usage once tracking is removed
+ * TODO: somehow fix this for perf because it's an extra hash lookup
+ */
+ if (res->obj->bo->reads || res->obj->bo->writes)
+ zink_batch_reference_resource_rw(&ctx->batch, res, !!res->obj->bo->writes);
+ else
+ zink_batch_reference_resource(&ctx->batch, res);
+ }
}
static void
@@ -100,11 +110,14 @@
pipe_surface_release(&ctx->base, &ctx->dummy_surface[i]);
zink_buffer_view_reference(screen, &ctx->dummy_bufferview, NULL);
- zink_descriptors_deinit_bindless(ctx);
+ if (ctx->dd)
+ zink_descriptors_deinit_bindless(ctx);
simple_mtx_destroy(&ctx->batch_mtx);
- zink_clear_batch_state(ctx, ctx->batch.state);
- zink_batch_state_destroy(screen, ctx->batch.state);
+ if (ctx->batch.state) {
+ zink_clear_batch_state(ctx, ctx->batch.state);
+ zink_batch_state_destroy(screen, ctx->batch.state);
+ }
struct zink_batch_state *bs = ctx->batch_states;
while (bs) {
struct zink_batch_state *bs_next = bs->next;
@@ -149,7 +162,8 @@
_mesa_hash_table_destroy(ctx->render_pass_cache, NULL);
slab_destroy_child(&ctx->transfer_pool_unsync);
- screen->descriptors_deinit(ctx);
+ if (ctx->dd)
+ screen->descriptors_deinit(ctx);
zink_descriptor_layouts_deinit(ctx);
@@ -595,8 +609,10 @@
struct zink_sampler_state *sampler = sampler_state;
struct zink_batch *batch = &zink_context(pctx)->batch;
zink_descriptor_set_refs_clear(&sampler->desc_set_refs, sampler_state);
- util_dynarray_append(&batch->state->zombie_samplers, VkSampler,
- sampler->sampler);
+ /* may be called if context_create fails */
+ if (batch->state)
+ util_dynarray_append(&batch->state->zombie_samplers, VkSampler,
+ sampler->sampler);
if (sampler->custom_border_color)
p_atomic_dec(&zink_screen(pctx->screen)->cur_custom_border_color_samplers);
FREE(sampler);
@@ -637,6 +653,9 @@
assert(bvci.format);
bvci.offset = offset;
bvci.range = !offset && range == res->base.b.width0 ? VK_WHOLE_SIZE : range;
+ uint32_t clamp = util_format_get_blocksize(format) * screen->info.props.limits.maxTexelBufferElements;
+ if (bvci.range == VK_WHOLE_SIZE && res->base.b.width0 > clamp)
+ bvci.range = clamp;
bvci.flags = 0;
return bvci;
}
@@ -736,10 +755,24 @@
ivci.subresourceRange.aspectMask = sampler_aspect_from_format(state->format);
/* samplers for stencil aspects of packed formats need to always use stencil swizzle */
if (ivci.subresourceRange.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r));
- ivci.components.g = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g));
- ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b));
- ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a));
+ if (sampler_view->base.swizzle_r == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_g == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_b == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_a == PIPE_SWIZZLE_X) {
+ /*
+ * When the state tracker asks for 000x swizzles, this is depth mode GL_ALPHA,
+ * however with the single dref fetch this will fail, so just spam all the channels.
+ */
+ ivci.components.r = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.g = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.b = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.a = VK_COMPONENT_SWIZZLE_R;
+ } else {
+ ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r));
+ ivci.components.g = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g));
+ ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b));
+ ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a));
+ }
} else {
/* if we have e.g., R8G8B8X8, then we have to ignore alpha since we're just emulating
* these formats
@@ -903,36 +936,9 @@
return;
struct zink_resource *res = zink_resource(ctx->vertex_buffers[slot].buffer.resource);
res->vbo_bind_mask &= ~BITFIELD_BIT(slot);
- ctx->vbufs[slot] = VK_NULL_HANDLE;
- ctx->vbuf_offsets[slot] = 0;
update_res_bind_count(ctx, res, false, true);
}
-ALWAYS_INLINE static struct zink_resource *
-set_vertex_buffer_clamped(struct zink_context *ctx, unsigned slot)
-{
- const struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[slot];
- struct zink_resource *res = zink_resource(ctx_vb->buffer.resource);
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (ctx_vb->buffer_offset > screen->info.props.limits.maxVertexInputAttributeOffset) {
- /* buffer offset exceeds maximum: make a tmp buffer at this offset */
- ctx->vbufs[slot] = zink_resource_tmp_buffer(screen, res, ctx_vb->buffer_offset, 0, &ctx->vbuf_offsets[slot]);
- util_dynarray_append(&res->obj->tmp, VkBuffer, ctx->vbufs[slot]);
- /* the driver is broken and sets a min alignment that's larger than its max offset: rebind as staging buffer */
- if (unlikely(ctx->vbuf_offsets[slot] > screen->info.props.limits.maxVertexInputAttributeOffset)) {
- static bool warned = false;
- if (!warned)
- debug_printf("zink: this vulkan driver is BROKEN! maxVertexInputAttributeOffset < VkMemoryRequirements::alignment\n");
- warned = true;
- }
- } else {
- ctx->vbufs[slot] = res->obj->buffer;
- ctx->vbuf_offsets[slot] = ctx_vb->buffer_offset;
- }
- assert(ctx->vbufs[slot]);
- return res;
-}
-
static void
zink_set_vertex_buffers(struct pipe_context *pctx,
unsigned start_slot,
@@ -970,9 +976,9 @@
/* always barrier before possible rebind */
zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
- set_vertex_buffer_clamped(ctx, start_slot + i);
- } else
- enabled_buffers &= ~BITFIELD_BIT(i);
+ } else {
+ enabled_buffers &= ~BITFIELD_BIT(start_slot + i);
+ }
}
} else {
if (need_state_change)
@@ -1105,7 +1111,7 @@
update_res_bind_count(ctx, new_res, shader == PIPE_SHADER_COMPUTE, false);
}
zink_batch_resource_usage_set(&ctx->batch, new_res, false);
- zink_fake_buffer_barrier(new_res, VK_ACCESS_UNIFORM_READ_BIT,
+ zink_resource_buffer_barrier(ctx, new_res, VK_ACCESS_UNIFORM_READ_BIT,
zink_pipeline_flags_from_pipe_stage(shader));
}
update |= ((index || screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) && ctx->ubos[shader][index].buffer_offset != offset) ||
@@ -1200,7 +1206,7 @@
ssbo->buffer_size = MIN2(buffers[i].buffer_size, new_res->base.b.width0 - ssbo->buffer_offset);
util_range_add(&new_res->base.b, &new_res->valid_buffer_range, ssbo->buffer_offset,
ssbo->buffer_offset + ssbo->buffer_size);
- zink_fake_buffer_barrier(new_res, access,
+ zink_resource_buffer_barrier(ctx, new_res, access,
zink_pipeline_flags_from_pipe_stage(p_stage));
update = true;
max_slot = MAX2(max_slot, start_slot + i);
@@ -1386,7 +1392,7 @@
image_view->buffer_view = create_image_bufferview(ctx, &images[i]);
assert(image_view->buffer_view);
zink_batch_usage_set(&image_view->buffer_view->batch_uses, ctx->batch.state);
- zink_fake_buffer_barrier(res, access,
+ zink_resource_buffer_barrier(ctx, res, access,
zink_pipeline_flags_from_pipe_stage(p_stage));
} else {
image_view->surface = create_image_surface(ctx, &images[i], p_stage == PIPE_SHADER_COMPUTE);
@@ -1479,7 +1485,7 @@
update = true;
}
zink_batch_usage_set(&b->buffer_view->batch_uses, ctx->batch.state);
- zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT,
+ zink_resource_buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT,
zink_pipeline_flags_from_pipe_stage(shader_type));
if (!a || a->buffer_view->buffer_view != b->buffer_view->buffer_view)
update = true;
@@ -1646,7 +1652,7 @@
rebind_bindless_bufferview(ctx, res, ds);
VkBufferView *bv = &ctx->di.bindless[0].buffer_infos[handle];
*bv = ds->bufferview->buffer_view;
- zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ zink_resource_buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
} else {
VkDescriptorImageInfo *ii = &ctx->di.bindless[0].img_infos[handle];
ii->sampler = bd->sampler->sampler;
@@ -1765,7 +1771,7 @@
rebind_bindless_bufferview(ctx, res, ds);
VkBufferView *bv = &ctx->di.bindless[1].buffer_infos[handle];
*bv = ds->bufferview->buffer_view;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.h
^
|
@@ -261,8 +261,6 @@
uint16_t rp_clears_enabled;
uint16_t fbfetch_outputs;
- VkBuffer vbufs[PIPE_MAX_ATTRIBS];
- unsigned vbuf_offsets[PIPE_MAX_ATTRIBS];
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
bool vertex_buffers_dirty;
@@ -399,8 +397,6 @@
void
zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
-void
-zink_fake_buffer_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
bool
zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
bool
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors.c
^
|
@@ -696,13 +696,13 @@
#endif
switch (type) {
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- zds->sampler_states = (struct zink_sampler_state**)&samplers[i * pool->key.layout->num_descriptors];
+ zds->sampler_states = (struct zink_sampler_state**)&samplers[i * num_resources];
FALLTHROUGH;
case ZINK_DESCRIPTOR_TYPE_IMAGE:
- zds->surfaces = &surfaces[i * pool->key.layout->num_descriptors];
+ zds->surfaces = &surfaces[i * num_resources];
break;
default:
- zds->res_objs = (struct zink_resource_object**)&res_objs[i * pool->key.layout->num_descriptors];
+ zds->res_objs = (struct zink_resource_object**)&res_objs[i * num_resources];
break;
}
zds->desc_set = desc_set[i];
@@ -790,20 +790,28 @@
simple_mtx_lock(&pool->mtx);
if (last_set && last_set->hash == hash && desc_state_equal(&last_set->key, &key)) {
+ bool was_recycled = false;
zds = last_set;
*cache_hit = !zds->invalid;
if (zds->recycled) {
struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key);
- if (he)
+ if (he) {
+ was_recycled = true;
_mesa_hash_table_remove(pool->free_desc_sets, he);
+ }
zds->recycled = false;
}
if (zds->invalid) {
if (zink_batch_usage_exists(zds->batch_uses))
punt_invalid_set(zds, NULL);
- else
+ else {
+ if (was_recycled) {
+ descriptor_set_invalidate(zds);
+ goto out;
+ }
/* this set is guaranteed to be in pool->alloc_desc_sets */
goto skip_hash_tables;
+ }
zds = NULL;
}
if (zds)
@@ -828,6 +836,8 @@
zds = (void*)he->data;
*cache_hit = !zds->invalid;
if (recycled) {
+ if (zds->invalid)
+ descriptor_set_invalidate(zds);
/* need to migrate this entry back to the in-use hash */
_mesa_hash_table_remove(pool->free_desc_sets, he);
goto out;
@@ -1419,6 +1429,7 @@
if (pg->dd->push_usage) {
if (pg->dd->fbfetch) {
/* fbfetch is not cacheable: grab a lazy set because it's faster */
+ cache_hit = false;
desc_set = zink_descriptors_alloc_lazy_push(ctx);
} else {
zds = zink_descriptor_set_get(ctx, ZINK_DESCRIPTOR_TYPES, is_compute, &cache_hit);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors_lazy.c
^
|
@@ -140,20 +140,20 @@
struct zink_shader **stages;
if (pg->is_compute)
stages = &((struct zink_compute_program*)pg)->shader;
- else {
+ else
stages = ((struct zink_gfx_program*)pg)->shaders;
- if (stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) {
- zink_descriptor_util_init_fbfetch(ctx);
- push_count = 1;
- pg->dd->fbfetch = true;
- }
- }
if (!pg->dd)
pg->dd = (void*)rzalloc(pg, struct zink_program_descriptor_data);
if (!pg->dd)
return false;
+ if (!pg->is_compute && stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) {
+ zink_descriptor_util_init_fbfetch(ctx);
+ push_count = 1;
+ pg->dd->fbfetch = true;
+ }
+
unsigned entry_idx[ZINK_DESCRIPTOR_TYPES] = {0};
unsigned num_shaders = pg->is_compute ? 1 : ZINK_SHADER_COUNT;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_draw.cpp
^
|
@@ -134,16 +134,16 @@
return;
for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
- const unsigned buffer_id = ctx->element_state->binding_map[i];
- struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id;
+ struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
assert(vb);
if (vb->buffer.resource) {
- buffers[i] = ctx->vbufs[buffer_id];
- assert(buffers[i]);
+ struct zink_resource *res = zink_resource(vb->buffer.resource);
+ assert(res->obj->buffer);
+ buffers[i] = res->obj->buffer;
+ buffer_offsets[i] = vb->buffer_offset;
+ buffer_strides[i] = vb->stride;
if (HAS_VERTEX_INPUT)
elems->hw_state.dynbindings[i].stride = vb->stride;
- buffer_offsets[i] = ctx->vbuf_offsets[buffer_id];
- buffer_strides[i] = vb->stride;
zink_batch_resource_usage_set(&ctx->batch, zink_resource(vb->buffer.resource), false);
} else {
buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
@@ -193,6 +193,8 @@
prog = (struct zink_gfx_program*)entry->data;
u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages)
ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader;
+ /* ensure variants are always updated if keys have changed since last use */
+ ctx->dirty_shader_stages |= prog->stages_present;
} else {
ctx->dirty_shader_stages |= bits;
prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.vertices_per_patch + 1);
@@ -374,6 +376,8 @@
access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
pipeline |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
bind_count -= util_bitcount(res->vbo_bind_mask);
+ if (res->write_bind_count[is_compute])
+ pipeline |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
}
bind_count -= res->so_bind_count;
}
@@ -462,6 +466,9 @@
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
+ if (!dindirect && (!draws[0].count || !dinfo->instance_count))
+ return;
+
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_rasterizer_state *rast_state = ctx->rast_state;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_pipeline.c
^
|
@@ -124,7 +124,13 @@
warn_missing_feature("alphaToOne");
ms_state.alphaToOneEnable = state->blend_state->alpha_to_one;
}
- ms_state.pSampleMask = state->sample_mask ? &state->sample_mask : NULL;
+ /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1."
+ * - Chapter 27. Rasterization
+ *
+ * thus it never makes sense to leave this as NULL since gallium will provide correct
+ * data here as long as sample_mask is initialized on context creation
+ */
+ ms_state.pSampleMask = &state->sample_mask;
if (hw_rast_state->force_persample_interp) {
ms_state.sampleShadingEnable = VK_TRUE;
ms_state.minSampleShading = 1.0;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_program.c
^
|
@@ -557,6 +557,7 @@
zink_destroy_gfx_program(struct zink_screen *screen,
struct zink_gfx_program *prog)
{
+ util_queue_fence_wait(&prog->base.cache_fence);
if (prog->base.layout)
VKSCR(DestroyPipelineLayout)(screen->dev, prog->base.layout, NULL);
@@ -601,6 +602,7 @@
zink_destroy_compute_program(struct zink_screen *screen,
struct zink_compute_program *comp)
{
+ util_queue_fence_wait(&comp->base.cache_fence);
if (comp->base.layout)
VKSCR(DestroyPipelineLayout)(screen->dev, comp->base.layout, NULL);
@@ -821,6 +823,9 @@
if (old != PIPE_SHADER_TYPES) {
memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
ctx->dirty_shader_stages |= BITFIELD_BIT(old);
+ } else {
+ /* always unset vertex shader values when changing to a non-vs last stage */
+ memset(&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
}
ctx->last_vertex_stage_dirty = true;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_query.c
^
|
@@ -183,22 +183,6 @@
query->type == PIPE_QUERY_GPU_FINISHED;
}
-static void
-qbo_sync_from_prev(struct zink_context *ctx, struct zink_query *query, unsigned id_offset, unsigned last_start)
-{
- assert(id_offset);
-
- struct zink_query_buffer *prev = list_last_entry(&query->buffers, struct zink_query_buffer, list);
- unsigned result_size = get_num_results(query->type) * sizeof(uint64_t);
- /* this is get_buffer_offset() but without the zink_query object */
- unsigned qbo_offset = last_start * get_num_results(query->type) * sizeof(uint64_t);
- query->curr_query = id_offset;
- query->curr_qbo->num_results = id_offset;
- zink_copy_buffer(ctx, zink_resource(query->curr_qbo->buffer), zink_resource(prev->buffer), 0,
- qbo_offset,
- id_offset * result_size);
-}
-
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
@@ -459,6 +443,8 @@
uint64_t *xfb_results = NULL;
uint64_t *results;
bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
+ if (!qbo->num_results)
+ continue;
results = pipe_buffer_map_range(pctx, qbo->buffer, 0,
(is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer);
if (!results) {
@@ -563,7 +549,7 @@
util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
assert(query_id < NUM_QUERIES);
VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
- offset, type_size, flags);
+ offset, base_result_size, flags);
}
static void
@@ -575,8 +561,6 @@
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
- unsigned last_start = q->last_start;
- unsigned id_offset = q->curr_query - q->last_start;
/* This command must only be called outside of a render pass instance
*
* - vkCmdResetQueryPool spec
@@ -605,8 +589,6 @@
reset_qbo(q);
else
debug_printf("zink: qbo alloc failed on reset!");
- if (id_offset)
- qbo_sync_from_prev(ctx, q, id_offset, last_start);
}
static inline unsigned
@@ -650,6 +632,8 @@
if (!is_timestamp)
q->curr_qbo->num_results++;
+ else
+ q->curr_qbo->num_results = 1;
q->needs_update = false;
}
@@ -1014,17 +998,18 @@
*/
VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
+ unsigned src_offset = result_size * get_num_results(query->type);
if (zink_batch_usage_check_completion(ctx, query->batch_id)) {
- uint64_t u64[2] = {0};
- if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, 2 * result_size, u64,
+ uint64_t u64[4] = {0};
+ if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, sizeof(u64), u64,
0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) {
- pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + result_size);
+ pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
return;
}
}
- struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, result_size * 2);
+ struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
- zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size, result_size);
+ zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query->type), result_size);
pipe_resource_reference(&staging, NULL);
return;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.c
^
|
@@ -165,6 +165,9 @@
if (bind & PIPE_BIND_SHADER_IMAGE)
bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+ if (bind & PIPE_BIND_QUERY_BUFFER)
+ bci.usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
+
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
bci.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
return bci;
@@ -372,7 +375,7 @@
ici->samples = templ->nr_samples ? templ->nr_samples : VK_SAMPLE_COUNT_1_BIT;
ici->tiling = modifiers_count ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : bind & PIPE_BIND_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ici->initialLayout = dmabuf ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED;
+ ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
/* sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the following conditions is true:
* - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT
@@ -446,21 +449,20 @@
VkMemoryRequirements reqs;
VkMemoryPropertyFlags flags;
bool need_dedicated = false;
+ bool shared = templ->bind & PIPE_BIND_SHARED;
VkExternalMemoryHandleTypeFlags export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
VkExternalMemoryHandleTypeFlags external = 0;
if (whandle) {
- if (whandle->type == WINSYS_HANDLE_TYPE_FD)
+ if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
- else
+ export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ } else
unreachable("unknown handle type");
}
/* TODO: remove linear for wsi */
bool scanout = templ->bind & PIPE_BIND_SCANOUT;
- bool shared = templ->bind & PIPE_BIND_SHARED;
- if (shared && screen->info.have_EXT_external_memory_dma_buf)
- export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
pipe_reference_init(&obj->reference, 1);
util_dynarray_init(&obj->tmp, NULL);
@@ -1232,13 +1234,13 @@
mgr = ctx->tc->base.stream_uploader;
else
mgr = ctx->base.stream_uploader;
- u_upload_alloc(mgr, 0, box->width + box->x,
+ u_upload_alloc(mgr, 0, box->width,
screen->info.props.limits.minMemoryMapAlignment, &offset,
(struct pipe_resource **)&trans->staging_res, (void **)&ptr);
res = zink_resource(trans->staging_res);
- trans->offset = offset + box->x;
+ trans->offset = offset;
usage |= PIPE_MAP_UNSYNCHRONIZED;
- ptr = ((uint8_t *)ptr) + box->x;
+ ptr = ((uint8_t *)ptr);
} else {
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_MAP_UNSYNCHRONIZED;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_screen.c
^
|
@@ -184,7 +184,7 @@
if (!screen->disk_cache)
return;
- util_queue_add_job(&screen->cache_put_thread, pg, NULL, cache_put_job, NULL, 0);
+ util_queue_add_job(&screen->cache_put_thread, pg, &pg->cache_fence, cache_put_job, NULL, 0);
}
static void
@@ -438,7 +438,7 @@
return 1;
case PIPE_CAP_TGSI_BALLOT:
- return screen->vk_version >= VK_MAKE_VERSION(1,2,0) && screen->info.props11.subgroupSize <= 64;
+ return screen->info.have_vulkan12 && screen->info.have_EXT_shader_subgroup_ballot && screen->info.props11.subgroupSize <= 64;
case PIPE_CAP_SAMPLE_SHADING:
return screen->info.feats.features.sampleRateShading;
@@ -671,7 +671,10 @@
return MIN2(screen->info.props.limits.maxVertexOutputComponents / 4 / 2, 16);
case PIPE_CAP_DMABUF:
- return screen->info.have_KHR_external_memory_fd && screen->info.have_EXT_external_memory_dma_buf && screen->info.have_EXT_queue_family_foreign;
+ return screen->info.have_KHR_external_memory_fd &&
+ screen->info.have_EXT_external_memory_dma_buf &&
+ screen->info.have_EXT_queue_family_foreign &&
+ screen->info.have_EXT_image_drm_format_modifier;
case PIPE_CAP_DEPTH_BOUNDS_TEST:
return screen->info.feats.features.depthBounds;
@@ -851,8 +854,10 @@
return 0; /* not implemented */
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
- return screen->info.feats11.uniformAndStorageBuffer16BitAccess ||
- (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess);
+ //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform
+ //return screen->info.feats11.uniformAndStorageBuffer16BitAccess ||
+ //(screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess);
+ return 0;
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
return 0; //spirv requires 32bit derivative srcs and dests
case PIPE_SHADER_CAP_FP16:
@@ -1706,11 +1711,11 @@
if (mem.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
/* VRAM */
info->total_device_memory += mem.memoryProperties.memoryHeaps[i].size / 1024;
- info->avail_device_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024;
+ info->avail_device_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024;
} else {
/* GART */
info->total_staging_memory += mem.memoryProperties.memoryHeaps[i].size / 1024;
- info->avail_staging_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024;
+ info->avail_staging_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024;
}
}
/* evictions not yet supported in vulkan */
@@ -1941,9 +1946,11 @@
screen->base.get_compiler_options = zink_get_compiler_options;
screen->base.get_sample_pixel_grid = zink_get_sample_pixel_grid;
screen->base.is_format_supported = zink_is_format_supported;
- screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers;
- screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported;
- screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes;
+ if (screen->info.have_EXT_image_drm_format_modifier && screen->info.have_EXT_external_memory_dma_buf) {
+ screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers;
+ screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported;
+ screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes;
+ }
screen->base.context_create = zink_context_create;
screen->base.flush_frontbuffer = zink_flush_frontbuffer;
screen->base.destroy = zink_destroy_screen;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_state.h
^
|
@@ -63,12 +63,12 @@
unsigned polygon_mode : 2; //VkPolygonMode
unsigned cull_mode : 2; //VkCullModeFlags
unsigned line_mode : 2; //VkLineRasterizationModeEXT
- bool depth_clamp:1;
- bool rasterizer_discard:1;
- bool pv_last:1;
- bool line_stipple_enable:1;
- bool force_persample_interp:1;
- bool clip_halfz:1;
+ unsigned depth_clamp:1;
+ unsigned rasterizer_discard:1;
+ unsigned pv_last:1;
+ unsigned line_stipple_enable:1;
+ unsigned force_persample_interp:1;
+ unsigned clip_halfz:1;
};
#define ZINK_RAST_HW_STATE_SIZE 12
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
^
|
@@ -265,6 +265,7 @@
&pool->free_cmd_buffers, pool_link) {
lvp_cmd_buffer_destroy(cmd_buffer);
}
+ list_inithead(&pool->free_cmd_buffers);
}
VKAPI_ATTR void VKAPI_CALL lvp_CmdDrawMultiEXT(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_device.c
^
|
@@ -1174,11 +1174,13 @@
struct pipe_fence_handle *handle = NULL;
device->queue.ctx->flush(device->queue.ctx, &handle, 0);
if (fence)
- fence->handle = handle;
+ device->pscreen->fence_reference(device->pscreen, &fence->handle, handle);
set_last_fence(device, handle, timeline);
/* this is the array of signaling timeline semaphore links */
for (unsigned i = 0; i < num_timelines; i++)
- timelines[i]->fence = handle;
+ device->pscreen->fence_reference(device->pscreen, &timelines[i]->fence, handle);
+
+ device->pscreen->fence_reference(device->pscreen, &handle, NULL);
}
/* get a new timeline link for creating a new signal event
@@ -1210,7 +1212,8 @@
* sema->lock MUST be locked before calling
*/
static void
-prune_semaphore_links(struct lvp_semaphore *sema, uint64_t timeline)
+prune_semaphore_links(struct lvp_device *device,
+ struct lvp_semaphore *sema, uint64_t timeline)
{
if (!timeline)
/* zero isn't a valid id to prune with */
@@ -1225,7 +1228,7 @@
util_dynarray_append(&sema->links, struct lvp_semaphore_timeline*, tl);
tl = tl->next;
cur->next = NULL;
- cur->fence = NULL;
+ device->pscreen->fence_reference(device->pscreen, &cur->fence, NULL);
}
/* this is now the current timeline link */
sema->timeline = tl;
@@ -1288,7 +1291,7 @@
/* no timeline link was available yet: try to find one */
simple_mtx_lock(&sema->lock);
/* always prune first to update current timeline id */
- prune_semaphore_links(sema, device->queue.last_finished);
+ prune_semaphore_links(device, sema, device->queue.last_finished);
tl_array[i].tl = find_semaphore_timeline(sema, waitval);
if (timeout && !tl_array[i].tl) {
/* still no timeline link available:
@@ -1540,7 +1543,7 @@
}
simple_mtx_lock(&sema->lock);
/* always prune first to make links available and update timeline id */
- prune_semaphore_links(sema, queue->last_finished);
+ prune_semaphore_links(queue->device, sema, queue->last_finished);
if (sema->current < info->pSignalSemaphoreValues[j]) {
/* only signal semaphores if the new id is >= the current one */
struct lvp_semaphore_timeline *tl = get_semaphore_link(sema);
@@ -1562,7 +1565,7 @@
}
simple_mtx_lock(&sema->lock);
/* always prune first to update timeline id */
- prune_semaphore_links(sema, queue->last_finished);
+ prune_semaphore_links(queue->device, sema, queue->last_finished);
if (info->pWaitSemaphoreValues[j] &&
pSubmits[i].pWaitDstStageMask && pSubmits[i].pWaitDstStageMask[j] &&
sema->current < info->pWaitSemaphoreValues[j]) {
@@ -2316,7 +2319,7 @@
LVP_FROM_HANDLE(lvp_device, device, _device);
LVP_FROM_HANDLE(lvp_semaphore, sema, _semaphore);
simple_mtx_lock(&sema->lock);
- prune_semaphore_links(sema, device->queue.last_finished);
+ prune_semaphore_links(device, sema, device->queue.last_finished);
*pValue = sema->current;
simple_mtx_unlock(&sema->lock);
return VK_SUCCESS;
@@ -2334,7 +2337,7 @@
sema->current = pSignalInfo->value;
cnd_broadcast(&sema->submit);
simple_mtx_lock(&sema->lock);
- prune_semaphore_links(sema, device->queue.last_finished);
+ prune_semaphore_links(device, sema, device->queue.last_finished);
simple_mtx_unlock(&sema->lock);
return VK_SUCCESS;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_execute.c
^
|
@@ -573,40 +573,6 @@
state->rs_dirty = true;
}
- state->disable_multisample = pipeline->disable_multisample;
- if (pipeline->graphics_create_info.pMultisampleState) {
- const VkPipelineMultisampleStateCreateInfo *ms = pipeline->graphics_create_info.pMultisampleState;
- state->rs_state.multisample = ms->rasterizationSamples > 1;
- state->sample_mask = ms->pSampleMask ? ms->pSampleMask[0] : 0xffffffff;
- state->blend_state.alpha_to_coverage = ms->alphaToCoverageEnable;
- state->blend_state.alpha_to_one = ms->alphaToOneEnable;
- state->blend_dirty = true;
- state->rs_dirty = true;
- state->min_samples = 1;
- state->sample_mask_dirty = true;
- fb_samples = ms->rasterizationSamples;
- if (ms->sampleShadingEnable) {
- state->min_samples = ceil(ms->rasterizationSamples * ms->minSampleShading);
- if (state->min_samples > 1)
- state->min_samples = ms->rasterizationSamples;
- if (state->min_samples < 1)
- state->min_samples = 1;
- }
- if (pipeline->force_min_sample)
- state->min_samples = ms->rasterizationSamples;
- state->min_samples_dirty = true;
- } else {
- state->rs_state.multisample = false;
- state->sample_mask_dirty = state->sample_mask != 0xffffffff;
- state->sample_mask = 0xffffffff;
- state->min_samples_dirty = state->min_samples;
- state->min_samples = 0;
- state->blend_dirty |= state->blend_state.alpha_to_coverage || state->blend_state.alpha_to_one;
- state->blend_state.alpha_to_coverage = false;
- state->blend_state.alpha_to_one = false;
- state->rs_dirty = true;
- }
-
if (pipeline->graphics_create_info.pDepthStencilState) {
const VkPipelineDepthStencilStateCreateInfo *dsa = pipeline->graphics_create_info.pDepthStencilState;
@@ -710,6 +676,40 @@
state->blend_dirty = true;
}
+ state->disable_multisample = pipeline->disable_multisample;
+ if (pipeline->graphics_create_info.pMultisampleState) {
+ const VkPipelineMultisampleStateCreateInfo *ms = pipeline->graphics_create_info.pMultisampleState;
+ state->rs_state.multisample = ms->rasterizationSamples > 1;
+ state->sample_mask = ms->pSampleMask ? ms->pSampleMask[0] : 0xffffffff;
+ state->blend_state.alpha_to_coverage = ms->alphaToCoverageEnable;
+ state->blend_state.alpha_to_one = ms->alphaToOneEnable;
+ state->blend_dirty = true;
+ state->rs_dirty = true;
+ state->min_samples = 1;
+ state->sample_mask_dirty = true;
+ fb_samples = ms->rasterizationSamples;
+ if (ms->sampleShadingEnable) {
+ state->min_samples = ceil(ms->rasterizationSamples * ms->minSampleShading);
+ if (state->min_samples > 1)
+ state->min_samples = ms->rasterizationSamples;
+ if (state->min_samples < 1)
+ state->min_samples = 1;
+ }
+ if (pipeline->force_min_sample)
+ state->min_samples = ms->rasterizationSamples;
+ state->min_samples_dirty = true;
+ } else {
+ state->rs_state.multisample = false;
+ state->sample_mask_dirty = state->sample_mask != 0xffffffff;
+ state->sample_mask = 0xffffffff;
+ state->min_samples_dirty = state->min_samples;
+ state->min_samples = 0;
+ state->blend_dirty |= state->blend_state.alpha_to_coverage || state->blend_state.alpha_to_one;
+ state->blend_state.alpha_to_coverage = false;
+ state->blend_state.alpha_to_one = false;
+ state->rs_dirty = true;
+ }
+
if (!dynamic_states[conv_dynamic_state_idx(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT)]) {
const VkPipelineVertexInputStateCreateInfo *vi = pipeline->graphics_create_info.pVertexInputState;
int i;
@@ -1011,8 +1011,6 @@
*/
if (iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- if (templ.swizzle_a == PIPE_SWIZZLE_X)
- templ.swizzle_r = PIPE_SWIZZLE_X;
fix_depth_swizzle(templ.swizzle_r);
fix_depth_swizzle(templ.swizzle_g);
fix_depth_swizzle(templ.swizzle_b);
@@ -2847,43 +2845,13 @@
}
}
-static void pack_clear_color(enum pipe_format pformat, VkClearColorValue *in_val, uint32_t col_val[4])
-{
- const struct util_format_description *desc = util_format_description(pformat);
- col_val[0] = col_val[1] = col_val[2] = col_val[3] = 0;
- for (unsigned c = 0; c < 4; c++) {
- if (desc->swizzle[c] >= 4)
- continue;
- const struct util_format_channel_description *channel = &desc->channel[desc->swizzle[c]];
- if (channel->size == 32) {
- col_val[c] = in_val->uint32[c];
- continue;
- }
- if (channel->pure_integer) {
- uint64_t v = in_val->uint32[c] & ((1u << channel->size) - 1);
- switch (channel->size) {
- case 2:
- case 8:
- case 10:
- col_val[0] |= (v << channel->shift);
- break;
- case 16:
- col_val[c / 2] |= (v << (16 * (c % 2)));
- break;
- }
- } else {
- util_pack_color(in_val->float32, pformat, (union util_color *)col_val);
- break;
- }
- }
-}
-
static void handle_clear_color_image(struct vk_cmd_queue_entry *cmd,
struct rendering_state *state)
{
LVP_FROM_HANDLE(lvp_image, image, cmd->u.clear_color_image.image);
- uint32_t col_val[4];
- pack_clear_color(image->bo->format, cmd->u.clear_color_image.color, col_val);
+ union util_color uc;
+ uint32_t *col_val = uc.ui;
+ util_pack_color_union(image->bo->format, &uc, (void*)cmd->u.clear_color_image.color);
for (unsigned i = 0; i < cmd->u.clear_color_image.range_count; i++) {
VkImageSubresourceRange *range = &cmd->u.clear_color_image.ranges[i];
struct pipe_box box;
@@ -3940,6 +3908,18 @@
}
}
+ for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) {
+ for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (state.sv[s][i])
+ pipe_sampler_view_reference(&state.sv[s][i], NULL);
+ }
+ }
+
+ for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (state.cso_ss_ptr[PIPE_SHADER_COMPUTE][i])
+ state.pctx->delete_sampler_state(state.pctx, state.ss_cso[PIPE_SHADER_COMPUTE][i]);
+ }
+
free(state.imageless_views);
free(state.pending_clear_aspects);
free(state.cleared_views);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_formats.c
^
|
@@ -230,6 +230,9 @@
break;
}
+ if (info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)
+ goto skip_checks;
+
if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
goto unsupported;
@@ -273,6 +276,7 @@
}
}
+skip_checks:
*pImageFormatProperties = (VkImageFormatProperties) {
.maxExtent = maxExtent,
.maxMipLevels = maxMipLevels,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_pipeline.c
^
|
@@ -64,6 +64,9 @@
if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
+ ralloc_free(pipeline->pipeline_nir[i]);
+
ralloc_free(pipeline->mem_ctx);
vk_object_base_finish(&pipeline->base);
vk_free2(&device->vk.alloc, pAllocator, pipeline);
@@ -602,6 +605,8 @@
NIR_PASS(progress, nir, nir_opt_deref);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+ NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
@@ -656,7 +661,7 @@
static void fill_shader_prog(struct pipe_shader_state *state, gl_shader_stage stage, struct lvp_pipeline *pipeline)
{
state->type = PIPE_SHADER_IR_NIR;
- state->ir.nir = pipeline->pipeline_nir[stage];
+ state->ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[stage]);
}
static void
@@ -728,7 +733,7 @@
device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, pipeline->pipeline_nir[stage]);
if (stage == MESA_SHADER_COMPUTE) {
struct pipe_compute_state shstate = {0};
- shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
+ shstate.prog = (void *)nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]);
shstate.ir_type = PIPE_SHADER_IR_NIR;
shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
@@ -904,7 +909,7 @@
pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
struct pipe_shader_state shstate = {0};
shstate.type = PIPE_SHADER_IR_NIR;
- shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT];
+ shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
}
return VK_SUCCESS;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
^
|
@@ -608,6 +608,7 @@
ws->info.num_physical_sgprs_per_simd = 512;
ws->info.num_physical_wave64_vgprs_per_simd = 256;
ws->info.has_3d_cube_border_color_mipmap = true;
+ ws->info.never_stop_sq_perf_counters = false;
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/gbm/main/gbm.c
^
|
@@ -497,8 +497,22 @@
const uint64_t *modifiers,
const unsigned int count)
{
+ uint32_t flags = 0;
+
+ /*
+ * ABI version 1 added the modifiers+flags capability. Backends from
+ * prior versions may fail if "unknown" flags are provided along with
+ * modifiers, but assume scanout is required when modifiers are used.
+ * Newer backends expect scanout to be explicitly requested if required,
+ * but applications using this older interface rely on the older implied
+ * requirement, so that behavior must be preserved.
+ */
+ if (gbm->v0.backend_version >= 1) {
+ flags |= GBM_BO_USE_SCANOUT;
+ }
+
return gbm_bo_create_with_modifiers2(gbm, width, height, format, modifiers,
- count, GBM_BO_USE_SCANOUT);
+ count, flags);
}
GBM_EXPORT struct gbm_bo *
@@ -648,9 +662,23 @@
const uint64_t *modifiers,
const unsigned int count)
{
+ uint32_t flags = 0;
+
+ /*
+ * ABI version 1 added the modifiers+flags capability. Backends from
+ * prior versions may fail if "unknown" flags are provided along with
+ * modifiers, but assume scanout is required when modifiers are used.
+ * Newer backends expect scanout to be explicitly requested if required,
+ * but applications using this older interface rely on the older implied
+ * requirement, so that behavior must be preserved.
+ */
+ if (gbm->v0.backend_version >= 1) {
+ flags |= GBM_BO_USE_SCANOUT;
+ }
+
return gbm_surface_create_with_modifiers2(gbm, width, height, format,
modifiers, count,
- GBM_BO_USE_SCANOUT);
+ flags);
}
GBM_EXPORT struct gbm_surface *
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/dri3_glx.c
^
|
@@ -306,6 +306,8 @@
pcp->base.noError = GL_TRUE;
}
+ pcp->base.renderType = dca.render_type;
+
pcp->driContext =
(*psc->image_driver->createContextAttribs) (psc->driScreen,
dca.api,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/glx_pbuffer.c
^
|
@@ -199,6 +199,8 @@
pdraw->textureTarget = determineTextureTarget(attrib_list, num_attribs);
pdraw->textureFormat = determineTextureFormat(attrib_list, num_attribs);
+
+ pdraw->refcount = 1;
#endif
return GL_TRUE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/glx/glxextensions.h
^
|
@@ -283,17 +283,11 @@
/* GLX_ALIAS should be used for functions with a non-void return type.
GLX_ALIAS_VOID is for functions with a void return type. */
# ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
-/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
- * extra expansion means that the name mangling macros in glx_mangle.h will
- * apply before stringification, so the alias attribute will have a string like
- * "mglXFoo" instead of "glXFoo". */
-# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
+# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
return_type real_func proto_args \
__attribute__ ((alias( # aliased_func ) ));
-# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
- GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
- GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
+ GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
# else
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
return_type real_func proto_args \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_cfg.cpp
^
|
@@ -364,6 +364,8 @@
next = new_block();
if (inst->predicate)
cur->add_successor(mem_ctx, next, bblock_link_logical);
+ else
+ cur->add_successor(mem_ctx, next, bblock_link_physical);
set_next_block(&cur, next, ip);
break;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs.cpp
^
|
@@ -2839,6 +2839,9 @@
if (inst->src[1].file != IMM)
continue;
+ if (brw_reg_type_is_floating_point(inst->src[1].type))
+ break;
+
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
@@ -8933,7 +8936,12 @@
if (last_scratch > 0) {
ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024;
- prog_data->total_scratch = brw_get_scratch_size(last_scratch);
+ /* Take the max of any previously compiled variant of the shader. In the
+ * case of bindless shaders with return parts, this will also take the
+ * max of all parts.
+ */
+ prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch),
+ prog_data->total_scratch);
if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) {
if (devinfo->is_haswell) {
@@ -9675,6 +9683,7 @@
prog_data->per_coarse_pixel_dispatch =
key->coarse_pixel &&
+ !prog_data->uses_omask &&
!prog_data->persample_dispatch &&
!prog_data->uses_sample_mask &&
(prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
@@ -9717,6 +9726,7 @@
INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_WM);
prog_data->base.stage = MESA_SHADER_FRAGMENT;
+ prog_data->base.total_scratch = 0;
const struct intel_device_info *devinfo = compiler->devinfo;
const unsigned max_subgroup_size = compiler->devinfo->ver >= 6 ? 32 : 16;
@@ -10106,6 +10116,7 @@
prog_data->base.stage = MESA_SHADER_COMPUTE;
prog_data->base.total_shared = nir->info.shared_size;
+ prog_data->base.total_scratch = 0;
/* Generate code for all the possible SIMD variants. */
bool generate_all;
@@ -10501,7 +10512,7 @@
assert(local_arg_offset % 8 == 0);
return offset |
- SET_BITS(simd_size > 8, 4, 4) |
+ SET_BITS(simd_size == 8, 4, 4) |
SET_BITS(local_arg_offset / 8, 2, 0);
}
@@ -10519,6 +10530,7 @@
const bool debug_enabled = INTEL_DEBUG(DEBUG_RT);
prog_data->base.stage = shader->info.stage;
+ prog_data->base.total_scratch = 0;
prog_data->max_stack_size = 0;
fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_generator.cpp
^
|
@@ -616,8 +616,8 @@
* easier just to split it here.
*/
const unsigned lower_width =
- (devinfo->ver <= 7 || type_sz(src.type) > 4) ?
- 8 : MIN2(16, inst->exec_size);
+ devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 :
+ MIN2(16, inst->exec_size);
brw_set_default_exec_size(p, cvt(lower_width) - 1);
for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_nir.cpp
^
|
@@ -547,17 +547,16 @@
/* For (gl_FrontFacing ? 1.0 : -1.0), emit:
*
- * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W
+ * or(8) tmp.1<2>W g1.1<0,1,0>W 0x00003f80W
* and(8) dst<1>D tmp<8,8,1>D 0xbf800000D
*
- * and negate the result for (gl_FrontFacing ? -1.0 : 1.0).
+ * and negate g1.1<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
*/
- bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
- g1, brw_imm_uw(0x3f80));
-
if (value1 == -1.0f)
- bld.MOV(tmp, negate(tmp));
+ g1.negate = true;
+ bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
+ g1, brw_imm_uw(0x3f80));
} else if (devinfo->ver >= 6) {
/* Bit 15 of g0.0 is 0 if the polygon is front facing. */
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
@@ -1887,7 +1886,7 @@
case nir_op_sdot_4x8_iadd:
case nir_op_sdot_4x8_iadd_sat:
- inst = bld.DP4A(result,
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D),
retype(op[2], BRW_REGISTER_TYPE_D),
retype(op[0], BRW_REGISTER_TYPE_D),
retype(op[1], BRW_REGISTER_TYPE_D));
@@ -1898,7 +1897,7 @@
case nir_op_udot_4x8_uadd:
case nir_op_udot_4x8_uadd_sat:
- inst = bld.DP4A(result,
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_UD),
retype(op[2], BRW_REGISTER_TYPE_UD),
retype(op[0], BRW_REGISTER_TYPE_UD),
retype(op[1], BRW_REGISTER_TYPE_UD));
@@ -1909,7 +1908,7 @@
case nir_op_sudot_4x8_iadd:
case nir_op_sudot_4x8_iadd_sat:
- inst = bld.DP4A(result,
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D),
retype(op[2], BRW_REGISTER_TYPE_D),
retype(op[0], BRW_REGISTER_TYPE_D),
retype(op[1], BRW_REGISTER_TYPE_UD));
@@ -3929,7 +3928,10 @@
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
- srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
+ /* No point in masking with sample mask, here we're handling compute
+ * intrinsics.
+ */
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_reg data = get_nir_src(instr->src[0]);
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -6067,7 +6069,7 @@
* Compiler should send U,V,R parameters even if V,R are 0.
*/
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && devinfo->verx10 == 125)
- assert(instr->coord_components == 3u + instr->is_array);
+ assert(instr->coord_components >= 3u);
break;
case nir_tex_src_ddx:
srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_scoreboard.cpp
^
|
@@ -122,6 +122,8 @@
else if (inst->opcode == SHADER_OPCODE_BROADCAST &&
!devinfo->has_64bit_float && type_sz(t) >= 8)
return TGL_PIPE_INT;
+ else if (inst->opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT)
+ return TGL_PIPE_FLOAT;
else if (type_sz(inst->dst.type) >= 8 || type_sz(t) >= 8 ||
is_dword_multiply)
return TGL_PIPE_LONG;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
^
|
@@ -60,6 +60,9 @@
for (unsigned i = 0; i < info->num_indices; i++)
dup->const_index[i] = intrin->const_index[i];
+ if (nir_intrinsic_has_access(intrin))
+ nir_intrinsic_set_access(dup, nir_intrinsic_access(intrin));
+
nir_intrinsic_set_align(dup, align, 0);
if (info->has_dest) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_shader_calls.c
^
|
@@ -124,143 +124,128 @@
nir_btd_stack_push_intel(b, offset);
}
-bool
-brw_nir_lower_shader_calls(nir_shader *shader)
+static bool
+lower_shader_calls_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
- nir_function_impl *impl = nir_shader_get_entrypoint(shader);
- bool progress = false;
-
- nir_builder _b, *b = &_b;
- nir_builder_init(&_b, impl);
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
- nir_foreach_block_safe(block, impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
- if (call->intrinsic != nir_intrinsic_rt_trace_ray &&
- call->intrinsic != nir_intrinsic_rt_execute_callable &&
- call->intrinsic != nir_intrinsic_rt_resume)
- continue;
-
- b->cursor = nir_before_instr(instr);
-
- progress = true;
-
- switch (call->intrinsic) {
- case nir_intrinsic_rt_trace_ray: {
- store_resume_addr(b, call);
-
- nir_ssa_def *as_addr = call->src[0].ssa;
- nir_ssa_def *ray_flags = call->src[1].ssa;
- /* From the SPIR-V spec:
- *
- * "Only the 8 least-significant bits of Cull Mask are used by
- * this instruction - other bits are ignored.
- *
- * Only the 4 least-significant bits of SBT Offset and SBT
- * Stride are used by this instruction - other bits are
- * ignored.
- *
- * Only the 16 least-significant bits of Miss Index are used by
- * this instruction - other bits are ignored."
- */
- nir_ssa_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
- nir_ssa_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
- nir_ssa_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
- nir_ssa_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
- nir_ssa_def *ray_orig = call->src[6].ssa;
- nir_ssa_def *ray_t_min = call->src[7].ssa;
- nir_ssa_def *ray_dir = call->src[8].ssa;
- nir_ssa_def *ray_t_max = call->src[9].ssa;
-
- /* The hardware packet takes the address to the root node in the
- * acceleration structure, not the acceleration structure itself.
- * To find that, we have to read the root node offset from the
- * acceleration structure which is the first QWord.
- */
- nir_ssa_def *root_node_ptr =
- nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64));
-
- /* The hardware packet requires an address to the first element of
- * the hit SBT.
- *
- * In order to calculate this, we must multiply the "SBT Offset"
- * provided to OpTraceRay by the SBT stride provided for the hit
- * SBT in the call to vkCmdTraceRay() and add that to the base
- * address of the hit SBT. This stride is not to be confused with
- * the "SBT Stride" provided to OpTraceRay which is in units of
- * this stride. It's a rather terrible overload of the word
- * "stride". The hardware docs calls the SPIR-V stride value the
- * "shader index multiplier" which is a much more sane name.
- */
- nir_ssa_def *hit_sbt_stride_B =
- nir_load_ray_hit_sbt_stride_intel(b);
- nir_ssa_def *hit_sbt_offset_B =
- nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
- nir_ssa_def *hit_sbt_addr =
- nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
- nir_u2u64(b, hit_sbt_offset_B));
-
- /* The hardware packet takes an address to the miss BSR. */
- nir_ssa_def *miss_sbt_stride_B =
- nir_load_ray_miss_sbt_stride_intel(b);
- nir_ssa_def *miss_sbt_offset_B =
- nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
- nir_ssa_def *miss_sbt_addr =
- nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
- nir_u2u64(b, miss_sbt_offset_B));
-
- struct brw_nir_rt_mem_ray_defs ray_defs = {
- .root_node_ptr = root_node_ptr,
- .ray_flags = nir_u2u16(b, ray_flags),
- .ray_mask = cull_mask,
- .hit_group_sr_base_ptr = hit_sbt_addr,
- .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
- .miss_sr_ptr = miss_sbt_addr,
- .orig = ray_orig,
- .t_near = ray_t_min,
- .dir = ray_dir,
- .t_far = ray_t_max,
- .shader_index_multiplier = sbt_stride,
- };
- brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
- nir_trace_ray_initial_intel(b);
- break;
- }
-
- case nir_intrinsic_rt_execute_callable: {
- store_resume_addr(b, call);
-
- nir_ssa_def *sbt_offset32 =
- nir_imul(b, call->src[0].ssa,
- nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
- nir_ssa_def *sbt_addr =
- nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
- nir_u2u64(b, sbt_offset32));
- brw_nir_btd_spawn(b, sbt_addr);
- break;
- }
-
- default:
- unreachable("Invalid intrinsic");
- }
-
- nir_instr_remove(&call->instr);
- }
+ /* Leave nir_intrinsic_rt_resume to be lowered by
+ * brw_nir_lower_rt_intrinsics()
+ */
+ nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
+
+ switch (call->intrinsic) {
+ case nir_intrinsic_rt_trace_ray: {
+ b->cursor = nir_instr_remove(instr);
+
+ store_resume_addr(b, call);
+
+ nir_ssa_def *as_addr = call->src[0].ssa;
+ nir_ssa_def *ray_flags = call->src[1].ssa;
+ /* From the SPIR-V spec:
+ *
+ * "Only the 8 least-significant bits of Cull Mask are used by this
+ * instruction - other bits are ignored.
+ *
+ * Only the 4 least-significant bits of SBT Offset and SBT Stride are
+ * used by this instruction - other bits are ignored.
+ *
+ * Only the 16 least-significant bits of Miss Index are used by this
+ * instruction - other bits are ignored."
+ */
+ nir_ssa_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
+ nir_ssa_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
+ nir_ssa_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
+ nir_ssa_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
+ nir_ssa_def *ray_orig = call->src[6].ssa;
+ nir_ssa_def *ray_t_min = call->src[7].ssa;
+ nir_ssa_def *ray_dir = call->src[8].ssa;
+ nir_ssa_def *ray_t_max = call->src[9].ssa;
+
+ /* The hardware packet takes the address to the root node in the
+ * acceleration structure, not the acceleration structure itself. To
+ * find that, we have to read the root node offset from the acceleration
+ * structure which is the first QWord.
+ */
+ nir_ssa_def *root_node_ptr =
+ nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64));
+
+ /* The hardware packet requires an address to the first element of the
+ * hit SBT.
+ *
+ * In order to calculate this, we must multiply the "SBT Offset"
+ * provided to OpTraceRay by the SBT stride provided for the hit SBT in
+ * the call to vkCmdTraceRay() and add that to the base address of the
+ * hit SBT. This stride is not to be confused with the "SBT Stride"
+ * provided to OpTraceRay which is in units of this stride. It's a
+ * rather terrible overload of the word "stride". The hardware docs
+ * calls the SPIR-V stride value the "shader index multiplier" which is
+ * a much more sane name.
+ */
+ nir_ssa_def *hit_sbt_stride_B =
+ nir_load_ray_hit_sbt_stride_intel(b);
+ nir_ssa_def *hit_sbt_offset_B =
+ nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
+ nir_ssa_def *hit_sbt_addr =
+ nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
+ nir_u2u64(b, hit_sbt_offset_B));
+
+ /* The hardware packet takes an address to the miss BSR. */
+ nir_ssa_def *miss_sbt_stride_B =
+ nir_load_ray_miss_sbt_stride_intel(b);
+ nir_ssa_def *miss_sbt_offset_B =
+ nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
+ nir_ssa_def *miss_sbt_addr =
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_nir_lower_storage_image.c
^
|
@@ -646,6 +646,9 @@
if (var->data.access & ACCESS_NON_READABLE)
return false;
+ if (var->data.image.format == PIPE_FORMAT_NONE)
+ return false;
+
/* If we have a matching typed format, then we have an actual image surface
* so we fall back and let the back-end emit a TXS for this.
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_predicated_break.cpp
^
|
@@ -53,19 +53,79 @@
* and we can remove the BREAK instruction and predicate the WHILE.
*/
+#define MAX_NESTING 128
+
+struct loop_continue_tracking {
+ BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)];
+ unsigned depth;
+};
+
+static void
+enter_loop(struct loop_continue_tracking *s)
+{
+ s->depth++;
+
+ /* Any loops deeper than that maximum nesting will just re-use the last
+ * flag. This simplifies most of the code. MAX_NESTING is chosen to be
+ * large enough that it is unlikely to occur. Even if it does, the
+ * optimization that uses this tracking is unlikely to make much
+ * difference.
+ */
+ if (s->depth < MAX_NESTING)
+ BITSET_CLEAR(s->has_continue, s->depth);
+}
+
+static void
+exit_loop(struct loop_continue_tracking *s)
+{
+ assert(s->depth > 0);
+ s->depth--;
+}
+
+static void
+set_continue(struct loop_continue_tracking *s)
+{
+ const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
+
+ BITSET_SET(s->has_continue, i);
+}
+
+static bool
+has_continue(const struct loop_continue_tracking *s)
+{
+ const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
+
+ return BITSET_TEST(s->has_continue, i);
+}
+
bool
opt_predicated_break(backend_shader *s)
{
bool progress = false;
+ struct loop_continue_tracking state = { {0, }, 0 };
foreach_block (block, s->cfg) {
- if (block->start_ip != block->end_ip)
- continue;
+ /* DO instructions, by definition, can only be found at the beginning of
+ * basic blocks.
+ */
+ backend_instruction *const do_inst = block->start();
- /* BREAK and CONTINUE instructions, by definition, can only be found at
- * the ends of basic blocks.
+ /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be
+ * found at the ends of basic blocks.
*/
backend_instruction *jump_inst = block->end();
+
+ if (do_inst->opcode == BRW_OPCODE_DO)
+ enter_loop(&state);
+
+ if (jump_inst->opcode == BRW_OPCODE_CONTINUE)
+ set_continue(&state);
+ else if (jump_inst->opcode == BRW_OPCODE_WHILE)
+ exit_loop(&state);
+
+ if (block->start_ip != block->end_ip)
+ continue;
+
if (jump_inst->opcode != BRW_OPCODE_BREAK &&
jump_inst->opcode != BRW_OPCODE_CONTINUE)
continue;
@@ -119,13 +179,20 @@
/* Now look at the first instruction of the block following the BREAK. If
* it's a WHILE, we can delete the break, predicate the WHILE, and join
* the two basic blocks.
+ *
+ * This optimization can only be applied if the only instruction that
+ * can transfer control to the WHILE is the BREAK. If other paths can
+ * lead to the while, the flags may be in an unknown state, and the loop
+ * could terminate prematurely. This can occur if the loop contains a
+ * CONT instruction.
*/
bblock_t *while_block = earlier_block->next();
backend_instruction *while_inst = while_block->start();
if (jump_inst->opcode == BRW_OPCODE_BREAK &&
while_inst->opcode == BRW_OPCODE_WHILE &&
- while_inst->predicate == BRW_PREDICATE_NONE) {
+ while_inst->predicate == BRW_PREDICATE_NONE &&
+ !has_continue(&state)) {
jump_inst->remove(earlier_block);
while_inst->predicate = jump_inst->predicate;
while_inst->predicate_inverse = !jump_inst->predicate_inverse;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_reg.h
^
|
@@ -1238,6 +1238,28 @@
region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
BRW_HORIZONTAL_STRIDE_0)
+/**
+ * Return the size in bytes per data element of register \p reg on the
+ * corresponding register file.
+ */
+static inline unsigned
+element_sz(struct brw_reg reg)
+{
+ if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
+ return type_sz(reg.type);
+
+ } else if (reg.width == BRW_WIDTH_1 &&
+ reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+ assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
+ return type_sz(reg.type) << (reg.vstride - 1);
+
+ } else {
+ assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
+ assert(reg.vstride == reg.hstride + reg.width);
+ return type_sz(reg.type) << (reg.hstride - 1);
+ }
+}
+
/* brw_packed_float.c */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_shader.h
^
|
@@ -121,7 +121,7 @@
extern const char *const pred_ctrl_align16[16];
/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-static inline int
+static inline unsigned
brw_get_scratch_size(int size)
{
return MAX2(1024, util_next_power_of_two(size));
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4.cpp
^
|
@@ -2896,6 +2896,7 @@
INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_VS);
prog_data->base.base.stage = MESA_SHADER_VERTEX;
+ prog_data->base.base.total_scratch = 0;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4_gs_visitor.cpp
^
|
@@ -600,6 +600,7 @@
const bool debug_enabled = INTEL_DEBUG(DEBUG_GS);
prog_data->base.base.stage = MESA_SHADER_GEOMETRY;
+ prog_data->base.base.total_scratch = 0;
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/brw_vec4_tcs.cpp
^
|
@@ -372,6 +372,7 @@
const unsigned *assembly;
vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
+ prog_data->base.base.total_scratch = 0;
nir->info.outputs_written = key->outputs_written;
nir->info.patch_outputs_written = key->patch_outputs_written;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/meson.build
^
|
@@ -143,7 +143,7 @@
prog_python, '@INPUT@',
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
capture : true,
)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/compiler/test_fs_scoreboard.cpp
^
|
@@ -104,6 +104,14 @@
return inst;
}
+static tgl_swsb
+tgl_swsb_testcase(unsigned regdist, unsigned sbid, enum tgl_sbid_mode mode)
+{
+ tgl_swsb swsb = tgl_swsb_sbid(mode, sbid);
+ swsb.regdist = regdist;
+ return swsb;
+}
+
bool operator ==(const tgl_swsb &a, const tgl_swsb &b)
{
return a.mode == b.mode &&
@@ -178,8 +186,7 @@
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
- EXPECT_EQ(instruction(block0, 2)->sched,
- (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET }));
+ EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
TEST_F(scoreboard_test, RAW_outoforder_inorder)
@@ -206,8 +213,7 @@
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
- EXPECT_EQ(instruction(block0, 2)->sched,
- (tgl_swsb { .regdist = 1, .sbid = 0, .mode = TGL_SBID_DST }));
+ EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(1, 0, TGL_SBID_DST));
}
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
@@ -292,8 +298,7 @@
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
- EXPECT_EQ(instruction(block0, 2)->sched,
- (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET }));
+ EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
TEST_F(scoreboard_test, WAR_outoforder_inorder)
@@ -405,8 +410,7 @@
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
- EXPECT_EQ(instruction(block0, 2)->sched,
- (tgl_swsb { .regdist = 2, .sbid = 0, .mode = TGL_SBID_SET }));
+ EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/dev/intel_device_info.c
^
|
@@ -1411,7 +1411,7 @@
* available for that PCI ID and then compute the real value from the
* subslice information we get from the kernel.
*/
- const uint32_t subslice_total = intel_device_info_eu_total(devinfo);
+ const uint32_t subslice_total = intel_device_info_subslice_total(devinfo);
const uint32_t eu_total = intel_device_info_eu_total(devinfo);
/* Logical CS threads = EUs per subslice * num threads per EU */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen12.xml
^
|
@@ -6450,6 +6450,7 @@
<instruction name="PIPE_CONTROL" bias="2" length="6" engine="render">
<field name="DWord Length" start="0" end="7" type="uint" default="4"/>
<field name="HDC Pipeline Flush Enable" start="9" end="9" type="bool"/>
+ <field name="L3 Read Only Cache Invalidation Enable" start="10" end="10" type="bool"/>
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="0"/>
<field name="3D Command Opcode" start="24" end="26" type="uint" default="2"/>
<field name="Command SubType" start="27" end="28" type="uint" default="3"/>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen125.xml
^
|
@@ -6368,6 +6368,7 @@
<instruction name="PIPE_CONTROL" bias="2" length="6" engine="render">
<field name="DWord Length" start="0" end="7" type="uint" default="4"/>
<field name="HDC Pipeline Flush Enable" start="9" end="9" type="bool"/>
+ <field name="L3 Read Only Cache Invalidation Enable" start="10" end="10" type="bool"/>
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="0"/>
<field name="3D Command Opcode" start="24" end="26" type="uint" default="2"/>
<field name="Command SubType" start="27" end="28" type="uint" default="3"/>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen4.xml
^
|
@@ -976,7 +976,7 @@
<field name="2D Command Opcode" start="22" end="28" type="uint" default="80"/>
<field name="Command Type" start="29" end="31" type="uint" default="2"/>
<field name="Destination Pitch" start="32" end="47" type="int"/>
- <field name="Raster Operation" start="48" end="55" type="int"/>
+ <field name="Raster Operation" start="48" end="55" type="uint"/>
<field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH">
<value name="8 bit" value="0"/>
<value name="565" value="1"/>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen45.xml
^
|
@@ -1032,7 +1032,7 @@
<field name="2D Command Opcode" start="22" end="28" type="uint" default="1"/>
<field name="Command Type" start="29" end="31" type="uint" default="2"/>
<field name="Destination Pitch" start="32" end="47" type="int"/>
- <field name="Raster Operation" start="48" end="55" type="int"/>
+ <field name="Raster Operation" start="48" end="55" type="uint"/>
<field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH">
<value name="8 bit" value="0"/>
<value name="565" value="1"/>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen5.xml
^
|
@@ -1110,7 +1110,7 @@
<field name="2D Command Opcode" start="22" end="28" type="uint" default="1"/>
<field name="Command Type" start="29" end="31" type="uint" default="2"/>
<field name="Destination Pitch" start="32" end="47" type="int"/>
- <field name="Raster Operation" start="48" end="55" type="int"/>
+ <field name="Raster Operation" start="48" end="55" type="uint"/>
<field name="Color Depth" start="56" end="57" type="uint" prefix="COLOR_DEPTH">
<value name="8 bit" value="0"/>
<value name="565" value="1"/>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/genxml/gen_rt.xml
^
|
@@ -3,8 +3,8 @@
<struct name="BINDLESS_SHADER_RECORD" length="2">
<field name="Offset To Local Arguments" start="0" end="2" type="uint"/>
<field name="Bindless Shader Dispatch Mode" start="4" end="4" type="uint">
- <value name="SIMD8" value="0"/>
- <value name="SIMD16" value="1"/>
+ <value name="RT_SIMD16" value="0"/>
+ <value name="RT_SIMD8" value="1"/>
</field>
<field name="Kernel Start Pointer" start="6" end="31" type="offset"/>
</struct>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/gen_perf.py
^
|
@@ -20,6 +20,8 @@
# IN THE SOFTWARE.
import argparse
+import builtins
+import collections
import os
import sys
import textwrap
@@ -392,10 +394,53 @@
return ""
if unit == 'hz':
unit = 'Hz'
- return " Unit: " + unit + "."
+ return "Unit: " + unit + "."
-def output_counter_report(set, counter, current_offset):
+counter_key_tuple = collections.namedtuple(
+ 'counter_key',
+ [
+ 'name',
+ 'description',
+ 'symbol_name',
+ 'mdapi_group',
+ 'semantic_type',
+ 'data_type',
+ 'units',
+ ]
+)
+
+
+def counter_key(counter):
+ return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields])
+
+
+def output_counter_struct(set, counter, idx,
+ name_to_idx, desc_to_idx,
+ symbol_name_to_idx, category_to_idx):
+ data_type = counter.data_type
+ data_type_uc = data_type.upper()
+
+ semantic_type = counter.semantic_type
+ if semantic_type in semantic_type_map:
+ semantic_type = semantic_type_map[semantic_type]
+
+ semantic_type_uc = semantic_type.upper()
+
+ c("[" + str(idx) + "] = {\n")
+ c_indent(3)
+ c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n")
+ c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n")
+ c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n")
+ c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n")
+ c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n")
+ c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n")
+ c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n")
+ c_outdent(3)
+ c("},\n")
+
+
+def output_counter_report(set, counter, counter_to_idx, current_offset):
data_type = counter.get('data_type')
data_type_uc = data_type.upper()
c_type = data_type
@@ -416,19 +461,15 @@
output_availability(set, availability, counter.get('name'))
c_indent(3)
- c("counter = &query->counters[query->n_counters++];\n")
- c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n")
- c("counter->name = \"" + counter.get('name') + "\";\n")
- c("counter->desc = \"" + counter.get('description') + desc_units(counter.get('units')) + "\";\n")
- c("counter->symbol_name = \"" + counter.get('symbol_name') + "\";\n")
- c("counter->category = \"" + counter.get('mdapi_group') + "\";\n")
- c("counter->type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ";\n")
- c("counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ";\n")
- c("counter->units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.get('units')) + ";\n")
- c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n")
+ key = counter_key(counter)
+ idx = str(counter_to_idx[key])
current_offset = pot_align(current_offset, sizeof(c_type))
- c("counter->offset = " + str(current_offset) + ";\n")
+
+ c("intel_perf_query_add_counter(query, " + idx + ", " +
+ str(current_offset) + ", " +
+ set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" +
+ set.read_funcs[counter.get('symbol_name')] + ");\n")
if availability:
c_outdent(3);
@@ -437,6 +478,29 @@
return current_offset + sizeof(c_type)
+def str_to_idx_table(strs):
+ sorted_strs = sorted(strs)
+
+ str_to_idx = collections.OrderedDict()
+ str_to_idx[sorted_strs[0]] = 0
+ previous = sorted_strs[0]
+
+ for i in range(1, len(sorted_strs)):
+ str_to_idx[sorted_strs[i]] = str_to_idx[previous] + len(previous) + 1
+ previous = sorted_strs[i]
+
+ return str_to_idx
+
+
+def output_str_table(name: str, str_to_idx):
+ c("\n")
+ c("static const char " + name + "[] = {\n")
+ c_indent(3)
+ c("\n".join(f"/* {idx} */ \"{val}\\0\"" for val, idx in str_to_idx.items()))
+ c_outdent(3)
+ c("};\n")
+
+
register_types = {
'FLEX': 'flex_regs',
'NOA': 'mux_regs',
@@ -686,13 +750,32 @@
c(textwrap.dedent("""\
#include "perf/intel_perf.h"
+ #include "perf/intel_perf_setup.h"
+ """))
+ names = builtins.set()
+ descs = builtins.set()
+ symbol_names = builtins.set()
+ categories = builtins.set()
+ for gen in gens:
+ for set in gen.sets:
+ for counter in set.counters:
+ names.add(counter.get('name'))
+ symbol_names.add(counter.get('symbol_name'))
+ descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
+ categories.add(counter.get('mdapi_group'))
+
+ name_to_idx = str_to_idx_table(names)
+ output_str_table("name", name_to_idx)
- #define MIN(a, b) ((a < b) ? (a) : (b))
- #define MAX(a, b) ((a > b) ? (a) : (b))
+ desc_to_idx = str_to_idx_table(descs)
+ output_str_table("desc", desc_to_idx)
+ symbol_name_to_idx = str_to_idx_table(symbol_names)
+ output_str_table("symbol_name", symbol_name_to_idx)
- """))
+ category_to_idx = str_to_idx_table(categories)
+ output_str_table("category", category_to_idx)
# Print out all equation functions.
for gen in gens:
@@ -701,6 +784,54 @@
output_counter_read(gen, set, counter)
output_counter_max(gen, set, counter)
+ c("\n")
+ c("static const struct intel_perf_query_counter_data counters[] = {\n")
+ c_indent(3)
+
+ counter_to_idx = collections.OrderedDict()
+ idx = 0
+ for gen in gens:
+ for set in gen.sets:
+ for counter in set.counters:
+ key = counter_key(counter)
+ if key not in counter_to_idx:
+ counter_to_idx[key] = idx
+ output_counter_struct(set, key, idx,
+ name_to_idx,
+ desc_to_idx,
+ symbol_name_to_idx,
+ category_to_idx)
+ idx += 1
+
+ c_outdent(3)
+ c("};\n\n")
+
+ c(textwrap.dedent("""\
+ typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf,
+ const struct intel_perf_query_info *query,
+ const struct intel_perf_query_result *results);
+ static void ATTRIBUTE_NOINLINE
+ intel_perf_query_add_counter(struct intel_perf_query_info *query,
+ int counter_idx, size_t offset,
+ uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64)
+ {
+ struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
+ const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
+
+ dest->name = &name[counter->name_idx];
+ dest->desc = &desc[counter->desc_idx];
+ dest->symbol_name = &symbol_name[counter->symbol_name_idx];
+ dest->category = &category[counter->category_idx];
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/intel_perf.h
^
|
@@ -52,7 +52,7 @@
#define INTEL_PERF_INVALID_CTX_ID (0xffffffff)
-enum intel_perf_counter_type {
+enum PACKED intel_perf_counter_type {
INTEL_PERF_COUNTER_TYPE_EVENT,
INTEL_PERF_COUNTER_TYPE_DURATION_NORM,
INTEL_PERF_COUNTER_TYPE_DURATION_RAW,
@@ -61,7 +61,7 @@
INTEL_PERF_COUNTER_TYPE_TIMESTAMP,
};
-enum intel_perf_counter_data_type {
+enum PACKED intel_perf_counter_data_type {
INTEL_PERF_COUNTER_DATA_TYPE_BOOL32,
INTEL_PERF_COUNTER_DATA_TYPE_UINT32,
INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
@@ -69,7 +69,7 @@
INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE,
};
-enum intel_perf_counter_units {
+enum PACKED intel_perf_counter_units {
/* size */
INTEL_PERF_COUNTER_UNITS_BYTES,
|
[-]
[+]
|
Added |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/intel_perf_setup.h
^
|
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef INTEL_PERF_SETUP_H
+#define INTEL_PERF_SETUP_H
+
+#include "perf/intel_perf.h"
+
+/* Two-operand minimum/maximum helpers.
+ *
+ * Every use of a macro parameter is parenthesised, including inside the
+ * comparison, so that an argument built from operators with lower precedence
+ * than '<' / '>' (e.g. '&', '|', '?:') is compared as a whole instead of
+ * being regrouped by the expansion.
+ *
+ * Each argument appears twice in the expansion, so do not pass expressions
+ * with side effects.
+ */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+static struct intel_perf_query_info *
+intel_query_alloc(struct intel_perf_config *perf, int ncounters)
+{
+ struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);
+ query->perf = perf;
+ query->kind = INTEL_PERF_QUERY_TYPE_OA;
+ query->n_counters = 0;
+ query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
+ query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters);
+ return query;
+}
+
+static struct intel_perf_query_info *
+hsw_query_alloc(struct intel_perf_config *perf, int ncounters)
+{
+ struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
+ query->oa_format = I915_OA_FORMAT_A45_B8_C8;
+ /* Accumulation buffer offsets... */
+ query->gpu_time_offset = 0;
+ query->a_offset = query->gpu_time_offset + 1;
+ query->b_offset = query->a_offset + 45;
+ query->c_offset = query->b_offset + 8;
+ query->perfcnt_offset = query->c_offset + 8;
+ query->rpstat_offset = query->perfcnt_offset + 2;
+ return query;
+}
+
+static struct intel_perf_query_info *
+bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
+{
+ struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
+ query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+ /* Accumulation buffer offsets... */
+ query->gpu_time_offset = 0;
+ query->gpu_clock_offset = query->gpu_time_offset + 1;
+ query->a_offset = query->gpu_clock_offset + 1;
+ query->b_offset = query->a_offset + 36;
+ query->c_offset = query->b_offset + 8;
+ query->perfcnt_offset = query->c_offset + 8;
+ query->rpstat_offset = query->perfcnt_offset + 2;
+ return query;
+}
+
+struct intel_perf_query_counter_data {
+ uint16_t name_idx;
+ uint16_t desc_idx;
+ uint16_t symbol_name_idx;
+ uint16_t category_idx;
+ enum intel_perf_counter_type type;
+ enum intel_perf_counter_data_type data_type;
+ enum intel_perf_counter_units units;
+};
+
+#endif /* INTEL_PERF_SETUP_H */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/perf/oa-hsw.xml
^
|
@@ -128,7 +128,7 @@
<counter name="GPU Busy"
symbol_name="GpuBusy"
underscore_name="gpu_busy"
- description="The percentage of time in which the GPU has being processing GPU commands."
+ description="The percentage of time in which the GPU has been processing GPU commands."
data_type="float"
max_equation="100"
units="percent"
@@ -1091,7 +1091,7 @@
<counter name="GPU Busy"
symbol_name="GpuBusy"
underscore_name="gpu_busy"
- description="The percentage of time in which the GPU has being processing GPU commands."
+ description="The percentage of time in which the GPU has been processing GPU commands."
data_type="float"
max_equation="100"
units="percent"
@@ -2200,7 +2200,7 @@
<counter name="GPU Busy"
symbol_name="GpuBusy"
underscore_name="gpu_busy"
- description="The percentage of time in which the GPU has being processing GPU commands."
+ description="The percentage of time in which the GPU has been processing GPU commands."
data_type="float"
max_equation="100"
units="percent"
@@ -3014,7 +3014,7 @@
<counter name="GPU Busy"
symbol_name="GpuBusy"
underscore_name="gpu_busy"
- description="The percentage of time in which the GPU has being processing GPU commands."
+ description="The percentage of time in which the GPU has been processing GPU commands."
data_type="float"
max_equation="100"
units="percent"
@@ -3830,7 +3830,7 @@
<counter name="GPU Busy"
symbol_name="GpuBusy"
underscore_name="gpu_busy"
- description="The percentage of time in which the GPU has being processing GPU commands."
+ description="The percentage of time in which the GPU has been processing GPU commands."
data_type="float"
max_equation="100"
units="percent"
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/tools/intel_noop_drm_shim.c
^
|
@@ -344,7 +344,8 @@
[DRM_I915_GEM_CONTEXT_GETPARAM] = i915_ioctl_gem_context_getparam,
[DRM_I915_GEM_CONTEXT_SETPARAM] = i915_ioctl_noop,
[DRM_I915_GEM_EXECBUFFER2] = i915_ioctl_noop,
- [DRM_I915_GEM_EXECBUFFER2_WR] = i915_ioctl_noop,
+ /* [DRM_I915_GEM_EXECBUFFER2_WR] = i915_ioctl_noop,
+ same value as DRM_I915_GEM_EXECBUFFER2. */
[DRM_I915_GEM_USERPTR] = i915_ioctl_gem_userptr,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_allocator.c
^
|
@@ -155,10 +155,8 @@
* userptr and send a chunk of it off to the GPU.
*/
table->fd = os_create_anonymous_file(BLOCK_POOL_MEMFD_SIZE, "state table");
- if (table->fd == -1) {
- result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
- goto fail_fd;
- }
+ if (table->fd == -1)
+ return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
if (!u_vector_init(&table->cleanups, 8,
sizeof(struct anv_state_table_cleanup))) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_batch_chain.c
^
|
@@ -346,7 +346,7 @@
{
VkResult result;
- struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
+ struct anv_batch_bo *bbo = vk_zalloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (bbo == NULL)
return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -1795,8 +1795,8 @@
__builtin_ia32_mfence();
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) {
- for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
- __builtin_ia32_clflush((*bbo)->bo->map + i);
+ for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE)
+ __builtin_ia32_clflush((*bbo)->bo->map + l);
}
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_blorp.c
^
|
@@ -1416,7 +1416,6 @@
assert(src_image->vk.samples > 1);
assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
assert(dst_image->vk.samples == 1);
- assert(src_image->n_planes == dst_image->n_planes);
struct blorp_surf src_surf, dst_surf;
get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_cmd_buffer.c
^
|
@@ -275,7 +275,7 @@
result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
if (result != VK_SUCCESS)
- goto fail;
+ goto fail_alloc;
cmd_buffer->batch.status = VK_SUCCESS;
@@ -285,7 +285,7 @@
result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
if (result != VK_SUCCESS)
- goto fail;
+ goto fail_vk;
anv_state_stream_init(&cmd_buffer->surface_state_stream,
&device->surface_state_pool, 4096);
@@ -306,7 +306,9 @@
return VK_SUCCESS;
- fail:
+ fail_vk:
+ vk_command_buffer_finish(&cmd_buffer->vk);
+ fail_alloc:
vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
return result;
@@ -1583,7 +1585,7 @@
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- assert(attachmentCount < MAX_RTS);
+ assert(attachmentCount <= MAX_RTS);
uint8_t color_writes = 0;
for (uint32_t i = 0; i < attachmentCount; i++)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_descriptor_set.c
^
|
@@ -1702,8 +1702,36 @@
copy->srcArrayElement,
copy->descriptorCount);
} else {
- for (uint32_t j = 0; j < copy->descriptorCount; j++)
- dst_desc[j] = src_desc[j];
+ struct anv_buffer_view *dst_bview =
+ &dst->buffer_views[dst_layout->buffer_view_index +
+ copy->dstArrayElement];
+ struct anv_buffer_view *src_bview =
+ &src->buffer_views[src_layout->buffer_view_index +
+ copy->srcArrayElement];
+ /* If ANV_DESCRIPTOR_BUFFER_VIEW is present in the source descriptor,
+ * it means we're using an anv_buffer_view allocated by the source
+ * descriptor set. In that case we want to carefully copy it because
+ * its lifecycle is tied to the source descriptor set, not the
+ * destination descriptor set.
+ */
+ if (src_layout->data & ANV_DESCRIPTOR_BUFFER_VIEW) {
+ assert(dst_layout->data & ANV_DESCRIPTOR_BUFFER_VIEW);
+ for (uint32_t j = 0; j < copy->descriptorCount; j++) {
+ dst_bview[j].format = src_bview[j].format;
+ dst_bview[j].range = src_bview[j].range;
+ dst_bview[j].address = src_bview[j].address;
+
+ memcpy(dst_bview[j].surface_state.map,
+ src_bview[j].surface_state.map,
+ src_bview[j].surface_state.alloc_size);
+
+ dst_desc[j].type = src_desc[j].type;
+ dst_desc[j].buffer_view = &dst_bview[j];
+ }
+ } else {
+ for (uint32_t j = 0; j < copy->descriptorCount; j++)
+ dst_desc[j] = src_desc[j];
+ }
unsigned desc_size = anv_descriptor_size(src_layout);
if (desc_size > 0) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_device.c
^
|
@@ -1973,12 +1973,12 @@
scalar_stages |= mesa_to_vk_shader_stage(stage);
}
if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
- scalar_stages |= MESA_SHADER_RAYGEN |
- MESA_SHADER_ANY_HIT |
- MESA_SHADER_CLOSEST_HIT |
- MESA_SHADER_MISS |
- MESA_SHADER_INTERSECTION |
- MESA_SHADER_CALLABLE;
+ scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
+ VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
+ VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
+ VK_SHADER_STAGE_MISS_BIT_KHR |
+ VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
+ VK_SHADER_STAGE_CALLABLE_BIT_KHR;
}
p->subgroupSupportedStages = scalar_stages;
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_image.c
^
|
@@ -635,20 +635,6 @@
return VK_SUCCESS;
}
- if (device->info.ver >= 12 &&
- (image->vk.array_layers > 1 || image->vk.mip_levels)) {
- /* HSD 14010672564: On TGL, if a block of fragment shader outputs
- * match the surface's clear color, the HW may convert them to
- * fast-clears. Anv only does clear color tracking for the first
- * slice unfortunately. Disable CCS until anv gains more clear color
- * tracking abilities.
- */
- anv_perf_warn(VK_LOG_OBJS(&image->vk.base),
- "HW may put fast-clear blocks on more slices than SW "
- "currently tracks. Not allocating a CCS buffer.");
- return VK_SUCCESS;
- }
-
if (INTEL_DEBUG(DEBUG_NO_RBC))
return VK_SUCCESS;
@@ -2044,6 +2030,20 @@
bool aux_supported = true;
bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage);
+ const struct isl_format_layout *fmtl =
+ isl_format_get_layout(image->planes[plane].primary_surface.isl.format);
+
+ /* Disabling CCS for the following case avoids failures in:
+ * - dEQP-VK.drm_format_modifiers.export_import.*
+ * - dEQP-VK.synchronization*
+ */
+ if (usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT) && fmtl->bpb <= 16 &&
+ aux_usage == ISL_AUX_USAGE_CCS_E && devinfo->ver >= 12) {
+ aux_supported = false;
+ clear_supported = false;
+ }
+
if ((usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && !read_only) {
/* This image could be used as both an input attachment and a render
* target (depth, stencil, or color) at the same time and this can cause
@@ -2265,6 +2265,17 @@
case ISL_AUX_STATE_COMPRESSED_CLEAR:
if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
return ANV_FAST_CLEAR_DEFAULT_VALUE;
+ } else if (devinfo->ver >= 12 &&
+ image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) {
+ /* On TGL, if a block of fragment shader outputs match the surface's
+ * clear color, the HW may convert them to fast-clears (see HSD
+ * 14010672564). This can lead to rendering corruptions if not
+ * handled properly. We restrict the clear color to zero to avoid
+ * issues that can occur with:
+ * - Texture view rendering (including blorp_copy calls)
+ * - Images with multiple levels or array layers
+ */
+ return ANV_FAST_CLEAR_DEFAULT_VALUE;
} else if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
/* When we're in a render pass we have the clear color data from the
* VkRenderPassBeginInfo and we can use arbitrary clear colors. They
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_pass.c
^
|
@@ -107,7 +107,11 @@
all_usage |= subpass_att->usage;
- if (pass_att->first_subpass_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ /* first_subpass_layout only applies to color and depth.
+ * See genX(cmd_buffer_setup_attachments)
+ */
+ if (vk_format_aspects(pass_att->format) != VK_IMAGE_ASPECT_STENCIL_BIT &&
+ pass_att->first_subpass_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
pass_att->first_subpass_layout = subpass_att->layout;
assert(pass_att->first_subpass_layout != VK_IMAGE_LAYOUT_UNDEFINED);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_pipeline.c
^
|
@@ -233,6 +233,12 @@
*/
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+ const nir_opt_access_options opt_access_options = {
+ .is_vulkan = true,
+ .infer_non_readable = true,
+ };
+ NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
+
/* Split member structs. We do this before lower_io_to_temporaries so that
* it doesn't lower system values to temporaries by accident.
*/
@@ -1157,6 +1163,24 @@
if (deleted_output)
nir_fixup_deref_modes(stage->nir);
+ /* Initially the valid outputs value is based off the renderpass color
+ * attachments (see populate_wm_prog_key()), now that we've potentially
+ * deleted variables that map to unused attachments, we need to update the
+ * valid outputs for the backend compiler based on what output variables
+ * are actually used. */
+ stage->key.wm.color_outputs_valid = 0;
+ nir_foreach_shader_out_variable_safe(var, stage->nir) {
+ if (var->data.location < FRAG_RESULT_DATA0)
+ continue;
+
+ const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
+ const unsigned array_len =
+ glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
+ assert(rt + array_len <= MAX_RTS);
+
+ stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
+ }
+
/* We stored the number of subpass color attachments in nr_color_regions
* when calculating the key for caching. Now that we've computed the bind
* map, we can reduce this to the actual max before we go into the back-end
@@ -2183,8 +2207,7 @@
}
const VkPipelineMultisampleStateCreateInfo *ms_info =
- pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
- pCreateInfo->pMultisampleState;
+ raster_discard ? NULL : pCreateInfo->pMultisampleState;
if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
@@ -2214,8 +2237,7 @@
}
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
- uses_color_att) {
+ if (!raster_discard && uses_color_att) {
assert(pCreateInfo->pColorBlendState);
const VkPipelineColorWriteCreateInfoEXT *color_write_info =
vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/anv_private.h
^
|
@@ -3487,7 +3487,8 @@
\
(struct GFX_BINDLESS_SHADER_RECORD) { \
.OffsetToLocalArguments = (local_arg_offset) / 8, \
- .BindlessShaderDispatchMode = prog_data->simd_size / 16, \
+ .BindlessShaderDispatchMode = \
+ prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8, \
.KernelStartPointer = bin->kernel.offset, \
}; \
})
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_cmd_buffer.c
^
|
@@ -1179,9 +1179,12 @@
UNUSED const uint32_t image_layers = MAX2(image->vk.array_layers, max_depth);
assert((uint64_t)base_layer + layer_count <= image_layers);
assert(last_level_num <= image->vk.mip_levels);
- /* The spec disallows these final layouts. */
- assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
- final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED);
+ /* If there is a layout transition, the final layout cannot be undefined or
+ * preinitialized (VUID-VkImageMemoryBarrier-newLayout-01198).
+ */
+ assert(initial_layout == final_layout ||
+ (final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
+ final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED));
const struct isl_drm_modifier_info *isl_mod_info =
image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT
? isl_drm_modifier_get_info(image->vk.drm_format_mod)
@@ -1410,14 +1413,22 @@
anv_layout_to_aux_usage(devinfo, image, aspect, 0, initial_layout);
enum isl_aux_usage final_aux_usage =
anv_layout_to_aux_usage(devinfo, image, aspect, 0, final_layout);
+ enum anv_fast_clear_type initial_fast_clear =
+ anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout);
+ enum anv_fast_clear_type final_fast_clear =
+ anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout);
/* We must override the anv_layout_to_* functions because they are unaware of
* acquire/release direction.
*/
if (mod_acquire) {
initial_aux_usage = isl_mod_info->aux_usage;
+ initial_fast_clear = isl_mod_info->supports_clear_color ?
+ initial_fast_clear : ANV_FAST_CLEAR_NONE;
} else if (mod_release) {
final_aux_usage = isl_mod_info->aux_usage;
+ final_fast_clear = isl_mod_info->supports_clear_color ?
+ final_fast_clear : ANV_FAST_CLEAR_NONE;
}
/* The current code assumes that there is no mixing of CCS_E and CCS_D.
@@ -1440,10 +1451,6 @@
/* If the initial layout supports more fast clear than the final layout
* then we need at least a partial resolve.
*/
- const enum anv_fast_clear_type initial_fast_clear =
- anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout);
- const enum anv_fast_clear_type final_fast_clear =
- anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout);
if (final_fast_clear < initial_fast_clear)
resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE;
@@ -1703,7 +1710,6 @@
else
continue;
- state->attachments[att].color.state = next_state;
next_state.offset += ss_stride;
next_state.map += ss_stride;
}
@@ -1820,7 +1826,7 @@
const struct anv_image_view * const iview =
anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
- if (iview) {
+ if (iview && (iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
VkImageLayout layout =
cmd_buffer->state.subpass->depth_stencil_attachment->layout;
@@ -2360,6 +2366,13 @@
bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
pipe.ConstantCacheInvalidationEnable =
bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+#if GFX_VER >= 12
+ /* Invalidates the L3 cache part in which index & vertex data is loaded
+ * when VERTEX_BUFFER_STATE::L3BypassDisable is set.
+ */
+ pipe.L3ReadOnlyCacheInvalidationEnable =
+ bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+#endif
pipe.VFCacheInvalidationEnable =
bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
pipe.TextureCacheInvalidationEnable =
@@ -3752,8 +3765,10 @@
cmd_buffer_emit_clip(cmd_buffer);
- if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE)
- cmd_buffer_emit_streamout(cmd_buffer);
+ if (pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
+ if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | ANV_CMD_DIRTY_XFB_ENABLE))
+ cmd_buffer_emit_streamout(cmd_buffer);
+ }
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
gfx8_cmd_buffer_emit_viewport(cmd_buffer);
@@ -4197,6 +4212,9 @@
genX(cmd_buffer_flush_state)(cmd_buffer);
+ if (cmd_buffer->state.conditional_render_enabled)
+ genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
if (vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance)
emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
@@ -4231,6 +4249,7 @@
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.IndirectParameterEnable = true;
+ prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
@@ -6509,6 +6528,7 @@
&cmd_state->attachments[dst_att];
if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
/* MSAA resolves sample from the source attachment. Transition the
@@ -6575,6 +6595,7 @@
}
if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
src_state->current_stencil_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_query.c
^
|
@@ -723,7 +723,6 @@
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
- case VK_QUERY_TYPE_TIMESTAMP:
for (uint32_t i = 0; i < queryCount; i++) {
emit_query_pc_availability(cmd_buffer,
anv_query_address(pool, firstQuery + i),
@@ -731,6 +730,23 @@
}
break;
+ case VK_QUERY_TYPE_TIMESTAMP: {
+ for (uint32_t i = 0; i < queryCount; i++) {
+ emit_query_pc_availability(cmd_buffer,
+ anv_query_address(pool, firstQuery + i),
+ false);
+ }
+
+ /* Add a CS stall here to make sure the PIPE_CONTROL above has
+ * completed. Otherwise some timestamps written later with MI_STORE_*
+ * commands might race with the PIPE_CONTROL in the loop above.
+ */
+ anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
+ "vkCmdResetQueryPool of timestamps");
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+ break;
+ }
+
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
struct mi_builder b;
@@ -1244,6 +1260,7 @@
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
mi_reg64(TIMESTAMP));
+ emit_query_mi_availability(&b, query_addr, true);
} else {
/* Everything else is bottom-of-pipe */
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
@@ -1257,9 +1274,9 @@
if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
pc.CommandStreamerStallEnable = true;
}
+ emit_query_pc_availability(cmd_buffer, query_addr, true);
}
- emit_query_pc_availability(cmd_buffer, query_addr, true);
/* When multiview is active the spec requires that N consecutive query
* indices are used, where N is the number of active views in the subpass.
@@ -1361,6 +1378,7 @@
*/
if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) {
anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_TILE_CACHE_FLUSH_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
"CopyQueryPoolResults");
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/intel/vulkan/genX_state.c
^
|
@@ -156,11 +156,12 @@
init_render_queue_state(struct anv_queue *queue)
{
struct anv_device *device = queue->device;
- struct anv_batch batch;
-
uint32_t cmds[64];
- batch.start = batch.next = cmds;
- batch.end = (void *) cmds + sizeof(cmds);
+ struct anv_batch batch = {
+ .start = cmds,
+ .next = cmds,
+ .end = (void *) cmds + sizeof(cmds),
+ };
anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) {
#if GFX_VER >= 9
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/loader/meson.build
^
|
@@ -41,11 +41,19 @@
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
]
-if get_option('prefer-iris')
+_prefer_iris = get_option('prefer-iris')
+if _prefer_iris == 'auto'
+ _prefer_iris = amber ? 'false' : 'true'
+endif
+if _prefer_iris == 'true'
loader_c_args += ['-DPREFER_IRIS']
endif
-if get_option('prefer-crocus')
+_prefer_crocus = get_option('prefer-crocus')
+if _prefer_crocus == 'auto'
+ _prefer_crocus = 'false'
+endif
+if _prefer_crocus == 'true'
loader_c_args += ['-DPREFER_CROCUS']
endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mapi/glapi/gen/static_data.py
^
|
@@ -1689,8 +1689,6 @@
"VertexAttribs2hvNV": 1653,
"VertexAttribs3hvNV": 1654,
"VertexAttribs4hvNV": 1655,
- "ClearTexImageEXT": 1656,
- "ClearTexSubImageEXT": 1657,
}
functions = [
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i915/intel_context.c
^
|
@@ -111,7 +111,8 @@
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
__DRIscreen *const screen = intel->intelScreen->driScrnPriv;
- if (intel->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (intel->front_buffer_dirty && ctx->DrawBuffer &&
+ _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
if (flushFront(screen) &&
driDrawable &&
driDrawable->loaderPrivate) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i965/brw_context.c
^
|
@@ -239,7 +239,8 @@
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
__DRIscreen *const dri_screen = brw->screen->driScrnPriv;
- if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (brw->front_buffer_dirty && ctx->DrawBuffer &&
+ _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
if (flushFront(dri_screen) && driDrawable &&
driDrawable->loaderPrivate) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/i965/brw_extensions.c
^
|
@@ -87,7 +87,6 @@
ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
ctx->Extensions.ARB_texture_non_power_of_two = true;
ctx->Extensions.ARB_texture_rg = true;
- ctx->Extensions.ARB_texture_rgb10_a2ui = true;
ctx->Extensions.ARB_vertex_program = true;
ctx->Extensions.ARB_vertex_shader = true;
ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
@@ -112,7 +111,6 @@
ctx->Extensions.EXT_texture_array = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
- ctx->Extensions.EXT_texture_integer = true;
ctx->Extensions.EXT_texture_norm16 = true;
ctx->Extensions.EXT_texture_shared_exponent = true;
ctx->Extensions.EXT_texture_snorm = true;
@@ -202,8 +200,10 @@
ctx->Extensions.ARB_texture_cube_map_array = true;
ctx->Extensions.ARB_texture_gather = true;
ctx->Extensions.ARB_texture_multisample = true;
+ ctx->Extensions.ARB_texture_rgb10_a2ui = true;
ctx->Extensions.ARB_uniform_buffer_object = true;
ctx->Extensions.EXT_gpu_shader4 = true;
+ ctx->Extensions.EXT_texture_integer = true;
ctx->Extensions.EXT_texture_shadow_lod = true;
if (ctx->API != API_OPENGL_COMPAT ||
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/meson.build
^
|
@@ -51,7 +51,7 @@
if _dri_drivers != []
libmesa_dri_drivers = shared_library(
- 'mesa_dri_drivers',
+ '@0@_dri_drivers'.format(glvnd_vendor_name),
[],
link_whole : _dri_drivers,
link_with : [
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/nouveau/nouveau_driver.c
^
|
@@ -71,7 +71,7 @@
PUSH_KICK(push);
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer) &&
+ if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
__DRIscreen *screen = nctx->screen->dri_screen;
const __DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/drivers/dri/radeon/radeon_common.c
^
|
@@ -544,7 +544,8 @@
rcommonFlushCmdBuf(radeon, __func__);
flush_front:
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && radeon->front_buffer_dirty) {
+ if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
+ radeon->front_buffer_dirty) {
__DRIscreen *const screen = radeon->radeonScreen->driScreen;
if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/dlist.c
^
|
@@ -816,6 +816,7 @@
free(node->cold->current_data);
node->cold->current_data = NULL;
+ free(node->cold->prims);
free(node->cold);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/fbobject.c
^
|
@@ -5192,9 +5192,19 @@
discard_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
GLsizei numAttachments, const GLenum *attachments)
{
+ GLenum depth_att, stencil_att;
+
if (!ctx->Driver.DiscardFramebuffer)
return;
+ if (_mesa_is_user_fbo(fb)) {
+ depth_att = GL_DEPTH_ATTACHMENT;
+ stencil_att = GL_STENCIL_ATTACHMENT;
+ } else {
+ depth_att = GL_DEPTH;
+ stencil_att = GL_STENCIL;
+ }
+
for (int i = 0; i < numAttachments; i++) {
struct gl_renderbuffer_attachment *att =
get_fb_attachment(ctx, fb, attachments[i]);
@@ -5207,12 +5217,12 @@
* Driver.DiscardFramebuffer if the attachments list includes both depth
* and stencil and they both point at the same renderbuffer.
*/
- if ((attachments[i] == GL_DEPTH_ATTACHMENT ||
- attachments[i] == GL_STENCIL_ATTACHMENT) &&
+ if ((attachments[i] == depth_att ||
+ attachments[i] == stencil_att) &&
(!att->Renderbuffer ||
att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL)) {
- GLenum other_format = (attachments[i] == GL_DEPTH_ATTACHMENT ?
- GL_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT);
+ GLenum other_format = (attachments[i] == depth_att ?
+ stencil_att : depth_att);
bool has_both = false;
for (int j = 0; j < numAttachments; j++) {
if (attachments[j] == other_format) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/ffvertex_prog.c
^
|
@@ -918,19 +918,19 @@
static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
- GLuint side, GLuint property )
+ GLuint side, GLuint property, bool *is_state_light )
{
GLuint attrib = material_attrib(side, property);
if (p->materials & (1<<attrib)) {
struct ureg light_value =
register_param3(p, STATE_LIGHT, light, property);
- struct ureg material_value = get_material(p, side, property);
- struct ureg tmp = get_temp(p);
- emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
- return tmp;
+ *is_state_light = true;
+ return light_value;
}
- else
+ else {
+ *is_state_light = false;
return register_param3(p, STATE_LIGHTPROD, light, attrib);
+ }
}
@@ -1112,20 +1112,28 @@
*/
struct ureg lightprod_front[MAX_LIGHTS][3];
struct ureg lightprod_back[MAX_LIGHTS][3];
+ bool lightprod_front_is_state_light[MAX_LIGHTS][3];
+ bool lightprod_back_is_state_light[MAX_LIGHTS][3];
for (i = 0; i < MAX_LIGHTS; i++) {
if (p->state->unit[i].light_enabled) {
- lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT);
+ lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT,
+ &lightprod_front_is_state_light[i][0]);
if (twoside)
- lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT);
+ lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT,
+ &lightprod_back_is_state_light[i][0]);
- lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE);
+ lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE,
+ &lightprod_front_is_state_light[i][1]);
if (twoside)
- lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE);
+ lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE,
+ &lightprod_back_is_state_light[i][1]);
- lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR);
+ lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR,
+ &lightprod_front_is_state_light[i][2]);
if (twoside)
- lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR);
+ lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR,
+ &lightprod_back_is_state_light[i][2]);
}
}
@@ -1209,6 +1217,18 @@
/* Front face lighting:
*/
{
+ /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in
+ * get_lightprod to avoid using too many temps.
+ */
+ for (int j = 0; j < 3; j++) {
+ if (lightprod_front_is_state_light[i][j]) {
+ struct ureg material_value = get_material(p, 0, STATE_AMBIENT + j);
+ struct ureg tmp = get_temp(p);
+ emit_op2(p, OPCODE_MUL, tmp, 0, lightprod_front[i][j], material_value);
+ lightprod_front[i][j] = tmp;
+ }
+ }
+
struct ureg ambient = lightprod_front[i][0];
struct ureg diffuse = lightprod_front[i][1];
struct ureg specular = lightprod_front[i][2];
@@ -1264,6 +1284,18 @@
/* Back face lighting:
*/
if (twoside) {
+ /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in
+ * get_lightprod to avoid using too many temps.
+ */
+ for (int j = 0; j < 3; j++) {
+ if (lightprod_back_is_state_light[i][j]) {
+ struct ureg material_value = get_material(p, 1, STATE_AMBIENT + j);
+ struct ureg tmp = get_temp(p);
+ emit_op2(p, OPCODE_MUL, tmp, 1, lightprod_back[i][j], material_value);
+ lightprod_back[i][j] = tmp;
+ }
+ }
+
struct ureg ambient = lightprod_back[i][0];
struct ureg diffuse = lightprod_back[i][1];
struct ureg specular = lightprod_back[i][2];
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/program_resource.c
^
|
@@ -97,7 +97,6 @@
_mesa_enum_to_string(pname), params);
}
- unsigned i;
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glGetProgramInterfaceiv");
@@ -117,125 +116,7 @@
return;
}
- /* Validate pname against interface. */
- switch(pname) {
- case GL_ACTIVE_RESOURCES:
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++)
- if (shProg->data->ProgramResourceList[i].Type == programInterface)
- (*params)++;
- break;
- case GL_MAX_NAME_LENGTH:
- if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
- programInterface == GL_TRANSFORM_FEEDBACK_BUFFER) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetProgramInterfaceiv(%s pname %s)",
- _mesa_enum_to_string(programInterface),
- _mesa_enum_to_string(pname));
- return;
- }
- /* Name length consists of base name, 3 additional chars '[0]' if
- * resource is an array and finally 1 char for string terminator.
- */
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type != programInterface)
- continue;
- unsigned len =
- _mesa_program_resource_name_len(&shProg->data->ProgramResourceList[i]);
- *params = MAX2(*params, len + 1);
- }
- break;
- case GL_MAX_NUM_ACTIVE_VARIABLES:
- switch (programInterface) {
- case GL_UNIFORM_BLOCK:
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type == programInterface) {
- struct gl_uniform_block *block =
- (struct gl_uniform_block *)
- shProg->data->ProgramResourceList[i].Data;
- *params = MAX2(*params, block->NumUniforms);
- }
- }
- break;
- case GL_SHADER_STORAGE_BLOCK:
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type == programInterface) {
- struct gl_uniform_block *block =
- (struct gl_uniform_block *)
- shProg->data->ProgramResourceList[i].Data;
- GLint block_params = 0;
- for (unsigned j = 0; j < block->NumUniforms; j++) {
- struct gl_program_resource *uni =
- _mesa_program_resource_find_active_variable(
- shProg,
- GL_BUFFER_VARIABLE,
- block,
- j);
- if (!uni)
- continue;
- block_params++;
- }
- *params = MAX2(*params, block_params);
- }
- }
- break;
- case GL_ATOMIC_COUNTER_BUFFER:
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type == programInterface) {
- struct gl_active_atomic_buffer *buffer =
- (struct gl_active_atomic_buffer *)
- shProg->data->ProgramResourceList[i].Data;
- *params = MAX2(*params, buffer->NumUniforms);
- }
- }
- break;
- case GL_TRANSFORM_FEEDBACK_BUFFER:
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type == programInterface) {
- struct gl_transform_feedback_buffer *buffer =
- (struct gl_transform_feedback_buffer *)
- shProg->data->ProgramResourceList[i].Data;
- *params = MAX2(*params, buffer->NumVaryings);
- }
- }
- break;
- default:
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetProgramInterfaceiv(%s pname %s)",
- _mesa_enum_to_string(programInterface),
- _mesa_enum_to_string(pname));
- }
- break;
- case GL_MAX_NUM_COMPATIBLE_SUBROUTINES:
- switch (programInterface) {
- case GL_VERTEX_SUBROUTINE_UNIFORM:
- case GL_FRAGMENT_SUBROUTINE_UNIFORM:
- case GL_GEOMETRY_SUBROUTINE_UNIFORM:
- case GL_COMPUTE_SUBROUTINE_UNIFORM:
- case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
- case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: {
- for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
- if (shProg->data->ProgramResourceList[i].Type == programInterface) {
- struct gl_uniform_storage *uni =
- (struct gl_uniform_storage *)
- shProg->data->ProgramResourceList[i].Data;
- *params = MAX2(*params, uni->num_compatible_subroutines);
- }
- }
- break;
- }
-
- default:
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetProgramInterfaceiv(%s pname %s)",
- _mesa_enum_to_string(programInterface),
- _mesa_enum_to_string(pname));
- }
- break;
- default:
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetProgramInterfaceiv(pname %s)",
- _mesa_enum_to_string(pname));
- }
+ _mesa_get_program_interfaceiv(shProg, programInterface, pname, params);
}
static bool
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shader_query.cpp
^
|
@@ -1668,6 +1668,135 @@
*length = amount;
}
+extern void
+_mesa_get_program_interfaceiv(struct gl_shader_program *shProg,
+ GLenum programInterface, GLenum pname,
+ GLint *params)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ unsigned i;
+
+ /* Validate pname against interface. */
+ switch(pname) {
+ case GL_ACTIVE_RESOURCES:
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++)
+ if (shProg->data->ProgramResourceList[i].Type == programInterface)
+ (*params)++;
+ break;
+ case GL_MAX_NAME_LENGTH:
+ if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
+ programInterface == GL_TRANSFORM_FEEDBACK_BUFFER) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(%s pname %s)",
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
+ return;
+ }
+ /* Name length consists of base name, 3 additional chars '[0]' if
+ * resource is an array and finally 1 char for string terminator.
+ */
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type != programInterface)
+ continue;
+ unsigned len =
+ _mesa_program_resource_name_len(&shProg->data->ProgramResourceList[i]);
+ *params = MAX2((unsigned)*params, len + 1);
+ }
+ break;
+ case GL_MAX_NUM_ACTIVE_VARIABLES:
+ switch (programInterface) {
+ case GL_UNIFORM_BLOCK:
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_block *block =
+ (struct gl_uniform_block *)
+ shProg->data->ProgramResourceList[i].Data;
+ *params = MAX2((unsigned)*params, block->NumUniforms);
+ }
+ }
+ break;
+ case GL_SHADER_STORAGE_BLOCK:
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_block *block =
+ (struct gl_uniform_block *)
+ shProg->data->ProgramResourceList[i].Data;
+ GLint block_params = 0;
+ for (unsigned j = 0; j < block->NumUniforms; j++) {
+ struct gl_program_resource *uni =
+ _mesa_program_resource_find_active_variable(
+ shProg,
+ GL_BUFFER_VARIABLE,
+ block,
+ j);
+ if (!uni)
+ continue;
+ block_params++;
+ }
+ *params = MAX2(*params, block_params);
+ }
+ }
+ break;
+ case GL_ATOMIC_COUNTER_BUFFER:
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type == programInterface) {
+ struct gl_active_atomic_buffer *buffer =
+ (struct gl_active_atomic_buffer *)
+ shProg->data->ProgramResourceList[i].Data;
+ *params = MAX2((unsigned)*params, buffer->NumUniforms);
+ }
+ }
+ break;
+ case GL_TRANSFORM_FEEDBACK_BUFFER:
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type == programInterface) {
+ struct gl_transform_feedback_buffer *buffer =
+ (struct gl_transform_feedback_buffer *)
+ shProg->data->ProgramResourceList[i].Data;
+ *params = MAX2((unsigned)*params, buffer->NumVaryings);
+ }
+ }
+ break;
+ default:
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(%s pname %s)",
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
+ }
+ break;
+ case GL_MAX_NUM_COMPATIBLE_SUBROUTINES:
+ switch (programInterface) {
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: {
+ for (i = 0, *params = 0; i < shProg->data->NumProgramResourceList; i++) {
+ if (shProg->data->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_storage *uni =
+ (struct gl_uniform_storage *)
+ shProg->data->ProgramResourceList[i].Data;
+ *params = MAX2((unsigned)*params, uni->num_compatible_subroutines);
+ }
+ }
+ break;
+ }
+
+ default:
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(%s pname %s)",
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
+ }
+ break;
+ default:
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(pname %s)",
+ _mesa_enum_to_string(pname));
+ }
+}
+
static bool
validate_io(struct gl_program *producer, struct gl_program *consumer)
{
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shaderapi.c
^
|
@@ -767,13 +767,8 @@
*params = _mesa_longest_attribute_name_length(shProg);
return;
case GL_ACTIVE_UNIFORMS: {
- unsigned i;
- const unsigned num_uniforms =
- shProg->data->NumUniformStorage - shProg->data->NumHiddenUniforms;
- for (*params = 0, i = 0; i < num_uniforms; i++) {
- if (!shProg->data->UniformStorage[i].is_shader_storage)
- (*params)++;
- }
+ _mesa_get_program_interfaceiv(shProg, GL_UNIFORM, GL_ACTIVE_RESOURCES,
+ params);
return;
}
case GL_ACTIVE_UNIFORM_MAX_LENGTH: {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shaderapi.h
^
|
@@ -335,6 +335,11 @@
GLint *params);
extern void
+_mesa_get_program_interfaceiv(struct gl_shader_program *shProg,
+ GLenum programInterface, GLenum pname,
+ GLint *params);
+
+extern void
_mesa_create_program_resource_hash(struct gl_shader_program *shProg);
/* GL_ARB_tessellation_shader */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/main/shared.c
^
|
@@ -357,6 +357,7 @@
_mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx);
_mesa_DeleteHashTable(shared->DisplayList);
free(shared->small_dlist_store.ptr);
+ util_idalloc_fini(&shared->small_dlist_store.free_idx);
}
if (shared->BitmapAtlas) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/state_tracker/st_context.c
^
|
@@ -555,10 +555,9 @@
f->NewDepthClamp = ST_NEW_RASTERIZER;
}
+ f->NewClipPlaneEnable = ST_NEW_RASTERIZER;
if (st->lower_ucp)
- f->NewClipPlaneEnable = ST_NEW_VS_STATE | ST_NEW_GS_STATE;
- else
- f->NewClipPlaneEnable = ST_NEW_RASTERIZER;
+ f->NewClipPlaneEnable |= ST_NEW_VS_STATE | ST_NEW_GS_STATE;
f->NewLineState = ST_NEW_RASTERIZER;
f->NewPolygonState = ST_NEW_RASTERIZER;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/state_tracker/st_format.c
^
|
@@ -110,23 +110,12 @@
}
if (st_astc_format_fallback(st, mesaFormat)) {
- const struct util_format_description *desc =
- util_format_description(mesaFormat);
-
if (_mesa_is_format_srgb(mesaFormat)) {
- if (!st->transcode_astc)
- return PIPE_FORMAT_R8G8B8A8_SRGB;
- else if (desc->block.width * desc->block.height < 32)
- return PIPE_FORMAT_DXT5_SRGBA;
- else
- return PIPE_FORMAT_DXT1_SRGBA;
+ return st->transcode_astc ? PIPE_FORMAT_DXT5_SRGBA :
+ PIPE_FORMAT_R8G8B8A8_SRGB;
} else {
- if (!st->transcode_astc)
- return PIPE_FORMAT_R8G8B8A8_UNORM;
- else if (desc->block.width * desc->block.height < 32)
- return PIPE_FORMAT_DXT5_RGBA;
- else
- return PIPE_FORMAT_DXT1_RGBA;
+ return st->transcode_astc ? PIPE_FORMAT_DXT5_RGBA :
+ PIPE_FORMAT_R8G8B8A8_UNORM;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/mesa/vbo/vbo_save_api.c
^
|
@@ -117,6 +117,7 @@
#include "util/bitscan.h"
#include "util/u_memory.h"
#include "util/hash_table.h"
+#include "util/u_prim.h"
#include "gallium/include/pipe/p_state.h"
@@ -607,9 +608,7 @@
node->cold->max_index = end - 1;
int max_index_count = total_vert_count * 2;
-
- int size = max_index_count * sizeof(uint32_t);
- uint32_t* indices = (uint32_t*) malloc(size);
+ uint32_t* indices = (uint32_t*) malloc(max_index_count * sizeof(uint32_t));
struct _mesa_prim *merged_prims = NULL;
int idx = 0;
@@ -637,6 +636,12 @@
continue;
}
+ /* Increase indices storage if the original estimation was too small. */
+ if (idx + 3 * vertex_count > max_index_count) {
+ max_index_count = max_index_count + 3 * vertex_count;
+ indices = (uint32_t*) realloc(indices, max_index_count * sizeof(uint32_t));
+ }
+
/* Line strips may get converted to lines */
if (mode == GL_LINE_STRIP)
mode = GL_LINES;
@@ -701,6 +706,14 @@
}
}
+ /* Duplicate the last vertex for incomplete primitives */
+ unsigned min_vert = u_prim_vertex_count(mode)->min;
+ for (unsigned j = vertex_count; j < min_vert; j++) {
+ indices[idx++] = add_vertex(save, vertex_to_index,
+ original_prims[i].start + vertex_count - 1,
+ temp_vertices_buffer, &max_index);
+ }
+
if (merge_prims) {
/* Update vertex count. */
merged_prims[last_valid_prim].count += idx - start;
@@ -813,12 +826,14 @@
free(temp_vertices_buffer);
}
- /* Since we're append the indices to an existing buffer, we need to adjust the start value of each
+ /* Since we append the indices to an existing buffer, we need to adjust the start value of each
* primitive (not the indices themselves). */
- save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used;
- int indices_offset = save->current_bo_bytes_used / 4;
- for (int i = 0; i < merged_prim_count; i++) {
- merged_prims[i].start += indices_offset;
+ if (!ctx->ListState.Current.UseLoopback) {
+ save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used;
+ int indices_offset = save->current_bo_bytes_used / 4;
+ for (int i = 0; i < merged_prim_count; i++) {
+ merged_prims[i].start += indices_offset;
+ }
}
/* Then upload the indices. */
@@ -933,20 +948,16 @@
_glapi_set_dispatch(ctx->Exec);
/* _vbo_loopback_vertex_list doesn't use the index buffer, so we have to
- * use buffer_in_ram instead of current_bo which contains all vertices instead
- * of the deduplicated vertices only in the !UseLoopback case.
+ * use buffer_in_ram (which contains all vertices) instead of current_bo
+ * (which contains deduplicated vertices *when* UseLoopback is false).
*
* The problem is that the VAO offset is based on current_bo's layout,
* so we have to use a temp value.
*/
struct gl_vertex_array_object *vao = node->VAO[VP_MODE_SHADER];
GLintptr original = vao->BufferBinding[0].Offset;
- if (!ctx->ListState.Current.UseLoopback) {
- GLintptr new_offset = 0;
- /* 'start_offset' has been added to all primitives 'start', so undo it here. */
- new_offset -= start_offset * stride;
- vao->BufferBinding[0].Offset = new_offset;
- }
+ /* 'start_offset' has been added to all primitives 'start', so undo it here. */
+ vao->BufferBinding[0].Offset = -(GLintptr)(start_offset * stride);
_vbo_loopback_vertex_list(ctx, node, save->vertex_store->buffer_in_ram);
vao->BufferBinding[0].Offset = original;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/dxil_nir.c
^
|
@@ -1386,10 +1386,10 @@
int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (sampler_idx == -1) {
/* No derefs, must be using indices */
- struct hash_entry *hash_entry = _mesa_hash_table_u64_search(data, tex->sampler_index);
+ nir_variable *bare_sampler = _mesa_hash_table_u64_search(data, tex->sampler_index);
/* Already have a bare sampler here */
- if (hash_entry)
+ if (bare_sampler)
return false;
nir_variable *typed_sampler = NULL;
@@ -1408,7 +1408,7 @@
/* Clone the typed sampler to a bare sampler and we're done */
assert(typed_sampler);
- nir_variable *bare_sampler = nir_variable_clone(typed_sampler, b->shader);
+ bare_sampler = nir_variable_clone(typed_sampler, b->shader);
bare_sampler->type = get_bare_samplers_for_type(typed_sampler->type);
nir_shader_add_variable(b->shader, bare_sampler);
_mesa_hash_table_u64_insert(data, tex->sampler_index, bare_sampler);
@@ -1428,11 +1428,8 @@
return false;
}
- struct hash_entry *hash_entry = _mesa_hash_table_u64_search(data, old_var->data.binding);
- nir_variable *new_var;
- if (hash_entry) {
- new_var = hash_entry->data;
- } else {
+ nir_variable *new_var = _mesa_hash_table_u64_search(data, old_var->data.binding);
+ if (!new_var) {
new_var = nir_variable_clone(old_var, b->shader);
new_var->type = get_bare_samplers_for_type(old_var->type);
nir_shader_add_variable(b->shader, new_var);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/meson.build
^
|
@@ -41,7 +41,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
libdxil_compiler = static_library(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/microsoft/compiler/nir_to_dxil.c
^
|
@@ -2112,6 +2112,12 @@
case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
+ case nir_op_inot: {
+ unsigned bit_size = alu->dest.dest.ssa.bit_size;
+ intmax_t val = bit_size == 1 ? 1 : -1;
+ const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
+ return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
+ }
case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_lower_swizzle.c
^
|
@@ -50,6 +50,19 @@
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_V6_I32:
+
+ /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
+ * boolean is implemented as a 16-bit integer, the swizzle is needed
+ * for correct operation if the instruction producing the 16-bit
+ * boolean does not replicate to both halves of the containing 32-bit
+ * register. As such, we may need to lower a swizzle.
+ *
+ * This is a silly hack. Ideally, code gen would be smart enough to
+ * avoid this case (by replicating). In practice, silly hardware design
+ * decisions force our hand here.
+ */
+ case BI_OPCODE_MUX_I32:
+ case BI_OPCODE_CSEL_I32:
break;
case BI_OPCODE_IADD_V2S16:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_opt_mod_props.c
^
|
@@ -25,6 +25,18 @@
#include "compiler.h"
#include "bi_builder.h"
+/*
+ * Due to a Bifrost encoding restriction, some instructions cannot have an abs
+ * modifier on both sources. Check if adding a fabs modifier to a given source
+ * of a binary instruction would cause this restriction to be hit.
+ */
+static bool
+bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
+{
+ return (arch <= 8) && I->src[1 - s].abs &&
+ bi_is_word_equiv(I->src[1 - s], repl);
+}
+
static bool
bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
@@ -32,9 +44,15 @@
case BI_OPCODE_FCMP_V2F16:
case BI_OPCODE_FMAX_V2F16:
case BI_OPCODE_FMIN_V2F16:
- /* Bifrost encoding restriction: can't have both abs if equal sources */
- return !(arch <= 8 && I->src[1 - s].abs
- && bi_is_word_equiv(I->src[1 - s], repl));
+ return !bi_would_impact_abs(arch, I, repl, s);
+ case BI_OPCODE_FADD_V2F16:
+ /*
+ * For FADD.v2f16, the FMA pipe has the abs encoding hazard,
+ * while the FADD pipe cannot encode a clamp. Either case in
+ * isolation can be worked around in the scheduler, but both
+ * together is impossible to encode. Avoid the hazard.
+ */
+ return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
case BI_OPCODE_V2F32_TO_V2F16:
/* TODO: Needs both match or lower */
return false;
@@ -182,6 +200,10 @@
case BI_OPCODE_FMA_RSCALE_V2F16:
case BI_OPCODE_FADD_RSCALE_F32:
return false;
+ case BI_OPCODE_FADD_V2F16:
+ /* Encoding restriction */
+ return !(I->src[0].abs && I->src[1].abs &&
+ bi_is_word_equiv(I->src[0], I->src[1]));
default:
return bi_opcode_props[I->op].clamp;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bi_schedule.c
^
|
@@ -197,7 +197,7 @@
}
static void
-bi_create_dependency_graph(struct bi_worklist st, bool inorder)
+bi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend)
{
struct util_dynarray last_read[64], last_write[64];
@@ -262,6 +262,17 @@
}
}
+ /* Blend shaders are allowed to clobber R0-R15. Treat these
+ * registers like extra destinations for scheduling purposes.
+ */
+ if (ins->op == BI_OPCODE_BLEND && !is_blend) {
+ for (unsigned c = 0; c < 16; ++c) {
+ add_dependency(last_read, c, i, st.dependents, st.dep_counts);
+ add_dependency(last_write, c, i, st.dependents, st.dep_counts);
+ mark_access(last_write, c, i);
+ }
+ }
+
bi_foreach_src(ins, s) {
if (ins->src[s].type != BI_INDEX_REGISTER) continue;
@@ -414,7 +425,7 @@
*/
static struct bi_worklist
-bi_initialize_worklist(bi_block *block, bool inorder)
+bi_initialize_worklist(bi_block *block, bool inorder, bool is_blend)
{
struct bi_worklist st = { };
st.instructions = bi_flatten_block(block, &st.count);
@@ -425,7 +436,7 @@
st.dependents = calloc(st.count, sizeof(st.dependents[0]));
st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0]));
- bi_create_dependency_graph(st, inorder);
+ bi_create_dependency_graph(st, inorder, is_blend);
st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
for (unsigned i = 0; i < st.count; ++i) {
@@ -479,6 +490,18 @@
ins->src[1].swizzle == BI_SWIZZLE_H01);
}
+/*
+ * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs
+ * encodings are permitted by swapping operands; however, this scheme fails if
+ * both operands are equal. Test for this case.
+ */
+static bool
+bi_impacted_abs(bi_instr *I)
+{
+ return I->src[0].abs && I->src[1].abs &&
+ bi_is_word_equiv(I->src[0], I->src[1]);
+}
+
bool
bi_can_fma(bi_instr *ins)
{
@@ -486,6 +509,10 @@
if (bi_can_iaddc(ins))
return true;
+ /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */
+ if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins))
+ return false;
+
/* TODO: some additional fp16 constraints */
return bi_opcode_props[ins->op].fma;
}
@@ -972,16 +999,21 @@
* same clause (most likely they will not), so if a later instruction
* in the clause accesses the destination, the message-passing
* instruction can't be scheduled */
- if (bi_opcode_props[instr->op].sr_write && !bi_is_null(instr->dest[0])) {
- unsigned nr = bi_count_write_registers(instr, 0);
- assert(instr->dest[0].type == BI_INDEX_REGISTER);
- unsigned reg = instr->dest[0].value;
+ if (bi_opcode_props[instr->op].sr_write) {
+ bi_foreach_dest(instr, d) {
+ if (bi_is_null(instr->dest[d]))
+ continue;
- for (unsigned i = 0; i < clause->access_count; ++i) {
- bi_index idx = clause->accesses[i];
- for (unsigned d = 0; d < nr; ++d) {
- if (bi_is_equiv(bi_register(reg + d), idx))
- return false;
+ unsigned nr = bi_count_write_registers(instr, d);
+ assert(instr->dest[d].type == BI_INDEX_REGISTER);
+ unsigned reg = instr->dest[d].value;
+
+ for (unsigned i = 0; i < clause->access_count; ++i) {
+ bi_index idx = clause->accesses[i];
+ for (unsigned d = 0; d < nr; ++d) {
+ if (bi_is_equiv(bi_register(reg + d), idx))
+ return false;
+ }
}
}
}
@@ -1796,7 +1828,8 @@
/* Copy list to dynamic array */
struct bi_worklist st = bi_initialize_worklist(block,
- bifrost_debug & BIFROST_DBG_INORDER);
+ bifrost_debug & BIFROST_DBG_INORDER,
+ ctx->inputs->is_blend);
if (!st.count) {
bi_free_worklist(st);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/bifrost_compile.c
^
|
@@ -1420,7 +1420,9 @@
uint32_t acc = 0;
for (unsigned i = 0; i < instr->def.num_components; ++i) {
- unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size);
+ uint32_t v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size);
+
+ v = bi_extend_constant(v, instr->def.bit_size);
acc |= (v << (i * instr->def.bit_size));
}
@@ -2615,6 +2617,7 @@
for (unsigned i = 0; i < instr->num_srcs; ++i) {
bi_index index = bi_src_index(&instr->src[i].src);
unsigned sz = nir_src_bit_size(instr->src[i].src);
+ unsigned components = nir_src_num_components(instr->src[i].src);
ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
nir_alu_type T = base | sz;
@@ -2623,27 +2626,25 @@
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
cx = bi_emit_texc_cube_coord(b, index, &cy);
} else {
- unsigned components = nir_src_num_components(instr->src[i].src);
-
/* Copy XY (for 2D+) or XX (for 1D) */
cx = index;
cy = bi_word(index, MIN2(1, components - 1));
assert(components >= 1 && components <= 3);
- if (components < 3) {
- /* nothing to do */
- } else if (desc.array) {
- /* 2D array */
- dregs[BIFROST_TEX_DREG_ARRAY] =
- bi_emit_texc_array_index(b,
- bi_word(index, 2), T);
- } else {
+ if (components == 3 && !desc.array) {
/* 3D */
dregs[BIFROST_TEX_DREG_Z_COORD] =
bi_word(index, 2);
}
}
+
+ if (desc.array) {
+ dregs[BIFROST_TEX_DREG_ARRAY] =
+ bi_emit_texc_array_index(b,
+ bi_word(index, components - 1), T);
+ }
+
break;
case nir_tex_src_lod:
@@ -3832,7 +3833,7 @@
/* TODO: pack flat */
}
- info->ubo_mask = ctx->ubo_mask & BITSET_MASK(ctx->nir->info.num_ubos);
+ info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1);
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
disassemble_bifrost(stdout, binary->data, binary->size,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/compiler.h
^
|
@@ -701,6 +701,19 @@
return bi_get_index(ctx->reg_alloc++, true, 0);
}
+/* NIR booleans are 1-bit (0/1). For now, backend IR booleans are N-bit
+ * (0/~0) where N depends on the context. This requires us to sign-extend
+ * when converting constants from NIR to the backend IR.
+ */
+static inline uint32_t
+bi_extend_constant(uint32_t constant, unsigned bit_size)
+{
+ if (bit_size == 1 && constant != 0)
+ return ~0;
+ else
+ return constant;
+}
+
/* Inline constants automatically, will be lowered out by bi_lower_fau where a
* constant is not allowed. load_const_to_scalar gaurantees that this makes
* sense */
@@ -708,11 +721,13 @@
static inline bi_index
bi_src_index(nir_src *src)
{
- if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32)
- return bi_imm_u32(nir_src_as_uint(*src));
- else if (src->is_ssa)
+ if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
+ uint32_t v = nir_src_as_uint(*src);
+
+ return bi_imm_u32(bi_extend_constant(v, nir_src_bit_size(*src)));
+ } else if (src->is_ssa) {
return bi_get_index(src->ssa->index, false, 0);
- else {
+ } else {
assert(!src->reg.indirect);
return bi_get_index(src->reg.reg->index, true, 0);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/gen_disasm.py
^
|
@@ -238,7 +238,7 @@
key_set = find_context_keys(desc, test)
ordered = 'ordering' in key_set
key_set.discard('ordering')
- keys = list(key_set)
+ keys = sorted(list(key_set))
# Evaluate the deriveds for every possible state, forming a (state -> deriveds) map
testf = compile_derived(test, keys)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/meson.build
^
|
@@ -116,7 +116,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
libpanfrost_bifrost_disasm = static_library(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/bifrost/valhall/disassemble.h
^
|
@@ -13,7 +13,6 @@
#define MASK(count) ((1ull << (count)) - 1)
#define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1))
#define UNUSED __attribute__((unused))
-static inline float fui(uint32_t u) { float f; memcpy(&f, &u, 4); return f; }
#define VA_SRC_UNIFORM_TYPE 0x2
#define VA_SRC_IMM_TYPE 0x3
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/ci/panfrost-g52-fails.txt
^
|
@@ -21,7 +21,6 @@
shaders@glsl-bug-110796,Fail
shaders@glsl-uniform-interstage-limits@subdivide 5,Crash
shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges,Crash
-shaders@glsl-vs-if-bool,Fail
shaders@point-vertex-id divisor,Fail
shaders@point-vertex-id gl_instanceid divisor,Fail
shaders@point-vertex-id gl_instanceid,Fail
@@ -71,56 +70,23 @@
spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-copypixels,Fail
spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-drawpixels,Fail
spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-readpixels,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH32F_STENCIL8- border color only,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH_COMPONENT32F- border color only,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH32F_STENCIL8- swizzled- border color only,Fail
-spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32F- swizzled- border color only,Fail
spec@arb_depth_clamp@depth-clamp-range,Fail
-spec@arb_depth_texture@texwrap formats bordercolor,Fail
-spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT16- border color only,Fail
-spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT24- border color only,Fail
-spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT32- border color only,Fail
-spec@arb_depth_texture@texwrap formats bordercolor-swizzled,Fail
-spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT16- swizzled- border color only,Fail
-spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT24- swizzled- border color only,Fail
-spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32- swizzled- border color only,Fail
spec@arb_direct_state_access@gettextureimage-formats,Crash
spec@arb_direct_state_access@gettextureimage-formats init-by-rendering,Fail
-spec@arb_direct_state_access@texture-buffer,Fail
spec@arb_draw_buffers@fbo-mrt-new-bind,Crash
spec@arb_es2_compatibility@fbo-blending-formats,Fail
spec@arb_es2_compatibility@fbo-blending-formats@GL_RGB565,Fail
-spec@arb_es2_compatibility@texwrap formats bordercolor,Fail
-spec@arb_es2_compatibility@texwrap formats bordercolor@GL_RGB565- border color only,Fail
-spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail
-spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail
-spec@arb_fragment_program@fp-fragment-position,Crash
-spec@arb_fragment_program@sparse-samplers,Crash
spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit depth gl_depth32f_stencil8,Fail
spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit depth_stencil gl_depth32f_stencil8,Fail
spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_depth32f_stencil8,Fail
spec@arb_framebuffer_object@fbo-luminance-alpha,Fail
spec@arb_framebuffer_srgb@fbo-fast-clear,Fail
spec@arb_get_program_binary@restore-sso-program,Fail
-spec@arb_get_texture_sub_image@arb_get_texture_sub_image-getcompressed,Fail
-spec@arb_get_texture_sub_image@arb_get_texture_sub_image-get,Fail
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_DEPTH_STENCIL,Fail
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_STENCIL_INDEX,Fail
-spec@arb_pixel_buffer_object@texsubimage array pbo,Fail
-spec@arb_pixel_buffer_object@texsubimage cube_map_array pbo,Fail
-spec@arb_pixel_buffer_object@texsubimage pbo,Fail
spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
-spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail
-spec@arb_sample_shading@builtin-gl-sample-id 0,Fail
-spec@arb_sample_shading@builtin-gl-sample-id 2,Fail
-spec@arb_sample_shading@builtin-gl-sample-id 4,Fail
-spec@arb_sample_shading@builtin-gl-sample-mask 0,Fail
-spec@arb_sample_shading@builtin-gl-sample-mask 2,Fail
-spec@arb_sample_shading@builtin-gl-sample-mask 4,Fail
spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail
spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
@@ -145,7 +111,6 @@
spec@arb_sample_shading@samplemask 4,Fail
spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
-spec@arb_shader_atomic_counters@respecify-buffer,Fail
spec@arb_shader_draw_parameters@drawid-indirect-baseinstance,Fail
spec@arb_shader_draw_parameters@drawid-indirect-basevertex,Fail
spec@arb_shader_draw_parameters@drawid-indirect,Fail
@@ -157,25 +122,9 @@
spec@arb_shader_texture_lod@execution@tex-miplevel-selection *projgradarb 2drect_projvec4,Crash
spec@arb_shader_texture_lod@execution@tex-miplevel-selection *projgradarb 2drectshadow,Crash
spec@arb_shading_language_420pack@active sampler conflict,Crash
-spec@arb_texture_buffer_object@data-sync,Fail
spec@arb_texture_buffer_object@formats (fs- arb),Crash
spec@arb_texture_buffer_object@formats (vs- arb),Crash
spec@arb_texture_buffer_object@render-no-bo,Crash
-spec@arb_texture_buffer_object@subdata-sync,Fail
-spec@arb_texture_compression@texwrap formats bordercolor,Fail
-spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_ALPHA- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_LUMINANCE_ALPHA- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_RGBA- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor@GL_COMPRESSED_RGB- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor-swizzled,Fail
-spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_ALPHA- swizzled- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_LUMINANCE_ALPHA- swizzled- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA- swizzled- border color only,Fail
-spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB- swizzled- border color only,Fail
-spec@arb_texture_cube_map_array@arb_texture_cube_map_array-cubemap,Fail
-spec@arb_texture_cube_map_array@arb_texture_cube_map_array-cubemap-lod,Fail
-spec@arb_texture_cube_map_array@arb_texture_cube_map_array-fbo-cubemap-array,Fail
-spec@arb_texture_cube_map_array@texsubimage cube_map_array,Fail
spec@arb_texture_cube_map_array@texturesize@fs-texturesize-isamplercubearray,Fail
spec@arb_texture_cube_map_array@texturesize@fs-texturesize-samplercubearray,Fail
spec@arb_texture_cube_map_array@texturesize@fs-texturesize-samplercubearrayshadow,Fail
@@ -194,144 +143,6 @@
spec@arb_texture_float@fbo-generatemipmap-formats@GL_INTENSITY16F_ARB NPOT,Fail
spec@arb_texture_float@multisample-formats 2 gl_arb_texture_float,Crash
spec@arb_texture_float@multisample-formats 4 gl_arb_texture_float,Crash
-spec@arb_texture_float@texwrap formats bordercolor,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA16F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY16F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY32F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE16F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE32F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE_ALPHA16F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE_ALPHA32F_ARB- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_RGB16F- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_RGB32F- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA16F- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA32F- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA16F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY16F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY32F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE16F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE32F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA16F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA32F_ARB- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB16F- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA16F- swizzled- border color only,Fail
-spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-alpha-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-alpha-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-alpha-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-alpha-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-blue-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-blue-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-blue-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-blue-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-green-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-green-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-green-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-green-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-none-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-none-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-none-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-none-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-red-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-red-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-red-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgba-red-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-blue-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-blue-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-blue-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-blue-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-green-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-green-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-green-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-green-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-none-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-none-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-none-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-none-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-red-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-red-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-red-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rgb-red-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-green-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-green-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-green-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-green-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-none-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-none-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-none-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-none-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-red-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-red-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-red-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-rg-red-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-none-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-none-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-none-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-none-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-red-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-red-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-red-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@fs-r-red-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-alpha-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-alpha-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-alpha-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-alpha-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-blue-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-blue-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-blue-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-blue-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-green-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-green-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-green-uint-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-green-unorm-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-none-float-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-none-int-cubearray,Fail
-spec@arb_texture_gather@texturegather@vs-rgba-none-uint-cubearray,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/ci/traces-panfrost.yml
^
|
@@ -5,7 +5,7 @@
- path: behdad-glyphy/glyphy.trace
expectations:
- device: gl-panfrost-t860
- checksum: b6cd8d92987530edcfc36a933c9b07f6
+ checksum: 22bf5262745fd47c5c5eadb93d7cc420
- path: glmark2/desktop:windows=4:effect=blur:blur-radius=5:passes=1:separable=true.trace
expectations:
- device: gl-panfrost-t860
@@ -158,7 +158,7 @@
- path: glmark2/refract.trace
expectations:
- device: gl-panfrost-t860
- checksum: e520a0071fd940be1401aea2bec97709
+ checksum: 6557deca1a47a7a77723658ea579ac63
- path: glmark2/shading:shading=blinn-phong-inf.trace
expectations:
- device: gl-panfrost-t860
@@ -209,11 +209,11 @@
- path: gputest/plot3d.trace
expectations:
- device: gl-panfrost-t860
- checksum: e73715f3b6a4f1609eaf5432af03714e
+ checksum: a34223830866a42747db199b04c5e1be
- path: humus/AmbientAperture.trace
expectations:
- device: gl-panfrost-t860
- checksum: b0d4a64e0907f817161b2a0e85af7a9a
+ checksum: e4c0b930ef99f14305e1ade7f1779c09
- path: humus/CelShading.trace
expectations:
- device: gl-panfrost-t860
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/genxml/v6.xml
^
|
@@ -835,7 +835,7 @@
<field name="Alpha reference" size="32" start="12:0" type="float"/>
<field name="Thread Balancing" size="16" start="13:0" type="uint"/>
<field name="Secondary preload" size="32" start="13:0" type="Preload"/>
- <field name="Secondary shader" size="64" start="13:0" type="address"/>
+ <field name="Secondary shader" size="64" start="14:0" type="address"/>
</struct>
<struct name="Uniform Buffer" align="8">
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/genxml/v7.xml
^
|
@@ -881,7 +881,7 @@
<field name="Alpha reference" size="32" start="12:0" type="float"/>
<field name="Thread Balancing" size="16" start="13:0" type="uint"/>
<field name="Secondary preload" size="32" start="13:0" type="Preload"/>
- <field name="Secondary shader" size="64" start="13:0" type="address"/>
+ <field name="Secondary shader" size="64" start="14:0" type="address"/>
<field name="Message Preload 1" size="16" start="15:0" type="uint"/>
<field name="Message Preload 2" size="16" start="15:16" type="uint"/>
</struct>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/lib/pan_indirect_draw.c
^
|
@@ -1131,7 +1131,6 @@
pan_pack(state, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, address, &cfg);
}
- pthread_mutex_unlock(&dev->indirect_draw_shaders.lock);
draw_shader->push = shader_info.push;
draw_shader->rsd = dev->indirect_draw_shaders.states->ptr.gpu +
@@ -1171,15 +1170,15 @@
const struct indirect_draw_inputs *inputs)
{
struct panfrost_ptr inputs_buf =
- pan_pool_alloc_aligned(pool, sizeof(inputs), 16);
+ pan_pool_alloc_aligned(pool, sizeof(*inputs), 16);
- memcpy(inputs_buf.cpu, &inputs, sizeof(inputs));
+ memcpy(inputs_buf.cpu, inputs, sizeof(*inputs));
struct panfrost_ptr ubos_buf =
pan_pool_alloc_desc(pool, UNIFORM_BUFFER);
pan_pack(ubos_buf.cpu, UNIFORM_BUFFER, cfg) {
- cfg.entries = DIV_ROUND_UP(sizeof(inputs), 16);
+ cfg.entries = DIV_ROUND_UP(sizeof(*inputs), 16);
cfg.pointer = inputs_buf.gpu;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/midgard/meson.build
^
|
@@ -52,7 +52,7 @@
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
libpanfrost_midgard_disasm = static_library(
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/midgard/midgard_compile.c
^
|
@@ -3231,7 +3231,7 @@
/* Report the very first tag executed */
info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
- info->ubo_mask = ctx->ubo_mask & BITSET_MASK(ctx->nir->info.num_ubos);
+ info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1);
if ((midgard_debug & MIDGARD_DBG_SHADERS) &&
((midgard_debug & MIDGARD_DBG_INTERNAL) || !nir->info.internal)) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/shared/pan_tiling.c
^
|
@@ -250,6 +250,12 @@
TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \
}
+/*
+ * Perform a generic access to a tiled image with a given format. This works
+ * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
+ * specified in pixels, not blocks, but our internal routines work in blocks,
+ * so we divide here. Alignment is assumed.
+ */
static void
panfrost_access_tiled_image_generic(void *dst, void *src,
unsigned sx, unsigned sy,
@@ -261,10 +267,13 @@
{
unsigned bpp = desc->block.bits;
- if (desc->block.width > 1) {
- w = DIV_ROUND_UP(w, desc->block.width);
- h = DIV_ROUND_UP(h, desc->block.height);
+ /* Convert units */
+ sx /= desc->block.width;
+ sy /= desc->block.height;
+ w = DIV_ROUND_UP(w, desc->block.width);
+ h = DIV_ROUND_UP(h, desc->block.height);
+ if (desc->block.width > 1) {
if (_is_store)
TILED_UNALIGNED_TYPES(true, 2)
else
@@ -371,6 +380,11 @@
panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
}
+/**
+ * Access a tiled image (load or store). Note: the region of interest (x, y, w,
+ * h) is specified in pixels, not blocks. It is expected that these quantities
+ * are aligned to the block size.
+ */
void
panfrost_store_tiled_image(void *dst, const void *src,
unsigned x, unsigned y,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/util/pan_ir.h
^
|
@@ -112,11 +112,17 @@
unsigned sysval_count;
};
-/* Technically Midgard could go up to 92 in a pathological case but we don't
- * take advantage of that. Likewise Bifrost's FAU encoding can address 128
- * words but actual implementations (G72, G76) are capped at 64 */
-
-#define PAN_MAX_PUSH 64
+/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each.
+ * In practice, the maximum number of FAU slots is limited by implementation.
+ * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the
+ * maximum number of 32-bit words is 128, since there are 2 words per FAU slot.
+ *
+ * Midgard can push at most 92 words, so this bound suffices. The Midgard
+ * compiler pushes less than this, as Midgard uses register-mapped uniforms
+ * instead of FAU, preventing large numbers of uniforms from being pushed
+ * for nontrivial programs.
+ */
+#define PAN_MAX_PUSH 128
/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
* an offset to a word must be < 2^16. There are less than 2^8 UBOs */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_device.c
^
|
@@ -157,7 +157,7 @@
/* Nothing to do yet */
break;
case PANVK_EVENT_OP_WAIT:
- in_fences[*nr_in_fences++] = op->event->syncobj;
+ in_fences[(*nr_in_fences)++] = op->event->syncobj;
break;
default:
unreachable("bad panvk_event_op type\n");
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_meta_clear.c
^
|
@@ -70,8 +70,7 @@
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
/* Make sure UBO words have been upgraded to push constants */
- assert(shader_info->ubo_count == 1);
- assert(shader_info->push.count == 4);
+ assert(shader_info->ubo_mask == 0);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
@@ -138,8 +137,7 @@
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
/* Make sure UBO words have been upgraded to push constants */
- assert(shader_info->ubo_count == 1);
- assert(shader_info->push.count == 2);
+ assert(shader_info->ubo_mask == 0);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/00-mesa-defaults.conf
^
|
@@ -293,6 +293,11 @@
<option name="force_integer_tex_nearest" value="true" />
</application>
+ <application name="DiRT Rally" executable="DirtRally">
+ <!-- https://gitlab.freedesktop.org/mesa/mesa/-/issues/5648 -->
+ <option name="vs_position_always_invariant" value="true" />
+ </application>
+
<!-- Workarounds for SPECviewperf relying on invalid / non-conformant
OpenGL behavior. Older SPECviewperf versions might also need this.
-->
@@ -793,92 +798,6 @@
<option name="vs_position_always_invariant" value="true" />
</application>
</device>
- <device driver="radv">
- <!-- Engine workarounds -->
- <engine engine_name_match="vkd3d">
- <option name="radv_zero_vram" value="true" />
- </engine>
-
- <engine engine_name_match="Quantic Dream Engine">
- <option name="radv_zero_vram" value="true" />
- <option name="radv_lower_discard_to_demote" value="true" />
- <option name="radv_disable_tc_compat_htile_general" value="true" />
- </engine>
-
- <!-- Game workarounds -->
- <application name="Shadow Of The Tomb Raider (Native)" application_name_match="ShadowOfTheTomb">
- <option name="radv_report_llvm9_version_string" value="true" />
- <option name="radv_invariant_geom" value="true" />
- </application>
-
- <application name="Shadow Of The Tomb Raider (DX11/DX12)" application_name_match="SOTTR.exe">
- <option name="radv_invariant_geom" value="true" />
- </application>
-
- <application name="RAGE 2" executable="RAGE2.exe">
- <option name="radv_enable_mrt_output_nan_fixup" value="true" />
- </application>
-
- <application name="Path of Exile (64-bit, Steam)" executable="PathOfExile_x64Steam.exe">
- <option name="radv_no_dynamic_bounds" value="true" />
- <option name="radv_absolute_depth_bias" value="true" />
- </application>
- <application name="Path of Exile (32-bit, Steam)" executable="PathOfExileSteam.exe">
- <option name="radv_no_dynamic_bounds" value="true" />
- <option name="radv_absolute_depth_bias" value="true" />
- </application>
- <application name="Path of Exile (64-bit)" executable="PathOfExile_x64.exe">
- <option name="radv_no_dynamic_bounds" value="true" />
- <option name="radv_absolute_depth_bias" value="true" />
- </application>
- <application name="Path of Exile (32-bit)" executable="PathOfExile.exe">
- <option name="radv_no_dynamic_bounds" value="true" />
- <option name="radv_absolute_depth_bias" value="true" />
- </application>
-
- <application name="The Surge 2" application_name_match="Fledge">
- <option name="radv_disable_shrink_image_store" value="true" />
- <option name="radv_zero_vram" value="true" />
- </application>
-
- <application name="World War Z (and World War Z: Aftermath)" application_name_match="WWZ|wwz">
- <option name="radv_override_uniform_offset_alignment" value="16" />
- <option name="radv_disable_shrink_image_store" value="true" />
- <option name="radv_invariant_geom" value="true" />
- </application>
-
- <application name="DOOM VFR" application_name_match="DOOM_VFR">
- <option name="radv_no_dynamic_bounds" value="true" />
- </application>
-
- <application name="DOOM Eternal" application_name_match="DOOMEternal">
- <option name="radv_zero_vram" value="true" />
- </application>
-
- <application name="No Man's Sky" application_name_match="No Man's Sky">
- <option name="radv_lower_discard_to_demote" value="true" />
- </application>
-
- <application name="Monster Hunter World" application_name_match="MonsterHunterWorld.exe">
- <option name="radv_invariant_geom" value="true" />
- </application>
-
- <application name="DOOM (2016)" application_name_match="DOOM$">
- <option name="radv_disable_dcc" value="true" />
- </application>
-
- <application name="Wolfenstein II" application_name_match="Wolfenstein II The New Colossus">
- <option name="radv_disable_dcc" value="true" />
- </application>
-
- <application name="RDR2" application_name_match="Red Dead Redemption 2">
- <option name="radv_report_apu_as_dgpu" value="true" />
- </application>
-
- <application name="Resident Evil Village" application_name_match="re8.exe">
- <option name="radv_invariant_geom" value="true" />
- </application>
- </device>
<!--
The android game hall of shame:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/bitset.h
^
|
@@ -208,7 +208,7 @@
static inline void
__bitset_set_range(BITSET_WORD *r, unsigned start, unsigned end)
{
- const unsigned size = end - start;
+ const unsigned size = end - start + 1;
const unsigned start_mod = start % BITSET_WORDBITS;
if (start_mod + size <= BITSET_WORDBITS) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/driconf.h
^
|
@@ -520,4 +520,8 @@
DRI_CONF_OPT_B(radv_report_apu_as_dgpu, def, \
"Report APUs as discrete GPUs instead of integrated GPUs")
+#define DRI_CONF_RADV_DISABLE_HTILE_LAYERS(def) \
+ DRI_CONF_OPT_B(radv_disable_htile_layers, def, \
+ "Disable HTILE for layered depth/stencil formats")
+
#endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/driconf_static.py
^
|
@@ -46,6 +46,7 @@
self.cname = cname('application')
self.name = xml.attrib['name']
self.executable = xml.attrib.get('executable', None)
+ self.executable_regexp = xml.attrib.get('executable_regexp', None)
self.sha1 = xml.attrib.get('sha1', None)
self.application_name_match = xml.attrib.get('application_name_match', None)
self.application_versions = xml.attrib.get('application_versions', None)
@@ -118,6 +119,7 @@
struct driconf_application {
const char *name;
const char *executable;
+ const char *executable_regexp;
const char *sha1;
const char *application_name_match;
const char *application_versions;
@@ -179,6 +181,9 @@
% if application.executable:
.executable = "${application.executable}",
% endif
+% if application.executable_regexp:
+ .executable_regexp = "${application.executable_regexp}",
+% endif
% if application.sha1:
.sha1 = "${application.sha1}",
% endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/fossilize_db.c
^
|
@@ -318,8 +318,12 @@
free(filename);
free(idx_filename);
- if (!check_files_opened_successfully(foz_db->file[file_idx], db_idx))
+ if (!check_files_opened_successfully(foz_db->file[file_idx], db_idx)) {
+ /* Prevent foz_destroy from destroying it a second time. */
+ foz_db->file[file_idx] = NULL;
+
continue; /* Ignore invalid user provided filename and continue */
+ }
if (!load_foz_dbs(foz_db, db_idx, file_idx, true)) {
fclose(db_idx);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/perf/u_trace.c
^
|
@@ -24,16 +24,17 @@
#include <inttypes.h>
#include "util/list.h"
-#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_inlines.h"
#include "util/u_fifo.h"
+#include "util/u_vector.h"
#include "u_trace.h"
#define __NEEDS_TRACE_PRIV
#include "u_trace_priv.h"
+#define PAYLOAD_BUFFER_SIZE 0x100
#define TIMESTAMP_BUF_SIZE 0x1000
#define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t))
@@ -49,6 +50,14 @@
struct list_head ctx_list = { &ctx_list, &ctx_list };
#endif
+struct u_trace_payload_buf {
+ uint32_t refcount;
+
+ uint8_t *buf;
+ uint8_t *next;
+ uint8_t *end;
+};
+
struct u_trace_event {
const struct u_tracepoint *tp;
const void *payload;
@@ -76,12 +85,12 @@
*/
void *timestamps;
- /**
- * For trace payload, we sub-allocate from ralloc'd buffers which
- * hang off of the chunk's ralloc context, so they are automatically
- * free'd when the chunk is free'd
+ /* Array of u_trace_payload_buf referenced by traces[] elements.
*/
- uint8_t *payload_buf, *payload_end;
+ struct u_vector payloads;
+
+ /* Current payload buffer being written. */
+ struct u_trace_payload_buf *payload;
struct util_queue_fence fence;
@@ -97,6 +106,35 @@
bool free_flush_data;
};
+static struct u_trace_payload_buf *
+u_trace_payload_buf_create(void)
+{
+ struct u_trace_payload_buf *payload =
+ malloc(sizeof(*payload) + PAYLOAD_BUFFER_SIZE);
+
+ p_atomic_set(&payload->refcount, 1);
+
+ payload->buf = (uint8_t *) (payload + 1);
+ payload->end = payload->buf + PAYLOAD_BUFFER_SIZE;
+ payload->next = payload->buf;
+
+ return payload;
+}
+
+static struct u_trace_payload_buf *
+u_trace_payload_buf_ref(struct u_trace_payload_buf *payload)
+{
+ p_atomic_inc(&payload->refcount);
+ return payload;
+}
+
+static void
+u_trace_payload_buf_unref(struct u_trace_payload_buf *payload)
+{
+ if (p_atomic_dec_zero(&payload->refcount))
+ free(payload);
+}
+
static void
free_chunk(void *ptr)
{
@@ -104,7 +142,14 @@
chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps);
+ /* Unref payloads attached to this chunk. */
+ struct u_trace_payload_buf **payload;
+ u_vector_foreach(payload, &chunk->payloads)
+ u_trace_payload_buf_unref(*payload);
+ u_vector_finish(&chunk->payloads);
+
list_del(&chunk->node);
+ free(chunk);
}
static void
@@ -113,21 +158,41 @@
while (!list_is_empty(chunks)) {
struct u_trace_chunk *chunk = list_first_entry(chunks,
struct u_trace_chunk, node);
- ralloc_free(chunk);
+ free_chunk(chunk);
}
}
static struct u_trace_chunk *
-get_chunk(struct u_trace *ut)
+get_chunk(struct u_trace *ut, size_t payload_size)
{
struct u_trace_chunk *chunk;
+ assert(payload_size <= PAYLOAD_BUFFER_SIZE);
+
/* do we currently have a non-full chunk to append msgs to? */
if (!list_is_empty(&ut->trace_chunks)) {
chunk = list_last_entry(&ut->trace_chunks,
struct u_trace_chunk, node);
- if (chunk->num_traces < TRACES_PER_CHUNK)
- return chunk;
+ /* Can we store a new trace in the chunk? */
+ if (chunk->num_traces < TRACES_PER_CHUNK) {
+ /* If no payload required, nothing else to check. */
+ if (payload_size <= 0)
+ return chunk;
+
+ /* If the payload buffer has space for the payload, we're good.
+ */
+ if (chunk->payload &&
+ (chunk->payload->end - chunk->payload->next) >= payload_size)
+ return chunk;
+
+ /* Otherwise the current payload buffer is missing or too small:
+ * allocate a new one and make it the chunk's current buffer.
+ */
+ struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
+ *buf = u_trace_payload_buf_create();
+ chunk->payload = *buf;
+ return chunk;
+ }
/* we need to expand to add another chunk to the batch, so
* the current one is no longer the last one of the batch:
*/
@@ -135,12 +200,17 @@
}
/* .. if not, then create a new one: */
- chunk = rzalloc_size(NULL, sizeof(*chunk));
- ralloc_set_destructor(chunk, free_chunk);
+ chunk = calloc(1, sizeof(*chunk));
chunk->utctx = ut->utctx;
chunk->timestamps = ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE);
chunk->last = true;
+ u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf *));
+ if (payload_size > 0) {
+ struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
+ *buf = u_trace_payload_buf_create();
+ chunk->payload = *buf;
+ }
list_addtail(&chunk->node, &ut->trace_chunks);
@@ -319,7 +389,7 @@
static void
cleanup_chunk(void *job, void *gdata, int thread_index)
{
- ralloc_free(job);
+ free_chunk(job);
}
void
@@ -417,7 +487,7 @@
uint32_t from_idx = begin_it.event_idx;
while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) {
- struct u_trace_chunk *to_chunk = get_chunk(into);
+ struct u_trace_chunk *to_chunk = get_chunk(into, 0 /* payload_size */);
unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces,
from_chunk->num_traces - from_idx);
@@ -433,6 +503,17 @@
&from_chunk->traces[from_idx],
to_copy * sizeof(struct u_trace_event));
+ /* Take a refcount on payloads from from_chunk if needed. */
+ if (begin_it.ut != into) {
+ struct u_trace_payload_buf **in_payload;
+ u_vector_foreach(in_payload, &from_chunk->payloads) {
+ struct u_trace_payload_buf **out_payload =
+ u_vector_add(&to_chunk->payloads);
+
+ *out_payload = u_trace_payload_buf_ref(*in_payload);
+ }
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/perf/u_trace.h
^
|
@@ -235,9 +235,6 @@
* Provides callback for driver to copy timestamps on GPU from
* one buffer to another.
*
- * The payload is shared and remains owned by the original u_trace
- * if tracepoints are being copied between different u_trace!
- *
* It allows:
* - Tracing re-usable command buffer in Vulkan, by copying tracepoints
* each time it is submitted.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/slab.c
^
|
@@ -110,6 +110,7 @@
parent->element_size = ALIGN_POT(sizeof(struct slab_element_header) + item_size,
sizeof(intptr_t));
parent->num_elements = num_items;
+ parent->item_size = item_size;
}
void
@@ -231,6 +232,18 @@
}
/**
+ * Same as slab_alloc, but zero-fills the returned object (item_size bytes).
+ */
+void *
+slab_zalloc(struct slab_child_pool *pool)
+{
+ void *r = slab_alloc(pool);
+ if (r)
+ memset(r, 0, pool->parent->item_size);
+ return r;
+}
+
+/**
* Free an object allocated from the slab. Single-threaded (i.e. the caller
* must ensure that no operation happens on the same child pool in another
* thread).
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/slab.h
^
|
@@ -55,6 +55,7 @@
simple_mtx_t mutex;
unsigned element_size;
unsigned num_elements;
+ unsigned item_size;
};
struct slab_child_pool {
@@ -81,6 +82,7 @@
struct slab_parent_pool *parent);
void slab_destroy_child(struct slab_child_pool *pool);
void *slab_alloc(struct slab_child_pool *pool);
+void *slab_zalloc(struct slab_child_pool *pool);
void slab_free(struct slab_child_pool *pool, void *ptr);
struct slab_mempool {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_atomic.c
^
|
@@ -34,6 +34,21 @@
static pthread_mutex_t sync_mutex = PTHREAD_MUTEX_INITIALIZER;
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma redefine_extname __sync_add_and_fetch_8_c __sync_add_and_fetch_8
+#pragma redefine_extname __sync_sub_and_fetch_8_c __sync_sub_and_fetch_8
+#pragma redefine_extname __sync_fetch_and_add_8_c __sync_fetch_and_add_8
+#pragma redefine_extname __sync_fetch_and_sub_8_c __sync_fetch_and_sub_8
+#pragma redefine_extname __sync_val_compare_and_swap_8_c \
+ __sync_val_compare_and_swap_8
+#define __sync_add_and_fetch_8 __sync_add_and_fetch_8_c
+#define __sync_sub_and_fetch_8 __sync_sub_and_fetch_8_c
+#define __sync_fetch_and_add_8 __sync_fetch_and_add_8_c
+#define __sync_fetch_and_sub_8 __sync_fetch_and_sub_8_c
+#define __sync_val_compare_and_swap_8 __sync_val_compare_and_swap_8_c
+#endif
+
WEAK uint64_t
__sync_add_and_fetch_8(uint64_t *ptr, uint64_t val)
{
@@ -58,6 +73,32 @@
pthread_mutex_unlock(&sync_mutex);
return r;
+}
+
+/* Mutex-protected 64-bit fetch-and-add: adds val to *ptr and returns the
+ * value *ptr held before the addition.
+ */
+WEAK uint64_t
+__sync_fetch_and_add_8(uint64_t *ptr, uint64_t val)
+{
+   pthread_mutex_lock(&sync_mutex);
+   const uint64_t previous = *ptr;
+   *ptr = previous + val;
+   pthread_mutex_unlock(&sync_mutex);
+
+   return previous;
+}
+
+/* Mutex-protected 64-bit fetch-and-sub: subtracts val from *ptr and returns
+ * the value *ptr held before the subtraction.
+ */
+WEAK uint64_t
+__sync_fetch_and_sub_8(uint64_t *ptr, uint64_t val)
+{
+   pthread_mutex_lock(&sync_mutex);
+   const uint64_t previous = *ptr;
+   *ptr = previous - val;
+   pthread_mutex_unlock(&sync_mutex);
+
+   return previous;
}
WEAK uint64_t
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_cpu_detect.c
^
|
@@ -136,7 +136,7 @@
int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
#endif
int has_vu = 0;
- int len = sizeof (has_vu);
+ size_t len = sizeof (has_vu);
int err;
err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
@@ -438,6 +438,7 @@
static void
check_os_mips64_support(void)
{
+#if defined(PIPE_OS_LINUX)
Elf64_auxv_t aux;
int fd;
@@ -453,6 +454,7 @@
}
close (fd);
}
+#endif /* PIPE_OS_LINUX */
}
#endif /* PIPE_ARCH_MIPS64 */
@@ -623,7 +625,7 @@
if (available_cpus == 0) {
const int mib[] = { CTL_HW, HW_NCPUONLINE };
int ncpu;
- int len = sizeof(ncpu);
+ size_t len = sizeof(ncpu);
sysctl(mib, 2, &ncpu, &len, NULL, 0);
available_cpus = ncpu;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/u_debug_stack.c
^
|
@@ -199,7 +199,6 @@
unsigned start_frame,
unsigned nr_frames)
{
- const void **frame_pointer = NULL;
unsigned i = 0;
if (!nr_frames) {
@@ -250,21 +249,22 @@
}
#endif
+#ifdef PIPE_ARCH_X86
#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION > 404) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-address"
- frame_pointer = ((const void **)__builtin_frame_address(1));
+ const void **frame_pointer = ((const void **)__builtin_frame_address(1));
#pragma GCC diagnostic pop
-#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
+#elif defined(PIPE_CC_MSVC)
+ const void **frame_pointer;
__asm {
mov frame_pointer, ebp
}
frame_pointer = (const void **)frame_pointer[0];
#else
- frame_pointer = NULL;
+ const void **frame_pointer = NULL;
#endif
-#ifdef PIPE_ARCH_X86
while (nr_frames) {
const void **next_frame_pointer;
@@ -287,8 +287,6 @@
frame_pointer = next_frame_pointer;
}
-#else
- (void) frame_pointer;
#endif
while (nr_frames) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/util/xmlconfig.c
^
|
@@ -1093,6 +1093,8 @@
const char *appattr[] = {
"name", a->name,
"executable", a->executable,
+ "executable_regexp", a->executable_regexp,
+ "sha1", a->sha1,
"application_name_match", a->application_name_match,
"application_versions", a->application_versions,
NULL
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_image.c
^
|
@@ -206,7 +206,9 @@
const VkImageCreateInfo *create_info,
struct vn_image *img)
{
- return vn_image_init(dev, create_info, img);
+ VkResult result = vn_image_init(dev, create_info, img);
+ img->deferred_info->initialized = result == VK_SUCCESS;
+ return result;
}
VkResult
@@ -298,7 +300,9 @@
if (img->private_memory != VK_NULL_HANDLE)
vn_FreeMemory(device, img->private_memory, pAllocator);
- vn_async_vkDestroyImage(dev->instance, device, image, NULL);
+ /* must not ask renderer to destroy uninitialized deferred image */
+ if (!img->deferred_info || img->deferred_info->initialized)
+ vn_async_vkDestroyImage(dev->instance, device, image, NULL);
if (img->deferred_info)
vk_free(alloc, img->deferred_info);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_image.h
^
|
@@ -22,6 +22,9 @@
VkImageCreateInfo create;
VkImageFormatListCreateInfo list;
VkImageStencilUsageCreateInfo stencil;
+
+ /* track whether vn_image_init_deferred succeeds */
+ bool initialized;
};
struct vn_image {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_physical_device.c
^
|
@@ -2196,8 +2196,25 @@
if (result != VK_SUCCESS || !external_info)
return vn_result(physical_dev->instance, result);
+ if (external_info->handleType ==
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
+ VkAndroidHardwareBufferUsageANDROID *ahb_usage =
+ vk_find_struct(pImageFormatProperties->pNext,
+ ANDROID_HARDWARE_BUFFER_USAGE_ANDROID);
+ if (ahb_usage) {
+ ahb_usage->androidHardwareBufferUsage = vn_android_get_ahb_usage(
+ pImageFormatInfo->usage, pImageFormatInfo->flags);
+ }
+
+ /* AHBs with mipmap usage will ignore this property */
+ pImageFormatProperties->imageFormatProperties.maxMipLevels = 1;
+ }
+
VkExternalImageFormatProperties *img_props = vk_find_struct(
pImageFormatProperties->pNext, EXTERNAL_IMAGE_FORMAT_PROPERTIES);
+ if (!img_props)
+ return VK_SUCCESS;
+
VkExternalMemoryProperties *mem_props =
&img_props->externalMemoryProperties;
@@ -2217,17 +2234,6 @@
VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
mem_props->compatibleHandleTypes =
VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
-
- VkAndroidHardwareBufferUsageANDROID *ahb_usage =
- vk_find_struct(pImageFormatProperties->pNext,
- ANDROID_HARDWARE_BUFFER_USAGE_ANDROID);
- if (ahb_usage) {
- ahb_usage->androidHardwareBufferUsage = vn_android_get_ahb_usage(
- pImageFormatInfo->usage, pImageFormatInfo->flags);
- }
-
- /* AHBs with mipmap usage will ignore this property */
- pImageFormatProperties->imageFormatProperties.maxMipLevels = 1;
} else {
mem_props->compatibleHandleTypes = supported_handle_types;
mem_props->exportFromImportedHandleTypes =
@@ -2236,7 +2242,7 @@
: 0;
}
- return vn_result(physical_dev->instance, result);
+ return VK_SUCCESS;
}
void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/virtio/vulkan/vn_pipeline.c
^
|
@@ -140,9 +140,12 @@
VkPipelineCacheCreateInfo local_create_info;
if (pCreateInfo->initialDataSize) {
+ const struct vk_pipeline_cache_header *header =
+ pCreateInfo->pInitialData;
+
local_create_info = *pCreateInfo;
- local_create_info.pInitialData +=
- sizeof(struct vk_pipeline_cache_header);
+ local_create_info.initialDataSize -= header->header_size;
+ local_create_info.pInitialData += header->header_size;
pCreateInfo = &local_create_info;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/device-select-layer/device_select_x11.c
^
|
@@ -67,6 +67,8 @@
int scrn;
xcb_connection_t *conn;
int default_idx = -1;
+ drmDevicePtr xdev = NULL;
+
conn = xcb_connect(NULL, &scrn);
if (!conn)
return -1;
@@ -91,7 +93,6 @@
if (dri3_fd == -1)
goto out;
- drmDevicePtr xdev;
int ret = drmGetDevice2(dri3_fd, 0, &xdev);
close(dri3_fd);
if (ret < 0)
@@ -113,7 +114,9 @@
if (default_idx != -1)
break;
}
+
out:
+ drmFreeDevice(&xdev); /* Is NULL pointer safe. */
xcb_disconnect(conn);
return default_idx;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/gen_enum_to_str.py
^
|
@@ -78,8 +78,9 @@
case ${v}:
return "${enum.values[v]}";
% endfor
+ case ${enum.max_enum_name}: return "${enum.max_enum_name}";
default:
- unreachable("Undefined enum value.");
+ return "Unknown ${enum.name} value.";
}
}
@@ -116,7 +117,7 @@
return "${object_types[0].enum_to_name[object_type]}";
% endfor
default:
- unreachable("Undefined enum value.");
+ return "Unknown VkObjectType value.";
}
}
"""))
@@ -246,12 +247,24 @@
def CamelCase_to_SHOUT_CASE(s):
return (s[:1] + re.sub(r'(?<![A-Z])([A-Z])', r'_\1', s[1:])).upper()
+def compute_max_enum_name(s):
+ max_enum_name = CamelCase_to_SHOUT_CASE(s)
+ last_prefix = max_enum_name.rsplit('_', 1)[-1]
+ # Those special prefixes need to be always at the end
+ if last_prefix in ['AMD', 'EXT', 'INTEL', 'KHR', 'NV'] :
+ max_enum_name = "_".join(max_enum_name.split('_')[:-1])
+ max_enum_name = max_enum_name + "_MAX_ENUM_" + last_prefix
+ else:
+ max_enum_name = max_enum_name + "_MAX_ENUM"
+
+ return max_enum_name
class VkEnum(object):
"""Simple struct-like class representing a single Vulkan Enum."""
def __init__(self, name, bitwidth=32, values=None):
self.name = name
+ self.max_enum_name = compute_max_enum_name(name)
self.bitwidth = bitwidth
self.extension = None
# Maps numbers to names
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/vk_log.c
^
|
@@ -268,7 +268,6 @@
case VK_ERROR_TOO_MANY_OBJECTS:
return &vk_object_to_device(obj)->base;
default:
- assert(obj->client_visible);
return obj;
}
}
@@ -306,6 +305,8 @@
VK_LOG_NO_OBJS(instance), file, line,
"%s (%s)", message, error_str);
}
+
+ ralloc_free(message);
} else {
if (object) {
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/util/vk_synchronization2.c
^
|
@@ -27,6 +27,7 @@
#include "vk_device.h"
#include "vk_queue.h"
#include "vk_util.h"
+#include "../wsi/wsi_common.h"
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdWriteTimestamp(
@@ -291,6 +292,7 @@
STACK_ARRAY(VkSubmitInfo2KHR, submit_info_2, submitCount);
STACK_ARRAY(VkPerformanceQuerySubmitInfoKHR, perf_query_submit_info, submitCount);
+ STACK_ARRAY(struct wsi_memory_signal_submit_info, wsi_mem_submit_info, submitCount);
uint32_t n_wait_semaphores = 0;
uint32_t n_command_buffers = 0;
@@ -373,6 +375,15 @@
__vk_append_struct(&submit_info_2[s], &perf_query_submit_info[s]);
}
+ const struct wsi_memory_signal_submit_info *mem_signal_info =
+ vk_find_struct_const(pSubmits[s].pNext,
+ WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
+ if (mem_signal_info) {
+ wsi_mem_submit_info[s] = *mem_signal_info;
+ wsi_mem_submit_info[s].pNext = NULL;
+ __vk_append_struct(&submit_info_2[s], &wsi_mem_submit_info[s]);
+ }
+
n_wait_semaphores += pSubmits[s].waitSemaphoreCount;
n_command_buffers += pSubmits[s].commandBufferCount;
n_signal_semaphores += pSubmits[s].signalSemaphoreCount;
@@ -388,6 +399,7 @@
STACK_ARRAY_FINISH(signal_semaphores);
STACK_ARRAY_FINISH(submit_info_2);
STACK_ARRAY_FINISH(perf_query_submit_info);
+ STACK_ARRAY_FINISH(wsi_mem_submit_info);
return result;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_display.c
^
|
@@ -460,10 +460,8 @@
}
}
-VKAPI_ATTR VkResult VKAPI_CALL
-wsi_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physicalDevice,
- uint32_t *pPropertyCount,
- VkDisplayProperties2KHR *pProperties)
+static VkResult
+wsi_get_connectors(VkPhysicalDevice physicalDevice)
{
VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
struct wsi_device *wsi_device = pdevice->wsi_device;
@@ -471,27 +469,46 @@
(struct wsi_display *) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY];
if (wsi->fd < 0)
- goto bail;
+ return VK_SUCCESS;
drmModeResPtr mode_res = drmModeGetResources(wsi->fd);
if (!mode_res)
- goto bail;
-
- VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
/* Get current information */
-
for (int c = 0; c < mode_res->count_connectors; c++) {
struct wsi_display_connector *connector =
wsi_display_get_connector(wsi_device, wsi->fd,
mode_res->connectors[c]);
-
if (!connector) {
drmModeFreeResources(mode_res);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
+ }
+ drmModeFreeResources(mode_res);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+wsi_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physicalDevice,
+ uint32_t *pPropertyCount,
+ VkDisplayProperties2KHR *pProperties)
+{
+ VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
+ struct wsi_device *wsi_device = pdevice->wsi_device;
+ struct wsi_display *wsi =
+ (struct wsi_display *) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY];
+
+ /* Get current information */
+ VkResult result = wsi_get_connectors(physicalDevice);
+ if (result != VK_SUCCESS)
+ goto bail;
+
+ VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount);
+
+ wsi_for_each_connector(connector, wsi) {
if (connector->connected) {
vk_outarray_append(&conn, prop) {
wsi_display_fill_in_display_properties(wsi_device,
@@ -501,13 +518,11 @@
}
}
- drmModeFreeResources(mode_res);
-
return vk_outarray_status(&conn);
bail:
*pPropertyCount = 0;
- return VK_SUCCESS;
+ return result;
}
/*
@@ -541,6 +556,10 @@
struct wsi_display *wsi =
(struct wsi_display *) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY];
+ VkResult result = wsi_get_connectors(physicalDevice);
+ if (result != VK_SUCCESS)
+ goto bail;
+
VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount);
wsi_for_each_connector(connector, wsi) {
@@ -554,6 +573,10 @@
}
}
return vk_outarray_status(&conn);
+
+bail:
+ *pPropertyCount = 0;
+ return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
@@ -566,6 +589,11 @@
struct wsi_display *wsi =
(struct wsi_display *) wsi_device->wsi[VK_ICD_WSI_PLATFORM_DISPLAY];
+ /* Get current information */
+ VkResult result = wsi_get_connectors(physicalDevice);
+ if (result != VK_SUCCESS)
+ goto bail;
+
VK_OUTARRAY_MAKE(conn, pProperties, pPropertyCount);
wsi_for_each_connector(connector, wsi) {
@@ -575,6 +603,10 @@
}
}
return vk_outarray_status(&conn);
+
+bail:
+ *pPropertyCount = 0;
+ return result;
}
/*
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_drm.c
^
|
@@ -531,7 +531,7 @@
.sType = VK_STRUCTURE_TYPE_WSI_IMAGE_CREATE_INFO_MESA,
.prime_blit_src = true,
};
- const VkImageCreateInfo image_info = {
+ VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = &image_wsi_info,
.flags = 0,
@@ -552,6 +552,10 @@
.pQueueFamilyIndices = pCreateInfo->pQueueFamilyIndices,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
+ if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR) {
+ image_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR;
+ }
result = wsi->CreateImage(chain->device, &image_info,
&chain->alloc, &image->image);
if (result != VK_SUCCESS)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-21.3.9+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_wayland.c
^
|
@@ -294,14 +294,25 @@
format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8_UNORM,
true, true);
- FALLTHROUGH;
+ if (format)
+ wsi_wl_format_add_modifier(format, modifier);
+ if (srgb_format)
+ wsi_wl_format_add_modifier(srgb_format, modifier);
+
+ srgb_format = wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_R8G8B8A8_SRGB,
+ false, true);
+ format = wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_R8G8B8A8_UNORM,
+ false, true);
+ break;
case DRM_FORMAT_ABGR8888:
srgb_format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8A8_SRGB,
- true, true);
+ true, false);
format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8A8_UNORM,
- true, true);
+ true, false);
break;
case DRM_FORMAT_XRGB8888:
srgb_format = wsi_wl_display_add_vk_format(display, formats,
@@ -310,14 +321,25 @@
format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8_UNORM,
true, true);
- FALLTHROUGH;
+ if (format)
+ wsi_wl_format_add_modifier(format, modifier);
+ if (srgb_format)
+ wsi_wl_format_add_modifier(srgb_format, modifier);
+
+ srgb_format = wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_B8G8R8A8_SRGB,
+ false, true);
+ format = wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ false, true);
+ break;
case DRM_FORMAT_ARGB8888:
srgb_format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8A8_SRGB,
- true, true);
+ true, false);
format = wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8A8_UNORM,
- true, true);
+ true, false);
break;
}
@@ -336,11 +358,17 @@
case WL_SHM_FORMAT_XBGR8888:
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8_SRGB,
- false, true);
+ true, true);
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8_UNORM,
+ true, true);
+ wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_R8G8B8A8_SRGB,
+ false, true);
+ wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_R8G8B8A8_UNORM,
false, true);
- FALLTHROUGH;
+ break;
case WL_SHM_FORMAT_ABGR8888:
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_R8G8B8A8_SRGB,
@@ -352,11 +380,17 @@
case WL_SHM_FORMAT_XRGB8888:
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8_SRGB,
- false, true);
+ true, true);
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8_UNORM,
+ true, true);
+ wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_B8G8R8A8_SRGB,
false, true);
- FALLTHROUGH;
+ wsi_wl_display_add_vk_format(display, formats,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ false, true);
+ break;
case WL_SHM_FORMAT_ARGB8888:
wsi_wl_display_add_vk_format(display, formats,
VK_FORMAT_B8G8R8A8_SRGB,
@@ -427,6 +461,12 @@
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_B8G8R8A8_SRGB:
return alpha ? WL_SHM_FORMAT_ARGB8888 : WL_SHM_FORMAT_XRGB8888;
+ case VK_FORMAT_R8G8B8_UNORM:
+ case VK_FORMAT_R8G8B8_SRGB:
+ return WL_SHM_FORMAT_XBGR8888;
+ case VK_FORMAT_B8G8R8_UNORM:
+ case VK_FORMAT_B8G8R8_SRGB:
+ return WL_SHM_FORMAT_XRGB8888;
default:
assert(!"Unsupported Vulkan format");
|