Skip to content
Snippets Groups Projects
2.1.11-optimize-access-checks.patch 6.69 KiB
Newer Older
From 9e7724f9011c0b42b982ed9d67ce3a5d75e33441 Mon Sep 17 00:00:00 2001
From: Andrew Walker <awalker@ixsystems.com>
Date: Tue, 15 Mar 2022 11:03:29 -0400
Subject: [PATCH 1/2] Linux optimize access checks when ACL is trivial

Bypass check of ZFS aces if the ACL is trivial. When an ACL is
trivial its permissions are represented by the mode without any
loss of information. In this case, it is safe to convert the
access request into equivalent mode and then pass desired mask
and inode to generic_permission(). This has the added benefit
of also checking whether entries in a POSIX ACL on the file grant
the desired access.

This commit also skips the ACL check on looking up the xattr dir
since such restrictions don't exist in Linux kernel and it makes
xattr lookup behavior inconsistent between SA and file-based
xattrs. We also don't want to perform a POSIX ACL check while
looking up the POSIX ACL if for some reason it is located in
the xattr dir rather than an SA.

Signed-off-by: Andrew Walker <awalker@ixsystems.com>
---
 module/os/linux/zfs/zfs_acl.c      | 70 ++++++++++++++++++++++++++++++
 module/os/linux/zfs/zfs_vnops_os.c |  2 +-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c
index 351e4dad799..b70691ab31c 100644
--- a/module/os/linux/zfs/zfs_acl.c
+++ b/module/os/linux/zfs/zfs_acl.c
@@ -863,6 +863,26 @@ zfs_unix_to_v4(uint32_t access_mask)
 	return (new_mask);
 }
 
+
+static int
+zfs_v4_to_unix(uint32_t access_mask, int *unmapped)
+{
+	int new_mask = 0;
+
+	*unmapped = access_mask &
+	    (ACE_WRITE_OWNER | ACE_WRITE_ACL | ACE_DELETE);
+
+	if (access_mask & WRITE_MASK)
+		new_mask |= S_IWOTH;
+	if (access_mask & ACE_READ_DATA)
+		new_mask |= S_IROTH;
+	if (access_mask & ACE_EXECUTE)
+		new_mask |= S_IXOTH;
+
+	return (new_mask);
+}
+
+
 static void
 zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
     uint16_t access_type, uint64_t fuid, uint16_t entry_type)
@@ -2399,6 +2419,53 @@ zfs_has_access(znode_t *zp, cred_t *cr)
 	return (B_TRUE);
 }
 
+/*
+ * Simplified access check for case where ACL is known to not contain
+ * information beyond what is defined in the mode. In this case, we
+ * can pass along to the kernel / vfs generic_permission() check, which
+ * evaluates the mode and POSIX ACL.
+ *
+ * NFSv4 ACLs allow granting permissions that are usually relegated only
+ * to the file owner or superuser. Examples are ACE_WRITE_OWNER (chown),
+ * ACE_WRITE_ACL(chmod), and ACE_DELETE. ACE_DELETE requests must fail
+ * because with conventional posix permissions, right to delete file
+ * is determined by write bit on the parent dir.
+ *
+ * If unmappable perms are requested, then we must return EPERM
+ * and include those bits in the working_mode so that the caller of
+ * zfs_zaccess_common() can decide whether to perform additional
+ * policy / capability checks. EACCES is used in zfs_zaccess_aces_check()
+ * to indicate access check failed due to explicit DENY entry, and so
+ * we want to avoid that here.
+ */
+static int
+zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
+{
+	int err, mask;
+	int unmapped = 0;
+
+	ASSERT(zp->z_pflags & ZFS_ACL_TRIVIAL);
+
+	mask = zfs_v4_to_unix(*working_mode, &unmapped);
+	if (mask == 0 || unmapped) {
+		*working_mode = unmapped;
+		return (unmapped ? SET_ERROR(EPERM) : 0);
+	}
+
+#if defined(HAVE_IOPS_PERMISSION_USERNS)
+	err = generic_permission(cr->user_ns, ZTOI(zp), mask);
+#else
+	err = generic_permission(ZTOI(zp), mask);
+#endif
+	if (err != 0) {
+		return (SET_ERROR(EPERM));
+	}
+
+	*working_mode = unmapped;
+
+	return (0);
+}
+
 static int
 zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
     boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
@@ -2450,6 +2517,9 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
 		return (SET_ERROR(EPERM));
 	}
 
+	if (zp->z_pflags & ZFS_ACL_TRIVIAL)
+		return (zfs_zaccess_trivial(zp, working_mode, cr));
+
 	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
 }
 
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index ece7c373e85..b65728f0d4c 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -474,7 +474,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		 */
 
 		if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
-		    B_FALSE, cr))) {
+		    B_TRUE, cr))) {
 			zrele(*zpp);
 			*zpp = NULL;
 		}

From 6947cd4d94945b1014f69801efbd92fc526ab373 Mon Sep 17 00:00:00 2001
From: Ryan Moeller <ryan@iXsystems.com>
Date: Wed, 3 Nov 2021 08:03:08 -0400
Subject: [PATCH 2/2] Add configure check for "permission" inode operation

The "permission" inode operation takes a new `struct user_namespace *`
parameter starting in Linux 5.12.

Add a configure check and adapt accordingly.

Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
---
 config/kernel-inode-permission.m4 | 29 +++++++++++++++++++++++++++++
 config/kernel.m4                  |  2 ++
 2 files changed, 31 insertions(+)
 create mode 100644 config/kernel-inode-permission.m4

diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4
new file mode 100644
index 00000000000..ba9ff5d43d4
--- /dev/null
+++ b/config/kernel-inode-permission.m4
@@ -0,0 +1,29 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([permission_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_permission(struct user_namespace *userns,
+		    struct inode *inode, int mask) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.permission		= inode_permission,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERMISSION], [
+	AC_MSG_CHECKING([whether iops->permission() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([permission_userns], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_PERMISSION_USERNS, 1,
+		   [iops->permission() takes struct user_namespace*])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index d1d3dede175..639d1837712 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -82,6 +82,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_MKDIR
 	ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS
 	ZFS_AC_KERNEL_SRC_CREATE
+	ZFS_AC_KERNEL_SRC_PERMISSION
 	ZFS_AC_KERNEL_SRC_GET_LINK
 	ZFS_AC_KERNEL_SRC_PUT_LINK
 	ZFS_AC_KERNEL_SRC_TMPFILE
@@ -193,6 +194,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_MKDIR
 	ZFS_AC_KERNEL_LOOKUP_FLAGS
 	ZFS_AC_KERNEL_CREATE
+	ZFS_AC_KERNEL_PERMISSION
 	ZFS_AC_KERNEL_GET_LINK
 	ZFS_AC_KERNEL_PUT_LINK
 	ZFS_AC_KERNEL_TMPFILE