The following commit has been merged in the master branch:
commit fa8380a06bd0523e51f826520aac1beb8c585521
Merge: 68e5c7d4cefb66de3953a874e670ec8f1ce86a24 37d3dd663f7485bf3e444f40abee3c68f53158cb
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue Sep 24 14:54:26 2024 -0700
Merge tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf 'struct fd' updates from Alexei Starovoitov: "This includes struct_fd BPF changes from Al and Andrii"
* tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf: convert bpf_token_create() to CLASS(fd, ...)
  security,bpf: constify struct path in bpf_token_create() LSM hook
  bpf: more trivial fdget() conversions
  bpf: trivial conversions for fdget()
  bpf: switch maps to CLASS(fd, ...)
  bpf: factor out fetching bpf_map from FD and adding it to used_maps list
  bpf: switch fdget_raw() uses to CLASS(fd_raw, ...)
  bpf: convert __bpf_prog_get() to CLASS(fd, ...)
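
For context, the conversions listed above all follow the same shape: open-coded fdget()/fdput() pairs are replaced with the scope-based CLASS(fd, ...) guard (declared via <linux/file.h> / <linux/cleanup.h>), which drops the file reference automatically when the variable goes out of scope. Below is a minimal before/after sketch paraphrasing the bpf_map_get() hunk further down in this diff; the wrapper names map_get_old()/map_get_new() are illustrative only and do not appear in the patch.

	#include <linux/file.h>	/* fdget(), fdput(), CLASS(fd, ...), fd_file(), fd_empty() */
	#include <linux/err.h>	/* IS_ERR(), ERR_PTR() */
	#include <linux/bpf.h>	/* __bpf_map_get(), bpf_map_inc() */

	/* Old style: every exit path has to pair the fdget() with an fdput(). */
	static struct bpf_map *map_get_old(u32 ufd)
	{
		struct fd f = fdget(ufd);
		struct bpf_map *map;

		map = __bpf_map_get(f);
		if (IS_ERR(map))
			return map;	/* old __bpf_map_get() released the fd on error */

		bpf_map_inc(map);
		fdput(f);		/* manual release on the success path */
		return map;
	}

	/* New style: CLASS(fd, f)(ufd) releases the fd automatically at scope exit. */
	static struct bpf_map *map_get_new(u32 ufd)
	{
		CLASS(fd, f)(ufd);
		struct bpf_map *map = __bpf_map_get(f);

		if (!IS_ERR(map))
			bpf_map_inc(map);
		return map;		/* no fdput() needed on any path */
	}

The same mechanical change is what removes the err_put:/fdput() ladders in map_lookup_elem(), map_update_elem(), map_freeze() and friends below; __bpf_map_get() itself becomes a plain accessor that no longer releases the fd on error, since the CLASS guard in the caller now owns that responsibility.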
diff --combined include/linux/bpf.h index 0c3893c471711,9f35df07e86d7..19d8ca8ac960f --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -695,11 -695,6 +695,11 @@@ enum bpf_type_flag /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+ /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@@ -737,6 -732,8 +737,6 @@@ enum bpf_arg_type ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ @@@ -747,7 -744,7 +747,7 @@@ ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX,
@@@ -811,12 -808,12 +811,12 @@@ struct bpf_func_proto bool gpl_only; bool pkt_access; bool might_sleep; - /* set to true if helper follows contract for gcc/llvm - * attribute no_caller_saved_registers: + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ - bool allow_nocsr; + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { @@@ -977,8 -974,6 +977,8 @@@ struct bpf_verifier_ops struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, @@@ -2246,7 -2241,16 +2246,16 @@@ void __bpf_obj_drop_impl(void *p, cons
struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); - struct bpf_map *__bpf_map_get(struct fd f); + + static inline struct bpf_map *__bpf_map_get(struct fd f) + { + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; + } + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@@ -3203,9 -3207,7 +3212,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; @@@ -3213,7 -3215,6 +3222,7 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; diff --combined include/linux/lsm_hook_defs.h index 1d59513bf2301,462b553782410..9eca013aa5e1f --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@@ -48,7 -48,7 +48,7 @@@ LSM_HOOK(int, 0, quota_on, struct dentr LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) @@@ -114,7 -114,6 +114,7 @@@ LSM_HOOK(int, 0, path_notify, const str unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) @@@ -180,8 -179,6 +180,8 @@@ LSM_HOOK(void, LSM_RET_VOID, inode_gets LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, + enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) @@@ -356,7 -353,8 +356,7 @@@ LSM_HOOK(void, LSM_RET_VOID, secmark_re LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const 
struct request_sock *req, struct flowi_common *flic) -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) @@@ -376,7 -374,8 +376,7 @@@ LSM_HOOK(int, 0, mptcp_add_subflow, str LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) -LSM_HOOK(int, 0, ib_alloc_security, void **sec) -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */
#ifdef CONFIG_SECURITY_NETWORK_XFRM @@@ -404,6 -403,7 +404,6 @@@ LSM_HOOK(int, 0, xfrm_decode_session, s #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) @@@ -431,7 -431,7 +431,7 @@@ LSM_HOOK(int, 0, bpf_prog_load, struct struct bpf_token *token) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) @@@ -442,6 -442,7 +442,6 @@@ LSM_HOOK(int, 0, locked_down, enum lock #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ @@@ -451,10 -452,3 +451,10 @@@ LSM_HOOK(int, 0, uring_override_creds, LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) + +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, + enum lsm_integrity_type type, const void *value, size_t size) diff --combined include/linux/security.h index c37c32ebbdcd8,31523a2c71c4d..b86ec2afc6910 --- a/include/linux/security.h +++ b/include/linux/security.h @@@ -83,18 -83,6 +83,18 @@@ enum lsm_event LSM_POLICY_CHANGE, };
+struct dm_verity_digest { + const char *alg; + const u8 *digest; + size_t digest_len; +}; + +enum lsm_integrity_type { + LSM_INT_DMVERITY_SIG_VALID, + LSM_INT_DMVERITY_ROOTHASH, + LSM_INT_FSVERITY_BUILTINSIG_VALID, +}; + /* * These are reasons that can be passed to the security_locked_down() * LSM hook. Lockdown reasons that protect kernel integrity (ie, the @@@ -411,9 -399,6 +411,9 @@@ int security_inode_listsecurity(struct void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); @@@ -524,11 -509,6 +524,11 @@@ int security_inode_getsecctx(struct ino int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); +int security_bdev_alloc(struct block_device *bdev); +void security_bdev_free(struct block_device *bdev); +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size); #else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@@ -654,7 -634,7 +654,7 @@@ static inline int security_settime64(co
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); + return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages)); }
static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) @@@ -1030,13 -1010,6 +1030,13 @@@ static inline int security_inode_copy_u return 0; }
+static inline int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { @@@ -1510,23 -1483,6 +1510,23 @@@ static inline int lsm_fill_user_ctx(str { return -EOPNOTSUPP; } + +static inline int security_bdev_alloc(struct block_device *bdev) +{ + return 0; +} + +static inline void security_bdev_free(struct block_device *bdev) +{ +} + +static inline int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + #endif /* CONFIG_SECURITY */
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@@ -2134,7 -2090,6 +2134,7 @@@ struct dentry *securityfs_create_symlin const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); +extern void securityfs_recursive_remove(struct dentry *dentry);
#else /* CONFIG_SECURITYFS */
@@@ -2182,7 -2137,7 +2182,7 @@@ extern int security_bpf_prog_load(struc struct bpf_token *token); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); + const struct path *path); extern void security_bpf_token_free(struct bpf_token *token); extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); extern int security_bpf_token_capable(const struct bpf_token *token, int cap); @@@ -2222,7 -2177,7 +2222,7 @@@ static inline void security_bpf_prog_fr { }
static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return 0; } @@@ -2301,12 -2256,4 +2301,12 @@@ static inline int security_uring_cmd(st #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */
+#ifdef CONFIG_SECURITY +extern void security_initramfs_populated(void); +#else +static inline void security_initramfs_populated(void) +{ +} +#endif /* CONFIG_SECURITY */ + #endif /* ! __LINUX_SECURITY_H */ diff --combined kernel/bpf/btf.c index 83bbf935c5628,c4506d788c858..75e4fe83c5091 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@@ -212,7 -212,7 +212,7 @@@ enum btf_kfunc_hook BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, @@@ -790,7 -790,7 +790,7 @@@ const char *btf_str_by_offset(const str return NULL; }
-static bool __btf_name_valid(const struct btf *btf, u32 offset) +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { /* offset must be valid */ const char *src = btf_str_by_offset(btf, offset); @@@ -811,6 -811,11 +811,6 @@@ return !*src; }
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) -{ - return __btf_name_valid(btf, offset); -} - /* Allow any printable character in DATASEC names */ static bool btf_name_valid_section(const struct btf *btf, u32 offset) { @@@ -818,11 -823,9 +818,11 @@@ const char *src = btf_str_by_offset(btf, offset); const char *src_limit;
+ if (!*src) + return false; + /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; - src++; while (*src && src < src_limit) { if (!isprint(*src)) return false; @@@ -3756,7 -3759,6 +3756,7 @@@ static int btf_find_field(const struct return -EINVAL; }
+/* Callers have to ensure the life cycle of btf if it is program BTF */ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { @@@ -3785,6 -3787,7 +3785,6 @@@ field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; - btf_get(kptr_btf); goto found_dtor; } if (id < 0) @@@ -4626,7 -4629,7 +4626,7 @@@ static s32 btf_var_check_meta(struct bt }
if (!t->name_off || - !__btf_name_valid(env->btf, t->name_off)) { + !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } @@@ -5514,72 -5517,36 +5514,72 @@@ static const char *alloc_obj_fields[] static struct btf_struct_metas * btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) { - union { - struct btf_id_set set; - struct { - u32 _cnt; - u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; - } _arr; - } aof; struct btf_struct_metas *tab = NULL; + struct btf_id_set *aof; int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
- memset(&aof, 0, sizeof(aof)); + aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN); + if (!aof) + return ERR_PTR(-ENOMEM); + aof->cnt = 0; + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { /* Try to find whether this special type exists in user BTF, and * if so remember its ID so we can easily find it among members * of structs that we iterate in the next loop. */ + struct btf_id_set *new_aof; + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); if (id < 0) continue; - aof.set.ids[aof.set.cnt++] = id; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = id; + } + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + /* Try to find if there are kptrs in user BTF and remember their ID */ + struct btf_id_set *new_aof; + struct btf_field_info tmp; + const struct btf_type *t; + + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free_aof; + } + + ret = btf_find_kptr(btf, t, 0, 0, &tmp); + if (ret != BTF_FIELD_FOUND) + continue; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = i; }
- if (!aof.set.cnt) + if (!aof->cnt) { + kfree(aof); return NULL; - sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); + } + sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
- n = btf_nr_types(btf); for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; @@@ -5589,13 -5556,17 +5589,13 @@@ int j, tab_cnt;
t = btf_type_by_id(btf, i); - if (!t) { - ret = -EINVAL; - goto free; - } if (!__btf_type_is_struct(t)) continue;
cond_resched();
for_each_member(j, t, member) { - if (btf_id_set_contains(&aof.set, member->type)) + if (btf_id_set_contains(aof, member->type)) goto parse; } continue; @@@ -5614,8 -5585,7 +5614,8 @@@ type = &tab->types[tab->cnt]; type->btf_id = i; record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | - BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT | + BPF_KPTR, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; @@@ -5624,12 -5594,9 +5624,12 @@@ type->record = record; tab->cnt++; } + kfree(aof); return tab; free: btf_struct_metas_free(tab); +free_aof: + kfree(aof); return ERR_PTR(ret); }
@@@ -6276,11 -6243,12 +6276,11 @@@ static struct btf *btf_parse_module(con btf->kernel_btf = true; snprintf(btf->name, sizeof(btf->name), "%s", module_name);
- btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); + btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN); if (!btf->data) { err = -ENOMEM; goto errout; } - memcpy(btf->data, data, data_size); btf->data_size = data_size;
err = btf_parse_hdr(env); @@@ -6315,7 -6283,7 +6315,7 @@@
errout: btf_verifier_env_free(env); - if (base_btf != vmlinux_btf) + if (!IS_ERR(base_btf) && base_btf != vmlinux_btf) btf_free(base_btf); if (btf) { kvfree(btf->data); @@@ -6558,9 -6526,6 +6558,9 @@@ bool btf_ctx_access(int off, int size, if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED;
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + info->reg_type |= PTR_MAYBE_NULL; + if (tgt_prog) { enum bpf_prog_type tgt_type;
@@@ -7711,21 -7676,16 +7711,16 @@@ int btf_new_fd(const union bpf_attr *at struct btf *btf_get_by_fd(int fd) { struct btf *btf; - struct fd f; + CLASS(fd, f)(fd);
- f = fdget(fd); - - if (!fd_file(f)) + if (fd_empty(f)) return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &btf_fops) { - fdput(f); + if (fd_file(f)->f_op != &btf_fops) return ERR_PTR(-EINVAL); - }
btf = fd_file(f)->private_data; refcount_inc(&btf->refcnt); - fdput(f);
return btf; } @@@ -8087,44 -8047,15 +8082,44 @@@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE
+/* Validate well-formedness of iter argument type. + * On success, return positive BTF ID of iter state's STRUCT type. + * On error, negative error is returned. + */ +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + const struct btf_param *arg; + const struct btf_type *t; + const char *name; + int btf_id; + + if (btf_type_vlen(func) <= arg_idx) + return -EINVAL; + + arg = &btf_params(func)[arg_idx]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, &btf_id); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + return btf_id; +} + static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, const struct btf_type *func, u32 func_flags) { u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); - const char *name, *sfx, *iter_name; - const struct btf_param *arg; + const char *sfx, *iter_name; const struct btf_type *t; char exp_name[128]; u32 nr_args; + int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ if (!flags || (flags & (flags - 1))) @@@ -8135,21 -8066,28 +8130,21 @@@ if (nr_args < 1) return -EINVAL;
- arg = &btf_params(func)[0]; - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!t || !btf_type_is_ptr(t)) - return -EINVAL; - t = btf_type_skip_modifiers(btf, t->type, NULL); - if (!t || !__btf_type_is_struct(t)) - return -EINVAL; - - name = btf_name_by_offset(btf, t->name_off); - if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) - return -EINVAL; + btf_id = btf_check_iter_arg(btf, func, 0); + if (btf_id < 0) + return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to * fit nicely in stack slots */ + t = btf_type_by_id(btf, btf_id); if (t->size == 0 || (t->size % 8)) return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *) * naming pattern */ - iter_name = name + sizeof(ITER_PREFIX) - 1; + iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1; if (flags & KF_ITER_NEW) sfx = "new"; else if (flags & KF_ITER_NEXT) @@@ -8364,19 -8302,13 +8359,19 @@@ static int bpf_prog_type_to_kfunc_hook( case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: @@@ -8952,7 -8884,6 +8947,7 @@@ int bpf_core_apply(struct bpf_core_ctx struct bpf_core_cand_list cands = {}; struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; + const struct btf_type *type; int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" @@@ -8962,13 -8893,6 +8957,13 @@@ if (!specs) return -ENOMEM;
+ type = btf_type_by_id(ctx->btf, relo->type_id); + if (!type) { + bpf_log(ctx->log, "relo #%u: bad type id %u\n", + relo_idx, relo->type_id); + return -EINVAL; + } + if (need_cands) { struct bpf_cand_cache *cc; int i; diff --combined kernel/bpf/syscall.c index 8386f25bc532c,65dcd92d0b2c5..a8f1808a1ca54 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@@ -550,8 -550,7 +550,8 @@@ void btf_record_free(struct btf_record case BPF_KPTR_PERCPU: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); - btf_put(rec->fields[i].kptr.btf); + if (btf_is_kernel(rec->fields[i].kptr.btf)) + btf_put(rec->fields[i].kptr.btf); break; case BPF_LIST_HEAD: case BPF_LIST_NODE: @@@ -597,8 -596,7 +597,8 @@@ struct btf_record *btf_record_dup(cons case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: - btf_get(fields[i].kptr.btf); + if (btf_is_kernel(fields[i].kptr.btf)) + btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { ret = -ENXIO; goto free; @@@ -735,11 -733,15 +735,11 @@@ void bpf_obj_free_fields(const struct b } }
-/* called from workqueue */ -static void bpf_map_free_deferred(struct work_struct *work) +static void bpf_map_free(struct bpf_map *map) { - struct bpf_map *map = container_of(work, struct bpf_map, work); struct btf_record *rec = map->record; struct btf *btf = map->btf;
- security_bpf_map_free(map); - bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); /* Delay freeing of btf_record for maps, as map_free @@@ -758,16 -760,6 +758,16 @@@ btf_put(btf); }
+/* called from workqueue */ +static void bpf_map_free_deferred(struct work_struct *work) +{ + struct bpf_map *map = container_of(work, struct bpf_map, work); + + security_bpf_map_free(map); + bpf_map_release_memcg(map); + bpf_map_free(map); +} + static void bpf_map_put_uref(struct bpf_map *map) { if (atomic64_dec_and_test(&map->usercnt)) { @@@ -1419,27 -1411,13 +1419,12 @@@ static int map_create(union bpf_attr *a free_map_sec: security_bpf_map_free(map); free_map: - btf_put(map->btf); - map->ops->map_free(map); + bpf_map_free(map); put_token: bpf_token_put(token); return err; }
- /* if error is returned, fd is released. - * On success caller should complete fd access with matching fdput() - */ - struct bpf_map *__bpf_map_get(struct fd f) - { - if (!fd_file(f)) - return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_map_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - return fd_file(f)->private_data; - } - void bpf_map_inc(struct bpf_map *map) { atomic64_inc(&map->refcnt); @@@ -1455,15 -1433,11 +1440,11 @@@ EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref
struct bpf_map *bpf_map_get(u32 ufd) { - struct fd f = fdget(ufd); - struct bpf_map *map; + CLASS(fd, f)(ufd); + struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f); - if (IS_ERR(map)) - return map; - - bpf_map_inc(map); - fdput(f); + if (!IS_ERR(map)) + bpf_map_inc(map);
return map; } @@@ -1471,15 -1445,11 +1452,11 @@@ EXPORT_SYMBOL(bpf_map_get)
struct bpf_map *bpf_map_get_with_uref(u32 ufd) { - struct fd f = fdget(ufd); - struct bpf_map *map; + CLASS(fd, f)(ufd); + struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f); - if (IS_ERR(map)) - return map; - - bpf_map_inc_with_uref(map); - fdput(f); + if (!IS_ERR(map)) + bpf_map_inc_with_uref(map);
return map; } @@@ -1544,11 -1514,9 +1521,9 @@@ static int map_lookup_elem(union bpf_at { void __user *ukey = u64_to_user_ptr(attr->key); void __user *uvalue = u64_to_user_ptr(attr->value); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) @@@ -1557,26 -1525,20 +1532,20 @@@ if (attr->flags & ~BPF_F_LOCK) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { - err = -EPERM; - goto err_put; - } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) + return -EPERM;
if ((attr->flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - err = -EINVAL; - goto err_put; - } + !btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL;
key = __bpf_copy_key(ukey, map->key_size); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto err_put; - } + if (IS_ERR(key)) + return PTR_ERR(key);
value_size = bpf_map_value_size(map);
@@@ -1607,8 -1569,6 +1576,6 @@@ free_value kvfree(value); free_key: kvfree(key); - err_put: - fdput(f); return err; }
@@@ -1619,17 -1579,15 +1586,15 @@@ static int map_update_elem(union bpf_at { bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1667,7 -1625,6 +1632,6 @@@ free_key kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -1676,16 -1633,14 +1640,14 @@@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) { bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); - int ufd = attr->map_fd; struct bpf_map *map; - struct fd f; void *key; int err;
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1722,7 -1677,6 +1684,6 @@@ out kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -1733,30 -1687,24 +1694,24 @@@ static int map_get_next_key(union bpf_a { void __user *ukey = u64_to_user_ptr(attr->key); void __user *unext_key = u64_to_user_ptr(attr->next_key); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *next_key; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { - err = -EPERM; - goto err_put; - } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) + return -EPERM;
if (ukey) { key = __bpf_copy_key(ukey, map->key_size); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto err_put; - } + if (IS_ERR(key)) + return PTR_ERR(key); } else { key = NULL; } @@@ -1788,8 -1736,6 +1743,6 @@@ free_next_key kvfree(next_key); free_key: kvfree(key); - err_put: - fdput(f); return err; }
@@@ -2018,11 -1964,9 +1971,9 @@@ static int map_lookup_and_delete_elem(u { void __user *ukey = u64_to_user_ptr(attr->key); void __user *uvalue = u64_to_user_ptr(attr->value); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) @@@ -2031,7 -1975,7 +1982,7 @@@ if (attr->flags & ~BPF_F_LOCK) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -2101,7 -2045,6 +2052,6 @@@ free_key kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -2109,27 -2052,22 +2059,22 @@@
static int map_freeze(const union bpf_attr *attr) { - int err = 0, ufd = attr->map_fd; + int err = 0; struct bpf_map *map; - struct fd f;
if (CHECK_ATTR(BPF_MAP_FREEZE)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map);
- if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) { - fdput(f); + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) return -ENOTSUPP; - }
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - fdput(f); + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) return -EPERM; - }
mutex_lock(&map->freeze_mutex); if (bpf_map_write_active(map)) { @@@ -2144,7 -2082,6 +2089,6 @@@ WRITE_ONCE(map->frozen, true); err_put: mutex_unlock(&map->freeze_mutex); - fdput(f); return err; }
@@@ -2414,18 -2351,6 +2358,6 @@@ int bpf_prog_new_fd(struct bpf_prog *pr O_RDWR | O_CLOEXEC); }
- static struct bpf_prog *____bpf_prog_get(struct fd f) - { - if (!fd_file(f)) - return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_prog_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - return fd_file(f)->private_data; - } - void bpf_prog_add(struct bpf_prog *prog, int i) { atomic64_add(i, &prog->aux->refcnt); @@@ -2481,20 -2406,19 +2413,19 @@@ bool bpf_prog_get_ok(struct bpf_prog *p static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, bool attach_drv) { - struct fd f = fdget(ufd); + CLASS(fd, f)(ufd); struct bpf_prog *prog;
- prog = ____bpf_prog_get(f); - if (IS_ERR(prog)) - return prog; - if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { - prog = ERR_PTR(-EINVAL); - goto out; - } + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (fd_file(f)->f_op != &bpf_prog_fops) + return ERR_PTR(-EINVAL); + + prog = fd_file(f)->private_data; + if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) + return ERR_PTR(-EINVAL);
bpf_prog_inc(prog); - out: - fdput(f); return prog; }
@@@ -3263,20 -3187,16 +3194,16 @@@ int bpf_link_new_fd(struct bpf_link *li
struct bpf_link *bpf_link_get_from_fd(u32 ufd) { - struct fd f = fdget(ufd); + CLASS(fd, f)(ufd); struct bpf_link *link;
- if (!fd_file(f)) + if (fd_empty(f)) return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) { - fdput(f); + if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) return ERR_PTR(-EINVAL); - }
link = fd_file(f)->private_data; bpf_link_inc(link); - fdput(f); - return link; } EXPORT_SYMBOL(bpf_link_get_from_fd); @@@ -4981,33 -4901,25 +4908,25 @@@ static int bpf_link_get_info_by_fd(stru static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, union bpf_attr __user *uattr) { - int ufd = attr->info.bpf_fd; - struct fd f; - int err; - if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) return -EINVAL;
- f = fdget(ufd); - if (!fd_file(f)) + CLASS(fd, f)(attr->info.bpf_fd); + if (fd_empty(f)) return -EBADFD;
if (fd_file(f)->f_op == &bpf_prog_fops) - err = bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, + return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &bpf_map_fops) - err = bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, + return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &btf_fops) - err = bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); + return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) - err = bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, + return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); - else - err = -EINVAL; - - fdput(f); - return err; + return -EINVAL; }
#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd @@@ -5195,14 -5107,13 +5114,13 @@@ static int bpf_map_do_batch(const unio cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; struct bpf_map *map; - int err, ufd; - struct fd f; + int err;
if (CHECK_ATTR(BPF_MAP_BATCH)) return -EINVAL;
- ufd = attr->batch.map_fd; - f = fdget(ufd); + CLASS(fd, f)(attr->batch.map_fd); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -5230,7 -5141,6 +5148,6 @@@ err_put maybe_wait_bpf_programs(map); bpf_map_write_active_dec(map); } - fdput(f); return err; }
@@@ -5675,7 -5585,7 +5592,7 @@@ static int token_create(union bpf_attr return bpf_token_create(attr); }
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; int err; @@@ -5939,7 -5849,6 +5856,7 @@@ static const struct bpf_func_proto bpf_
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) { + *res = 0; if (flags) return -EINVAL;
@@@ -5960,8 -5869,7 +5877,8 @@@ static const struct bpf_func_proto bpf_ .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), };
static const struct bpf_func_proto * diff --combined kernel/bpf/verifier.c index dd86282ccaa4a,e3932f8ce10a3..9a7ed527e47e3 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -28,8 -28,6 +28,8 @@@ #include <linux/cpumask.h> #include <linux/bpf_mem_alloc.h> #include <net/xdp.h> +#include <linux/trace_events.h> +#include <linux/kallsyms.h>
#include "disasm.h"
@@@ -385,6 -383,11 +385,6 @@@ static void verbose_invalid_scalar(stru verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); }
-static bool type_may_be_null(u32 type) -{ - return type & PTR_MAYBE_NULL; -} - static bool reg_not_null(const struct bpf_reg_state *reg) { enum bpf_reg_type type; @@@ -4576,28 -4579,28 +4576,28 @@@ static int get_reg_width(struct bpf_reg return fls64(reg->umax_value); }
-/* See comment for mark_nocsr_pattern_for_call() */ -static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state, - int insn_idx, int off) +/* See comment for mark_fastcall_pattern_for_call() */ +static void check_fastcall_stack_contract(struct bpf_verifier_env *env, + struct bpf_func_state *state, int insn_idx, int off) { struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; struct bpf_insn_aux_data *aux = env->insn_aux_data; int i;
- if (subprog->nocsr_stack_off <= off || aux[insn_idx].nocsr_pattern) + if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) return; - /* access to the region [max_stack_depth .. nocsr_stack_off) - * from something that is not a part of the nocsr pattern, - * disable nocsr rewrites for current subprogram by setting - * nocsr_stack_off to a value smaller than any possible offset. + /* access to the region [max_stack_depth .. fastcall_stack_off) + * from something that is not a part of the fastcall pattern, + * disable fastcall rewrites for current subprogram by setting + * fastcall_stack_off to a value smaller than any possible offset. */ - subprog->nocsr_stack_off = S16_MIN; - /* reset nocsr aux flags within subprogram, + subprog->fastcall_stack_off = S16_MIN; + /* reset fastcall aux flags within subprogram, * happens at most once per subprogram */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { - aux[i].nocsr_spills_num = 0; - aux[i].nocsr_pattern = 0; + aux[i].fastcall_spills_num = 0; + aux[i].fastcall_pattern = 0; } }
@@@ -4649,7 -4652,7 +4649,7 @@@ static int check_stack_write_fixed_off( if (err) return err;
- check_nocsr_stack_contract(env, state, insn_idx, off); + check_fastcall_stack_contract(env, state, insn_idx, off); mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { bool reg_value_fits; @@@ -4784,7 -4787,7 +4784,7 @@@ static int check_stack_write_var_off(st return err; }
- check_nocsr_stack_contract(env, state, insn_idx, min_off); + check_fastcall_stack_contract(env, state, insn_idx, min_off); /* Variable offset writes destroy any spilled pointers in range. */ for (i = min_off; i < max_off; i++) { u8 new_type, *stype; @@@ -4923,7 -4926,7 +4923,7 @@@ static int check_stack_read_fixed_off(s reg = ®_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi); - check_nocsr_stack_contract(env, state, env->insn_idx, off); + check_fastcall_stack_contract(env, state, env->insn_idx, off);
if (is_spilled_reg(®_state->stack[spi])) { u8 spill_size = 1; @@@ -5084,7 -5087,7 +5084,7 @@@ static int check_stack_read_var_off(str min_off = reg->smin_value + off; max_off = reg->smax_value + off; mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); - check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off); + check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); return 0; }
@@@ -6801,13 -6804,13 +6801,13 @@@ static int check_stack_slot_within_boun struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx]; int min_valid_off, max_bpf_stack;
- /* If accessing instruction is a spill/fill from nocsr pattern, + /* If accessing instruction is a spill/fill from bpf_fastcall pattern, * add room for all caller saved registers below MAX_BPF_STACK. - * In case if nocsr rewrite won't happen maximal stack depth + * In case if bpf_fastcall rewrite won't happen maximal stack depth * would be checked by check_max_stack_depth_subprog(). */ max_bpf_stack = MAX_BPF_STACK; - if (aux->nocsr_pattern) + if (aux->fastcall_pattern) max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
if (t == BPF_WRITE || env->allow_uninit_stack) @@@ -7800,38 -7803,29 +7800,38 @@@ static int process_kptr_func(struct bpf struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - struct bpf_map *map_ptr = reg->map_ptr; struct btf_field *kptr_field; + struct bpf_map *map_ptr; + struct btf_record *rec; u32 kptr_off;
+ if (type_is_ptr_alloc_obj(reg->type)) { + rec = reg_btf_record(reg); + } else { /* PTR_TO_MAP_VALUE */ + map_ptr = reg->map_ptr; + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + rec = map_ptr->record; + meta->map_ptr = map_ptr; + } + if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d doesn't have constant offset. kptr has to be at the constant offset\n", regno); return -EINVAL; } - if (!map_ptr->btf) { - verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", - map_ptr->name); - return -EINVAL; - } - if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) { - verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + + if (!btf_record_has_field(rec, BPF_KPTR)) { + verbose(env, "R%d has no valid kptr\n", regno); return -EINVAL; }
- meta->map_ptr = map_ptr; kptr_off = reg->off + reg->var_off.value; - kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR); + kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); if (!kptr_field) { verbose(env, "off=%d doesn't point to kptr\n", kptr_off); return -EACCES; @@@ -7976,17 -7970,12 +7976,17 @@@ static bool is_iter_destroy_kfunc(struc return meta->kfunc_flags & KF_ITER_DESTROY; }
-static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg) +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, + const struct btf_param *arg) { /* btf_check_iter_kfuncs() guarantees that first argument of any iter * kfunc is iter state pointer */ - return arg == 0 && is_iter_kfunc(meta); + if (is_iter_kfunc(meta)) + return arg_idx == 0; + + /* iter passed as an argument to a generic kfunc */ + return btf_param_match_suffix(meta->btf, arg, "__iter"); }
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, @@@ -7994,20 -7983,14 +7994,20 @@@ { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; const struct btf_type *t; - const struct btf_param *arg; - int spi, err, i, nr_slots; - u32 btf_id; + int spi, err, i, nr_slots, btf_id;
- /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */ - arg = &btf_params(meta->func_proto)[0]; - t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */ - t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ + /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() + * ensures struct convention, so we wouldn't need to do any BTF + * validation here. But given iter state can be passed as a parameter + * to any kfunc, if arg has "__iter" suffix, we need to be a bit more + * conservative here. + */ + btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); + if (btf_id < 0) { + verbose(env, "expected valid iter pointer as arg #%d\n", regno); + return -EINVAL; + } + t = btf_type_by_id(meta->btf, btf_id); nr_slots = t->size / BPF_REG_SIZE;
if (is_iter_new_kfunc(meta)) { @@@ -8029,9 -8012,7 +8029,9 @@@ if (err) return err; } else { - /* iter_next() or iter_destroy() expect initialized iter state*/ + /* iter_next() or iter_destroy(), as well as any kfunc + * accepting iter argument, expect initialized iter state + */ err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots); switch (err) { case 0: @@@ -8145,15 -8126,6 +8145,15 @@@ static int widen_imprecise_scalars(stru return 0; }
+static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st, + struct bpf_kfunc_call_arg_meta *meta) +{ + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; +} + /* process_iter_next_call() is called when verifier gets to iterator's next * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer * to it as just "iter_next()" in comments below. @@@ -8238,10 -8210,12 +8238,10 @@@ static int process_iter_next_call(struc struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; struct bpf_reg_state *cur_iter, *queued_iter; - int iter_frameno = meta->iter.frameno; - int iter_spi = meta->iter.spi;
BTF_TYPE_EMIT(struct bpf_iter);
- cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + cur_iter = get_iter_from_state(cur_st, meta);
if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { @@@ -8269,7 -8243,7 +8269,7 @@@ if (!queued_st) return -ENOMEM;
- queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter = get_iter_from_state(queued_st, meta); queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; queued_iter->iter.depth++; if (prev_st) @@@ -8293,12 -8267,6 +8293,12 @@@ static bool arg_type_is_mem_size(enum b type == ARG_CONST_SIZE_OR_ZERO; }
+static bool arg_type_is_raw_mem(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_MEM && + type & MEM_UNINIT; +} + static bool arg_type_is_release(enum bpf_arg_type type) { return type & OBJ_RELEASE; @@@ -8309,6 -8277,16 +8309,6 @@@ static bool arg_type_is_dynptr(enum bpf return base_type(type) == ARG_PTR_TO_DYNPTR; }
-static int int_ptr_type_to_size(enum bpf_arg_type type) -{ - if (type == ARG_PTR_TO_INT) - return sizeof(u32); - else if (type == ARG_PTR_TO_LONG) - return sizeof(u64); - - return -EINVAL; -} - static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_arg_type *arg_type) @@@ -8381,6 -8359,16 +8381,6 @@@ static const struct bpf_reg_types mem_t }, };
-static const struct bpf_reg_types int_ptr_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE, @@@ -8411,12 -8399,7 +8411,12 @@@ static const struct bpf_reg_types func_ static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_xchg_dest_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC + } +}; static const struct bpf_reg_types dynptr_types = { .types = { PTR_TO_STACK, @@@ -8441,12 -8424,14 +8441,12 @@@ static const struct bpf_reg_types *comp [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, - [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, [ARG_PTR_TO_DYNPTR] = &dynptr_types, };
@@@ -8485,8 -8470,7 +8485,8 @@@ static int check_reg_type(struct bpf_ve if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) { + /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { type &= ~MEM_ALLOC; type &= ~MEM_PERCPU; } @@@ -8579,8 -8563,7 +8579,8 @@@ found verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } - if (meta->func_id == BPF_FUNC_kptr_xchg) { + /* Check if local kptr in src arg matches kptr in dst arg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) return -EACCES; } @@@ -8891,7 -8874,7 +8891,7 @@@ skip_type_check meta->release_regno = regno; }
- if (reg->ref_obj_id) { + if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno, reg->ref_obj_id, @@@ -9003,11 -8986,9 +9003,11 @@@ */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, - fn->arg_size[arg], false, - meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); + if (err) + return err; + if (arg_type & MEM_ALIGNED) + err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); } break; case ARG_CONST_SIZE: @@@ -9032,6 -9013,17 +9032,6 @@@ if (err) return err; break; - case ARG_PTR_TO_INT: - case ARG_PTR_TO_LONG: - { - int size = int_ptr_type_to_size(arg_type); - - err = check_helper_mem_access(env, regno, size, false, meta); - if (err) - return err; - err = check_ptr_alignment(env, reg, 0, size, true); - break; - } case ARG_PTR_TO_CONST_STR: { err = check_reg_const_str(env, reg, regno); @@@ -9039,7 -9031,7 +9039,7 @@@ return err; break; } - case ARG_PTR_TO_KPTR: + case ARG_KPTR_XCHG_DEST: err = process_kptr_func(env, regno, meta); if (err) return err; @@@ -9348,15 -9340,15 +9348,15 @@@ static bool check_raw_mode_ok(const str { int count = 0;
- if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg1_type)) count++; - if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg2_type)) count++; - if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg3_type)) count++; - if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg4_type)) count++; - if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg5_type)) count++;
/* We only support one arg being in raw mode at the moment, @@@ -11390,7 -11382,7 +11390,7 @@@ get_kfunc_ptr_arg_type(struct bpf_verif if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR;
- if (is_kfunc_arg_iter(meta, argno)) + if (is_kfunc_arg_iter(meta, argno, &args[argno])) return KF_ARG_PTR_TO_ITER;
if (is_kfunc_arg_list_head(meta->btf, &args[argno])) @@@ -11492,7 -11484,8 +11492,7 @@@ static int process_kf_arg_ptr_to_btf_id * btf_struct_ids_match() to walk the struct at the 0th offset, and * resolve types. */ - if (is_kfunc_acquire(meta) || - (is_kfunc_release(meta) && reg->ref_obj_id) || + if ((is_kfunc_release(meta) && reg->ref_obj_id) || btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true;
@@@ -12109,8 -12102,7 +12109,8 @@@ static int check_kfunc_args(struct bpf_ switch (kf_arg_type) { case KF_ARG_PTR_TO_CTX: if (reg->type != PTR_TO_CTX) { - verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", + i, reg_type_str(env, reg->type)); return -EINVAL; }
@@@ -12833,17 -12825,6 +12833,17 @@@ static int check_kfunc_call(struct bpf_ regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + + if (is_iter_next_kfunc(&meta)) { + struct bpf_reg_state *cur_iter; + + cur_iter = get_iter_from_state(env->cur_state, &meta); + + if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */ + regs[BPF_REG_0].type |= MEM_RCU; + else + regs[BPF_REG_0].type |= PTR_TRUSTED; + } }
if (is_kfunc_ret_null(&meta)) { @@@ -16124,14 -16105,14 +16124,14 @@@ static int visit_func_call_insn(int t,
/* Return a bitmask specifying which caller saved registers are * clobbered by a call to a helper *as if* this helper follows - * no_caller_saved_registers contract: + * bpf_fastcall contract: * - includes R0 if function is non-void; * - includes R1-R5 if corresponding parameter has is described * in the function prototype. */ -static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn) +static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) { - u8 mask; + u32 mask; int i;
mask = 0; @@@ -16144,8 -16125,8 +16144,8 @@@ }
/* True if do_misc_fixups() replaces calls to helper number 'imm', - * replacement patch is presumed to follow no_caller_saved_registers contract - * (see mark_nocsr_pattern_for_call() below). + * replacement patch is presumed to follow bpf_fastcall contract + * (see mark_fastcall_pattern_for_call() below). */ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) { @@@ -16159,30 -16140,7 +16159,30 @@@ } }
-/* GCC and LLVM define a no_caller_saved_registers function attribute. +/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */ +static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) +{ + u32 vlen, i, mask; + + vlen = btf_type_vlen(meta->func_proto); + mask = 0; + if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type))) + mask |= BIT(BPF_REG_0); + for (i = 0; i < vlen; ++i) + mask |= BIT(BPF_REG_1 + i); + return mask; +} + +/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ +static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) +{ + if (meta->btf == btf_vmlinux) + return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || + meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]; + return false; +} + +/* LLVM define a bpf_fastcall function attribute. * This attribute means that function scratches only some of * the caller saved registers defined by ABI. * For BPF the set of such registers could be defined as follows: @@@ -16192,12 -16150,13 +16192,12 @@@ * * The contract between kernel and clang allows to simultaneously use * such functions and maintain backwards compatibility with old - * kernels that don't understand no_caller_saved_registers calls - * (nocsr for short): + * kernels that don't understand bpf_fastcall calls: * - * - for nocsr calls clang allocates registers as-if relevant r0-r5 + * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 * registers are not scratched by the call; * - * - as a post-processing step, clang visits each nocsr call and adds + * - as a post-processing step, clang visits each bpf_fastcall call and adds * spill/fill for every live r0-r5; * * - stack offsets used for the spill/fill are allocated as lowest @@@ -16205,11 -16164,11 +16205,11 @@@ * purposes; * * - when kernel loads a program, it looks for such patterns - * (nocsr function surrounded by spills/fills) and checks if - * spill/fill stack offsets are used exclusively in nocsr patterns; + * (bpf_fastcall function surrounded by spills/fills) and checks if + * spill/fill stack offsets are used exclusively in fastcall patterns; * * - if so, and if verifier or current JIT inlines the call to the - * nocsr function (e.g. a helper call), kernel removes unnecessary + * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary * spill/fill pairs; * * - when old kernel loads a program, presence of spill/fill pairs @@@ -16228,22 -16187,22 +16228,22 @@@ * r0 += r2; * exit; * - * The purpose of mark_nocsr_pattern_for_call is to: + * The purpose of mark_fastcall_pattern_for_call is to: * - look for such patterns; - * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern; - * - mark set env->insn_aux_data[*].nocsr_spills_num for call instruction; - * - update env->subprog_info[*]->nocsr_stack_off to find an offset - * at which nocsr spill/fill stack slots start; - * - update env->subprog_info[*]->keep_nocsr_stack. + * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; + * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; + * - update env->subprog_info[*]->fastcall_stack_off to find an offset + * at which bpf_fastcall spill/fill stack slots start; + * - update env->subprog_info[*]->keep_fastcall_stack. 
* - * The .nocsr_pattern and .nocsr_stack_off are used by - * check_nocsr_stack_contract() to check if every stack access to - * nocsr spill/fill stack slot originates from spill/fill - * instructions, members of nocsr patterns. + * The .fastcall_pattern and .fastcall_stack_off are used by + * check_fastcall_stack_contract() to check if every stack access to + * fastcall spill/fill stack slot originates from spill/fill + * instructions, members of fastcall patterns. * - * If such condition holds true for a subprogram, nocsr patterns could - * be rewritten by remove_nocsr_spills_fills(). - * Otherwise nocsr patterns are not changed in the subprogram + * If such condition holds true for a subprogram, fastcall patterns could + * be rewritten by remove_fastcall_spills_fills(). + * Otherwise bpf_fastcall patterns are not changed in the subprogram * (code, presumably, generated by an older clang version). * * For example, it is *not* safe to remove spill/fill below: @@@ -16256,9 -16215,9 +16256,9 @@@ * r0 += r1; exit; * exit; */ -static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env, - struct bpf_subprog_info *subprog, - int insn_idx, s16 lowest_off) +static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, + struct bpf_subprog_info *subprog, + int insn_idx, s16 lowest_off) { struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; struct bpf_insn *call = &env->prog->insnsi[insn_idx]; @@@ -16273,25 -16232,12 +16273,25 @@@ if (get_helper_proto(env, call->imm, &fn) < 0) /* error would be reported later */ return; - clobbered_regs_mask = helper_nocsr_clobber_mask(fn); - can_be_inlined = fn->allow_nocsr && + clobbered_regs_mask = helper_fastcall_clobber_mask(fn); + can_be_inlined = fn->allow_fastcall && (verifier_inlines_helper_call(env, call->imm) || bpf_jit_inlines_helper_call(call->imm)); }
+ if (bpf_pseudo_kfunc_call(call)) { + struct bpf_kfunc_call_arg_meta meta; + int err; + + err = fetch_kfunc_meta(env, call, &meta, NULL); + if (err < 0) + /* error would be reported later */ + return; + + clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta); + can_be_inlined = is_fastcall_kfunc_call(&meta); + } + if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS) return;
@@@ -16330,36 -16276,36 +16330,36 @@@ if (stx->off != off || ldx->off != off) break; expected_regs_mask &= ~BIT(stx->src_reg); - env->insn_aux_data[insn_idx - i].nocsr_pattern = 1; - env->insn_aux_data[insn_idx + i].nocsr_pattern = 1; + env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; + env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; } if (i == 1) return;
- /* Conditionally set 'nocsr_spills_num' to allow forward + /* Conditionally set 'fastcall_spills_num' to allow forward * compatibility when more helper functions are marked as - * nocsr at compile time than current kernel supports, e.g: + * bpf_fastcall at compile time than current kernel supports, e.g: * * 1: *(u64 *)(r10 - 8) = r1 - * 2: call A ;; assume A is nocsr for current kernel + * 2: call A ;; assume A is bpf_fastcall for current kernel * 3: r1 = *(u64 *)(r10 - 8) * 4: *(u64 *)(r10 - 8) = r1 - * 5: call B ;; assume B is not nocsr for current kernel + * 5: call B ;; assume B is not bpf_fastcall for current kernel * 6: r1 = *(u64 *)(r10 - 8) * - * There is no need to block nocsr rewrite for such program. - * Set 'nocsr_pattern' for both calls to keep check_nocsr_stack_contract() happy, - * don't set 'nocsr_spills_num' for call B so that remove_nocsr_spills_fills() + * There is no need to block bpf_fastcall rewrite for such program. + * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, + * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() * does not remove spill/fill pair {4,6}. */ if (can_be_inlined) - env->insn_aux_data[insn_idx].nocsr_spills_num = i - 1; + env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; else - subprog->keep_nocsr_stack = 1; - subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off); + subprog->keep_fastcall_stack = 1; + subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); }
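The clobber-mask convention used by helper_fastcall_clobber_mask()/kfunc_fastcall_clobber_mask() above can be illustrated with a small userspace sketch: a callee that returns a value and takes N arguments is assumed to scratch only r0 and r1..rN, so spill/fill pairs protecting the remaining caller-saved registers around the call are candidates for removal. The helper_desc structure below is invented for illustration; it is not the kernel's struct bpf_func_proto.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n) (1u << (n))
	enum { R0, R1, R2, R3, R4, R5 };	/* stand-ins for BPF_REG_0..BPF_REG_5 */

	/* Invented descriptor: whether the callee returns a value and how many
	 * arguments it takes, which is all the mask computation needs.
	 */
	struct helper_desc {
		bool has_ret;
		int nr_args;
	};

	static uint32_t fastcall_clobber_mask(const struct helper_desc *d)
	{
		uint32_t mask = 0;

		if (d->has_ret)
			mask |= BIT(R0);
		for (int i = 0; i < d->nr_args; i++)
			mask |= BIT(R1 + i);
		return mask;
	}

	int main(void)
	{
		/* A no-argument helper (e.g. bpf_get_smp_processor_id()) scratches
		 * only r0, so r1-r5 stay live across the call and the surrounding
		 * spill/fill pairs can be dropped once the call is known to be inlined.
		 */
		struct helper_desc no_arg_helper = { .has_ret = true, .nr_args = 0 };

		printf("clobber mask: 0x%x\n", fastcall_clobber_mask(&no_arg_helper));
		return 0;
	}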
-static int mark_nocsr_patterns(struct bpf_verifier_env *env) +static int mark_fastcall_patterns(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn; @@@ -16376,12 -16322,12 +16376,12 @@@ continue; lowest_off = min(lowest_off, insn->off); } - /* use this offset to find nocsr patterns */ + /* use this offset to find fastcall patterns */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { insn = env->prog->insnsi + i; if (insn->code != (BPF_JMP | BPF_CALL)) continue; - mark_nocsr_pattern_for_call(env, subprog, i, lowest_off); + mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); } } return 0; @@@ -17396,9 -17342,8 +17396,9 @@@ static bool stacksafe(struct bpf_verifi spi = i / BPF_REG_SIZE;
if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false;
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) @@@ -18920,6 -18865,53 +18920,53 @@@ static bool bpf_map_is_cgroup_storage(s map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); }
+ /* Add map behind fd to used maps list, if it's not already there, and return + * its index. Also set *reused to true if this map was already in the list of + * used maps. + * Returns <0 on error, or >= 0 index, on success. + */ + static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd, bool *reused) + { + CLASS(fd, f)(fd); + struct bpf_map *map; + int i; + + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); + return PTR_ERR(map); + } + + /* check whether we recorded this map already */ + for (i = 0; i < env->used_map_cnt; i++) { + if (env->used_maps[i] == map) { + *reused = true; + return i; + } + } + + if (env->used_map_cnt >= MAX_USED_MAPS) { + verbose(env, "The total number of maps per program has reached the limit of %u\n", + MAX_USED_MAPS); + return -E2BIG; + } + + if (env->prog->sleepable) + atomic64_inc(&map->sleepable_refcnt); + + /* hold the map. If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded + * and all maps are released in bpf_free_used_maps() + */ + bpf_map_inc(map); + + *reused = false; + env->used_maps[env->used_map_cnt++] = map; + + return env->used_map_cnt - 1; + } + /* find and rewrite pseudo imm in ld_imm64 instructions: * * 1. if it accesses map FD, replace it with actual map pointer. @@@ -18931,7 -18923,7 +18978,7 @@@ static int resolve_pseudo_ldimm64(struc { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; - int i, j, err; + int i, err;
err = bpf_prog_calc_tag(env->prog); if (err) @@@ -18948,9 -18940,10 +18995,10 @@@ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_insn_aux_data *aux; struct bpf_map *map; - struct fd f; + int map_idx; u64 addr; u32 fd; + bool reused;
if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@@ -19011,20 -19004,18 +19059,18 @@@ break; }
- f = fdget(fd); - map = __bpf_map_get(f); - if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); - return PTR_ERR(map); - } + map_idx = add_used_map_from_fd(env, fd, &reused); + if (map_idx < 0) + return map_idx; + map = env->used_maps[map_idx]; + + aux = &env->insn_aux_data[i]; + aux->map_index = map_idx;
err = check_map_prog_compatibility(env, map, env->prog); - if (err) { - fdput(f); + if (err) return err; - }
- aux = &env->insn_aux_data[i]; if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; @@@ -19033,13 -19024,11 +19079,11 @@@
if (off >= BPF_MAX_VAR_OFF) { verbose(env, "direct value offset of %u is not allowed\n", off); - fdput(f); return -EINVAL; }
if (!map->ops->map_direct_value_addr) { verbose(env, "no direct value access support for this map type\n"); - fdput(f); return -EINVAL; }
@@@ -19047,7 -19036,6 +19091,6 @@@ if (err) { verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off); - fdput(f); return err; }
@@@ -19058,70 -19046,39 +19101,39 @@@ insn[0].imm = (u32)addr; insn[1].imm = addr >> 32;
- /* check whether we recorded this map already */ - for (j = 0; j < env->used_map_cnt; j++) { - if (env->used_maps[j] == map) { - aux->map_index = j; - fdput(f); - goto next_insn; - } - } - - if (env->used_map_cnt >= MAX_USED_MAPS) { - verbose(env, "The total number of maps per program has reached the limit of %u\n", - MAX_USED_MAPS); - fdput(f); - return -E2BIG; - } - - if (env->prog->sleepable) - atomic64_inc(&map->sleepable_refcnt); - /* hold the map. If the program is rejected by verifier, - * the map will be released by release_maps() or it - * will be used by the valid program until it's unloaded - * and all maps are released in bpf_free_used_maps() - */ - bpf_map_inc(map); - - aux->map_index = env->used_map_cnt; - env->used_maps[env->used_map_cnt++] = map; + /* proceed with extra checks only if it's a newly added used map */ + if (reused) + goto next_insn;
if (bpf_map_is_cgroup_storage(map) && bpf_cgroup_storage_assign(env->prog->aux, map)) { verbose(env, "only one cgroup storage of each type is allowed\n"); - fdput(f); return -EBUSY; } if (map->map_type == BPF_MAP_TYPE_ARENA) { if (env->prog->aux->arena) { verbose(env, "Only one arena per program\n"); - fdput(f); return -EBUSY; } if (!env->allow_ptr_leaks || !env->bpf_capable) { verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); - fdput(f); return -EPERM; } if (!env->prog->jit_requested) { verbose(env, "JIT is required to use arena\n"); - fdput(f); return -EOPNOTSUPP; } if (!bpf_jit_supports_arena()) { verbose(env, "JIT doesn't support arena\n"); - fdput(f); return -EOPNOTSUPP; } env->prog->aux->arena = (void *)map; if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { verbose(env, "arena's user address must be set via map_extra or mmap()\n"); - fdput(f); return -EINVAL; } }
- fdput(f); next_insn: insn++; i++; @@@ -19277,9 -19234,6 +19289,9 @@@ static int adjust_jmp_off(struct bpf_pr for (i = 0; i < insn_cnt; i++, insn++) { u8 code = insn->code;
+ if (tgt_idx <= i && i < tgt_idx + delta) + continue; + if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) continue; @@@ -19668,39 -19622,14 +19680,39 @@@ apply_patch_buffer */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { + struct bpf_subprog_info *subprogs = env->subprog_info; const struct bpf_verifier_ops *ops = env->ops; - int i, cnt, size, ctx_field_size, delta = 0; + int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0; const int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16], *insn; + struct bpf_insn *epilogue_buf = env->epilogue_buf; + struct bpf_insn *insn_buf = env->insn_buf; + struct bpf_insn *insn; u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; + int epilogue_idx = 0; + + if (ops->gen_epilogue) { + epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog, + -(subprogs[0].stack_depth + 8)); + if (epilogue_cnt >= INSN_BUF_SIZE) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (epilogue_cnt) { + /* Save the ARG_PTR_TO_CTX for the epilogue to use */ + cnt = 0; + subprogs[0].stack_depth += 8; + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1, + -subprogs[0].stack_depth); + insn_buf[cnt++] = env->prog->insnsi[0]; + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + delta += cnt - 1; + } + }
if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@@ -19709,7 -19638,7 +19721,7 @@@ } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); - if (cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { @@@ -19722,9 -19651,6 +19734,9 @@@ } }
+ if (delta) + WARN_ON(adjust_jmp_off(env->prog, 0, delta)); + if (bpf_prog_is_offloaded(env->prog->aux)) return 0;
@@@ -19757,25 -19683,6 +19769,25 @@@ insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); env->prog->aux->num_exentries++; continue; + } else if (insn->code == (BPF_JMP | BPF_EXIT) && + epilogue_cnt && + i + delta < subprogs[1].start) { + /* Generate epilogue for the main prog */ + if (epilogue_idx) { + /* jump back to the earlier generated epilogue */ + insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1); + cnt = 1; + } else { + memcpy(insn_buf, epilogue_buf, + epilogue_cnt * sizeof(*epilogue_buf)); + cnt = epilogue_cnt; + /* epilogue_idx cannot be 0. It must have at + * least one ctx ptr saving insn before the + * epilogue. + */ + epilogue_idx = i + delta; + } + goto patch_insn_buf; } else { continue; } @@@ -19878,7 -19785,7 +19890,7 @@@ target_size = 0; cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; @@@ -19887,7 -19794,7 +19899,7 @@@ if (is_narrower_load && size < target_size) { u8 shift = bpf_ctx_narrow_access_offset( off, size, size_default) * 8; - if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) { + if (shift && cnt + 1 >= INSN_BUF_SIZE) { verbose(env, "bpf verifier narrow ctx load misconfigured\n"); return -EINVAL; } @@@ -19912,7 -19819,6 +19924,7 @@@ insn->dst_reg, insn->dst_reg, size * 8, 0);
+patch_insn_buf: new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@@ -20433,7 -20339,7 +20445,7 @@@ static int do_misc_fixups(struct bpf_ve const int insn_cnt = prog->len; const struct bpf_map_ops *ops; struct bpf_insn_aux_data *aux; - struct bpf_insn insn_buf[16]; + struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; struct bpf_map *map_ptr; int i, ret, cnt, delta = 0, cur_subprog = 0; @@@ -20476,46 -20382,13 +20488,46 @@@ /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
- /* Make divide-by-zero exceptions impossible. */ + /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ + if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || + insn->code == (BPF_ALU | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && + insn->off == 1 && insn->imm == -1) { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_sdiv[] = { + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + }; + struct bpf_insn chk_and_smod[] = { + BPF_MOV32_IMM(insn->dst_reg, 0), + }; + + patchlet = isdiv ? chk_and_sdiv : chk_and_smod; + cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod); + + new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + + /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; bool isdiv = BPF_OP(insn->code) == BPF_DIV; + bool is_sdiv = isdiv && insn->off == 1; + bool is_smod = !isdiv && insn->off == 1; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { /* [R,W]x div 0 -> 0 */ @@@ -20535,62 -20408,10 +20547,62 @@@ BPF_JMP_IMM(BPF_JA, 0, 0, 1), BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; + struct bpf_insn chk_and_sdiv[] = { + /* [R,W]x sdiv 0 -> 0 + * LLONG_MIN sdiv -1 -> LLONG_MIN + * INT_MIN sdiv -1 -> INT_MIN + */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 4, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 1, 0), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_MOV | BPF_K, insn->dst_reg, + 0, 0, 0), + /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + }; + struct bpf_insn chk_and_smod[] = { + /* [R,W]x mod 0 -> [R,W]x */ + /* [R,W]x mod -1 -> 0 */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 3, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 3 + (is64 ? 0 : 1), 1), + BPF_MOV32_IMM(insn->dst_reg, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), + };
- patchlet = isdiv ? chk_and_div : chk_and_mod; - cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + if (is_sdiv) { + patchlet = chk_and_sdiv; + cnt = ARRAY_SIZE(chk_and_sdiv); + } else if (is_smod) { + patchlet = chk_and_smod; + cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0); + } else { + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + }
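The extra chk_and_sdiv/chk_and_smod patchlets exist because native signed division cannot express the BPF-defined results: in C, division or modulo by zero and LLONG_MIN / -1 are undefined behaviour (typically a #DE trap on x86), while BPF defines x sdiv 0 == 0, x smod 0 == x, LLONG_MIN sdiv -1 == LLONG_MIN and x smod -1 == 0. A minimal userspace sketch of the 64-bit semantics the rewrites above emulate; this models the defined results only, not the emitted instruction sequence, and the 32-bit case is analogous.

	#include <stdint.h>
	#include <stdio.h>

	/* BPF signed division: divide-by-zero yields 0, INT64_MIN sdiv -1 stays INT64_MIN. */
	static int64_t bpf_sdiv64(int64_t x, int64_t y)
	{
		if (y == 0)
			return 0;
		if (y == -1)			/* mirrors the BPF_NEG-based patchlet */
			return x == INT64_MIN ? INT64_MIN : -x;
		return x / y;
	}

	/* BPF signed modulo: modulo-by-zero leaves the dividend, modulo -1 yields 0. */
	static int64_t bpf_smod64(int64_t x, int64_t y)
	{
		if (y == 0)
			return x;
		if (y == -1)
			return 0;
		return x % y;
	}

	int main(void)
	{
		printf("%lld\n", (long long)bpf_sdiv64(INT64_MIN, -1));	/* INT64_MIN, no trap */
		printf("%lld\n", (long long)bpf_smod64(7, -1));		/* 0 */
		printf("%lld\n", (long long)bpf_sdiv64(42, 0));		/* 0 */
		return 0;
	}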
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) @@@ -20637,7 -20458,7 +20649,7 @@@ (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { cnt = env->ops->gen_ld_abs(insn, insn_buf); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt == 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@@ -20930,7 -20751,7 +20942,7 @@@ cnt = ops->map_gen_lookup(map_ptr, insn_buf); if (cnt == -EOPNOTSUPP) goto patch_map_ops_generic; - if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@@ -21290,7 -21111,7 +21302,7 @@@ static struct bpf_prog *inline_bpf_loop int position, s32 stack_base, u32 callback_subprogno, - u32 *cnt) + u32 *total_cnt) { s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; @@@ -21299,56 -21120,55 +21311,56 @@@ int reg_loop_cnt = BPF_REG_7; int reg_loop_ctx = BPF_REG_8;
+ struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; u32 callback_start; u32 call_insn_offset; s32 callback_offset; + u32 cnt = 0;
/* This represents an inlined version of bpf_iter.c:bpf_loop, * be careful to modify this code in sync. */ - struct bpf_insn insn_buf[] = { - /* Return error and jump to the end of the patch if - * expected number of iterations is too big. - */ - BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), - BPF_MOV32_IMM(BPF_REG_0, -E2BIG), - BPF_JMP_IMM(BPF_JA, 0, 0, 16), - /* spill R6, R7, R8 to use these as loop vars */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), - /* initialize loop vars */ - BPF_MOV64_REG(reg_loop_max, BPF_REG_1), - BPF_MOV32_IMM(reg_loop_cnt, 0), - BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), - /* loop header, - * if reg_loop_cnt >= reg_loop_max skip the loop body - */ - BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), - /* callback call, - * correct callback offset would be set after patching - */ - BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), - BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), - BPF_CALL_REL(0), - /* increment loop counter */ - BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), - /* jump to loop header if callback returned 0 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), - /* return value of bpf_loop, - * set R0 to the number of iterations - */ - BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), - /* restore original values of R6, R7, R8 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), - };
- *cnt = ARRAY_SIZE(insn_buf); - new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2); + insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG); + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16); + /* spill R6, R7, R8 to use these as loop vars */ + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset); + /* initialize loop vars */ + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1); + insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0); + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3); + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5); + /* callback call, + * correct callback offset would be set after patching + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt); + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx); + insn_buf[cnt++] = BPF_CALL_REL(0); + /* increment loop counter */ + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1); + /* jump to loop header if callback returned 0 */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6); + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt); + /* restore original values of R6, R7, R8 */ + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset); + + *total_cnt = cnt; + new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt); if (!new_prog) return new_prog;
@@@ -21423,10 -21243,10 +21435,10 @@@ static int optimize_bpf_loop(struct bpf return 0; }
-/* Remove unnecessary spill/fill pairs, members of nocsr pattern, +/* Remove unnecessary spill/fill pairs, members of fastcall pattern, * adjust subprograms stack depth when possible. */ -static int remove_nocsr_spills_fills(struct bpf_verifier_env *env) +static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn_aux_data *aux = env->insn_aux_data; @@@ -21437,8 -21257,8 +21449,8 @@@ int i, j;
for (i = 0; i < insn_cnt; i++, insn++) { - if (aux[i].nocsr_spills_num > 0) { - spills_num = aux[i].nocsr_spills_num; + if (aux[i].fastcall_spills_num > 0) { + spills_num = aux[i].fastcall_spills_num; /* NOPs would be removed by opt_remove_nops() */ for (j = 1; j <= spills_num; ++j) { *(insn - j) = NOP; @@@ -21447,8 -21267,8 +21459,8 @@@ modified = true; } if ((subprog + 1)->start == i + 1) { - if (modified && !subprog->keep_nocsr_stack) - subprog->stack_depth = -subprog->nocsr_stack_off; + if (modified && !subprog->keep_fastcall_stack) + subprog->stack_depth = -subprog->fastcall_stack_off; subprog++; modified = false; } @@@ -21847,13 -21667,11 +21859,13 @@@ int bpf_check_attach_target(struct bpf_ { bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; + char trace_symbol[KSYM_SYMBOL_LEN]; const char prefix[] = "btf_trace_"; + struct bpf_raw_event_map *btp; int ret = 0, subprog = -1, i; const struct btf_type *t; bool conservative = true; - const char *tname; + const char *tname, *fname; struct btf *btf; long addr = 0; struct module *mod = NULL; @@@ -21984,34 -21802,10 +21996,34 @@@ return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf, t->type); - if (!btf_type_is_ptr(t)) - /* should never happen in valid vmlinux build */ + + /* The func_proto of "btf_trace_##tname" is generated from typedef without argument + * names. Thus using bpf_raw_event_map to get argument names. + */ + btp = bpf_get_raw_tracepoint(tname); + if (!btp) return -EINVAL; + fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, + trace_symbol); + bpf_put_raw_tracepoint(btp); + + if (fname) + ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); + + if (!fname || ret < 0) { + bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", + prefix, tname); + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_ptr(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } else { + t = btf_type_by_id(btf, ret); + if (!btf_type_is_func(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ @@@ -22397,7 -22191,7 +22409,7 @@@ int bpf_check(struct bpf_prog **prog, u if (ret < 0) goto skip_full_check;
- ret = mark_nocsr_patterns(env); + ret = mark_fastcall_patterns(env); if (ret < 0) goto skip_full_check;
@@@ -22414,7 -22208,7 +22426,7 @@@ skip_full_check * allocate additional slots. */ if (ret == 0) - ret = remove_nocsr_spills_fills(env); + ret = remove_fastcall_spills_fills(env);
if (ret == 0) ret = check_max_stack_depth(env); diff --combined net/core/sock_map.c index 724b6856fcc3e,0f5f80f44d520..242c91a6e3d38 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@@ -67,46 -67,39 +67,39 @@@ static struct bpf_map *sock_map_alloc(u
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { - u32 ufd = attr->target_fd; struct bpf_map *map; - struct fd f; int ret;
if (attr->attach_flags || attr->replace_bpf_fd) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); mutex_lock(&sockmap_mutex); ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type); mutex_unlock(&sockmap_mutex); - fdput(f); return ret; }
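The CLASS(fd, f)(attr->target_fd) conversions above work because the file reference is dropped automatically when f goes out of scope, which is what lets the explicit fdput() calls and the error-path labels disappear. A rough userspace analogue of the idea, assuming the kernel side is the scope-based cleanup machinery (CLASS() and the fd class); the scoped_fd macro and autoclose() helper below are invented for illustration.

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Close the guarded descriptor automatically when it leaves scope. */
	static void autoclose(int *fd)
	{
		if (*fd >= 0)
			close(*fd);
	}
	#define scoped_fd __attribute__((cleanup(autoclose))) int

	static long file_size(const char *path)
	{
		scoped_fd fd = open(path, O_RDONLY);

		if (fd < 0)
			return -1;		/* no close() needed on this path ... */
		return lseek(fd, 0, SEEK_END);	/* ... nor on this one */
	}

	int main(void)
	{
		printf("%ld\n", file_size("/etc/hostname"));
		return 0;
	}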
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { - u32 ufd = attr->target_fd; struct bpf_prog *prog; struct bpf_map *map; - struct fd f; int ret;
if (attr->attach_flags || attr->replace_bpf_fd) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map);
prog = bpf_prog_get(attr->attach_bpf_fd); - if (IS_ERR(prog)) { - ret = PTR_ERR(prog); - goto put_map; - } + if (IS_ERR(prog)) + return PTR_ERR(prog);
if (prog->type != ptype) { ret = -EINVAL; @@@ -118,8 -111,6 +111,6 @@@ mutex_unlock(&sockmap_mutex); put_prog: bpf_prog_put(prog); - put_map: - fdput(f); return ret; }
@@@ -1183,7 -1174,6 +1174,7 @@@ static void sock_hash_free(struct bpf_m sock_put(elem->sk); sock_hash_free_elem(htab, elem); } + cond_resched(); }
/* wait for psock readers accessing its map link */ @@@ -1551,18 -1541,17 +1542,17 @@@ int sock_map_bpf_prog_query(const unio union bpf_attr __user *uattr) { __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); - u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd; + u32 prog_cnt = 0, flags = 0; struct bpf_prog **pprog; struct bpf_prog *prog; struct bpf_map *map; - struct fd f; u32 id = 0; int ret;
if (attr->query.query_flags) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1594,7 -1583,6 +1584,6 @@@ end copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) ret = -EFAULT;
- fdput(f); return ret; }
diff --combined security/security.c index 4564a0a1e4ef3,d8d0b67ced250..6875eb4a59fcc --- a/security/security.c +++ b/security/security.c @@@ -28,29 -28,30 +28,29 @@@ #include <linux/xattr.h> #include <linux/msg.h> #include <linux/overflow.h> +#include <linux/perf_event.h> +#include <linux/fs.h> #include <net/flow.h> +#include <net/sock.h>
-/* How many LSMs were built into the kernel? */ -#define LSM_COUNT (__end_lsm_info - __start_lsm_info) +#define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
/* - * How many LSMs are built into the kernel as determined at - * build time. Used to determine fixed array sizes. - * The capability module is accounted for by CONFIG_SECURITY - */ -#define LSM_CONFIG_COUNT ( \ - (IS_ENABLED(CONFIG_SECURITY) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_TOMOYO) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_YAMA) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LOADPIN) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_EVM) ? 1 : 0)) + * Identifier for the LSM static calls. + * HOOK is an LSM hook as defined in linux/lsm_hookdefs.h + * IDX is the index of the static call. 0 <= NUM < MAX_LSM_COUNT + */ +#define LSM_STATIC_CALL(HOOK, IDX) lsm_static_call_##HOOK##_##IDX + +/* + * Call the macro M for each LSM hook MAX_LSM_COUNT times. + */ +#define LSM_LOOP_UNROLL(M, ...) \ +do { \ + UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__) \ +} while (0) + +#define LSM_DEFINE_UNROLL(M, ...) UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__)
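For reference, a userspace illustration of the unrolling trick, assuming UNROLL(N, M, ...) expands to M(0, ...) M(1, ...) ... M(N-1, ...): every hook ends up with one fixed call site per potentially built-in LSM instead of a run-time list walk. UNROLL3 and PRINT_SLOT below are stand-ins invented for the example, with a hypothetical MAX_LSM_COUNT of 3.

	#include <stdio.h>

	/* Stand-in for the kernel's UNROLL(): expand M(0, ...), M(1, ...), M(2, ...). */
	#define UNROLL3(M, ...)	M(0, __VA_ARGS__) M(1, __VA_ARGS__) M(2, __VA_ARGS__)

	/* Stand-in for __CALL_STATIC_VOID/__CALL_STATIC_INT: one site per slot. */
	#define PRINT_SLOT(NUM, HOOK)	printf("slot %d for hook %s\n", NUM, #HOOK);

	int main(void)
	{
		/* Mirrors LSM_LOOP_UNROLL(M, HOOK) with MAX_LSM_COUNT == 3. */
		UNROLL3(PRINT_SLOT, file_permission)
		return 0;
	}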
/* * These are descriptions of the reasons that can be passed to the @@@ -91,6 -92,7 +91,6 @@@ const char *const lockdown_reasons[LOCK [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", };
-struct security_hook_heads security_hook_heads __ro_after_init; static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
static struct kmem_cache *lsm_file_cache; @@@ -106,58 -108,9 +106,58 @@@ static __initdata const char *chosen_ma static __initconst const char *const builtin_lsm_order = CONFIG_LSM;
/* Ordered list of LSMs to initialize. */ -static __initdata struct lsm_info **ordered_lsms; +static __initdata struct lsm_info *ordered_lsms[MAX_LSM_COUNT + 1]; static __initdata struct lsm_info *exclusive;
+#ifdef CONFIG_HAVE_STATIC_CALL +#define LSM_HOOK_TRAMP(NAME, NUM) \ + &STATIC_CALL_TRAMP(LSM_STATIC_CALL(NAME, NUM)) +#else +#define LSM_HOOK_TRAMP(NAME, NUM) NULL +#endif + +/* + * Define static calls and static keys for each LSM hook. + */ +#define DEFINE_LSM_STATIC_CALL(NUM, NAME, RET, ...) \ + DEFINE_STATIC_CALL_NULL(LSM_STATIC_CALL(NAME, NUM), \ + *((RET(*)(__VA_ARGS__))NULL)); \ + DEFINE_STATIC_KEY_FALSE(SECURITY_HOOK_ACTIVE_KEY(NAME, NUM)); + +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + LSM_DEFINE_UNROLL(DEFINE_LSM_STATIC_CALL, NAME, RET, __VA_ARGS__) +#include <linux/lsm_hook_defs.h> +#undef LSM_HOOK +#undef DEFINE_LSM_STATIC_CALL + +/* + * Initialise a table of static calls for each LSM hook. + * DEFINE_STATIC_CALL_NULL invocation above generates a key (STATIC_CALL_KEY) + * and a trampoline (STATIC_CALL_TRAMP) which are used to call + * __static_call_update when updating the static call. + * + * The static calls table is used by early LSMs, some architectures can fault on + * unaligned accesses and the fault handling code may not be ready by then. + * Thus, the static calls table should be aligned to avoid any unhandled faults + * in early init. + */ +struct lsm_static_calls_table + static_calls_table __ro_after_init __aligned(sizeof(u64)) = { +#define INIT_LSM_STATIC_CALL(NUM, NAME) \ + (struct lsm_static_call) { \ + .key = &STATIC_CALL_KEY(LSM_STATIC_CALL(NAME, NUM)), \ + .trampoline = LSM_HOOK_TRAMP(NAME, NUM), \ + .active = &SECURITY_HOOK_ACTIVE_KEY(NAME, NUM), \ + }, +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + .NAME = { \ + LSM_DEFINE_UNROLL(INIT_LSM_STATIC_CALL, NAME) \ + }, +#include <linux/lsm_hook_defs.h> +#undef LSM_HOOK +#undef INIT_LSM_STATIC_CALL + }; + static __initdata bool debug; #define init_debug(...) \ do { \ @@@ -218,7 -171,7 +218,7 @@@ static void __init append_ordered_lsm(s if (exists_ordered_lsm(lsm)) return;
- if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from)) + if (WARN(last_lsm == MAX_LSM_COUNT, "%s: out of LSM static calls!?\n", from)) return;
/* Enable this LSM, if it is not already set. */ @@@ -265,7 -218,6 +265,7 @@@ static void __init lsm_set_blob_sizes(s
lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); + lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib); /* * The inode blob gets an rcu_head in addition to * what the modules might need. @@@ -274,16 -226,11 +274,16 @@@ blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); + lsm_set_blob_size(&needed->lbs_key, &blob_sizes.lbs_key); lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); + lsm_set_blob_size(&needed->lbs_perf_event, &blob_sizes.lbs_perf_event); + lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock); lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock); lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); + lsm_set_blob_size(&needed->lbs_tun_dev, &blob_sizes.lbs_tun_dev); lsm_set_blob_size(&needed->lbs_xattr_count, &blob_sizes.lbs_xattr_count); + lsm_set_blob_size(&needed->lbs_bdev, &blob_sizes.lbs_bdev); }
/* Prepare LSM for initialization. */ @@@ -321,7 -268,7 +321,7 @@@ static void __init initialize_lsm(struc * Current index to use while initializing the lsm id list. */ u32 lsm_active_cnt __ro_after_init; -const struct lsm_id *lsm_idlist[LSM_CONFIG_COUNT]; +const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
/* Populate ordered LSMs list from comma-separated LSM name list. */ static void __init ordered_lsm_parse(const char *order, const char *origin) @@@ -403,25 -350,6 +403,25 @@@ kfree(sep); }
+static void __init lsm_static_call_init(struct security_hook_list *hl) +{ + struct lsm_static_call *scall = hl->scalls; + int i; + + for (i = 0; i < MAX_LSM_COUNT; i++) { + /* Update the first static call that is not used yet */ + if (!scall->hl) { + __static_call_update(scall->key, scall->trampoline, + hl->hook.lsm_func_addr); + scall->hl = hl; + static_branch_enable(scall->active); + return; + } + scall++; + } + panic("%s - Ran out of static slots.\n", __func__); +} + static void __init lsm_early_cred(struct cred *cred); static void __init lsm_early_task(struct task_struct *task);
@@@ -450,6 -378,9 +450,6 @@@ static void __init ordered_lsm_init(voi { struct lsm_info **lsm;
- ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms), - GFP_KERNEL); - if (chosen_lsm_order) { if (chosen_major_lsm) { pr_warn("security=%s is ignored because it is superseded by lsm=%s\n", @@@ -467,20 -398,12 +467,20 @@@
init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); + init_debug("ib blob size = %d\n", blob_sizes.lbs_ib); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); +#ifdef CONFIG_KEYS + init_debug("key blob size = %d\n", blob_sizes.lbs_key); +#endif /* CONFIG_KEYS */ init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); + init_debug("sock blob size = %d\n", blob_sizes.lbs_sock); init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock); + init_debug("perf event blob size = %d\n", blob_sizes.lbs_perf_event); init_debug("task blob size = %d\n", blob_sizes.lbs_task); + init_debug("tun device blob size = %d\n", blob_sizes.lbs_tun_dev); init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count); + init_debug("bdev blob size = %d\n", blob_sizes.lbs_bdev);
/* * Create any kmem_caches needed for blobs @@@ -498,12 -421,19 +498,12 @@@ lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); - - kfree(ordered_lsms); }
int __init early_security_init(void) { struct lsm_info *lsm;
-#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ - INIT_HLIST_HEAD(&security_hook_heads.NAME); -#include "linux/lsm_hook_defs.h" -#undef LSM_HOOK - for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; @@@ -624,14 -554,14 +624,14 @@@ void __init security_add_hooks(struct s * Look at the previous entry, if there is one, for duplication. */ if (lsm_active_cnt == 0 || lsm_idlist[lsm_active_cnt - 1] != lsmid) { - if (lsm_active_cnt >= LSM_CONFIG_COUNT) + if (lsm_active_cnt >= MAX_LSM_COUNT) panic("%s Too many LSMs registered.\n", __func__); lsm_idlist[lsm_active_cnt++] = lsmid; }
for (i = 0; i < count; i++) { hooks[i].lsmid = lsmid; - hlist_add_tail_rcu(&hooks[i].list, hooks[i].head); + lsm_static_call_init(&hooks[i]); }
/* @@@ -666,42 -596,27 +666,42 @@@ int unregister_blocking_lsm_notifier(st EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
/** - * lsm_cred_alloc - allocate a composite cred blob - * @cred: the cred that needs a blob + * lsm_blob_alloc - allocate a composite blob + * @dest: the destination for the blob + * @size: the size of the blob * @gfp: allocation type * - * Allocate the cred blob for all the modules + * Allocate a blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ -static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +static int lsm_blob_alloc(void **dest, size_t size, gfp_t gfp) { - if (blob_sizes.lbs_cred == 0) { - cred->security = NULL; + if (size == 0) { + *dest = NULL; return 0; }
- cred->security = kzalloc(blob_sizes.lbs_cred, gfp); - if (cred->security == NULL) + *dest = kzalloc(size, gfp); + if (*dest == NULL) return -ENOMEM; return 0; }
+/** + * lsm_cred_alloc - allocate a composite cred blob + * @cred: the cred that needs a blob + * @gfp: allocation type + * + * Allocate the cred blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +{ + return lsm_blob_alloc(&cred->security, blob_sizes.lbs_cred, gfp); +} + /** * lsm_early_cred - during initialization allocate a composite cred blob * @cred: the cred that needs a blob @@@ -745,7 -660,7 +745,7 @@@ static int lsm_file_alloc(struct file * * * Returns 0, or -ENOMEM if memory can't be allocated. */ -int lsm_inode_alloc(struct inode *inode) +static int lsm_inode_alloc(struct inode *inode) { if (!lsm_inode_cache) { inode->i_security = NULL; @@@ -768,7 -683,15 +768,7 @@@ */ static int lsm_task_alloc(struct task_struct *task) { - if (blob_sizes.lbs_task == 0) { - task->security = NULL; - return 0; - } - - task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL); - if (task->security == NULL) - return -ENOMEM; - return 0; + return lsm_blob_alloc(&task->security, blob_sizes.lbs_task, GFP_KERNEL); }
/** @@@ -781,23 -704,16 +781,23 @@@ */ static int lsm_ipc_alloc(struct kern_ipc_perm *kip) { - if (blob_sizes.lbs_ipc == 0) { - kip->security = NULL; - return 0; - } + return lsm_blob_alloc(&kip->security, blob_sizes.lbs_ipc, GFP_KERNEL); +}
- kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL); - if (kip->security == NULL) - return -ENOMEM; - return 0; +#ifdef CONFIG_KEYS +/** + * lsm_key_alloc - allocate a composite key blob + * @key: the key that needs a blob + * + * Allocate the key blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_key_alloc(struct key *key) +{ + return lsm_blob_alloc(&key->security, blob_sizes.lbs_key, GFP_KERNEL); } +#endif /* CONFIG_KEYS */
/** * lsm_msg_msg_alloc - allocate a composite msg_msg blob @@@ -809,29 -725,14 +809,29 @@@ */ static int lsm_msg_msg_alloc(struct msg_msg *mp) { - if (blob_sizes.lbs_msg_msg == 0) { - mp->security = NULL; + return lsm_blob_alloc(&mp->security, blob_sizes.lbs_msg_msg, + GFP_KERNEL); +} + +/** + * lsm_bdev_alloc - allocate a composite block_device blob + * @bdev: the block_device that needs a blob + * + * Allocate the block_device blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_bdev_alloc(struct block_device *bdev) +{ + if (blob_sizes.lbs_bdev == 0) { + bdev->bd_security = NULL; return 0; }
- mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL); - if (mp->security == NULL) + bdev->bd_security = kzalloc(blob_sizes.lbs_bdev, GFP_KERNEL); + if (!bdev->bd_security) return -ENOMEM; + return 0; }
@@@ -859,8 -760,15 +859,8 @@@ static void __init lsm_early_task(struc */ static int lsm_superblock_alloc(struct super_block *sb) { - if (blob_sizes.lbs_superblock == 0) { - sb->s_security = NULL; - return 0; - } - - sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL); - if (sb->s_security == NULL) - return -ENOMEM; - return 0; + return lsm_blob_alloc(&sb->s_security, blob_sizes.lbs_superblock, + GFP_KERNEL); }
/** @@@ -945,43 -853,29 +945,43 @@@ out * call_int_hook: * This is a hook that returns a value. */ +#define __CALL_STATIC_VOID(NUM, HOOK, ...) \ +do { \ + if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \ + static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \ + } \ +} while (0);
-#define call_void_hook(FUNC, ...) \ - do { \ - struct security_hook_list *P; \ - \ - hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \ - P->hook.FUNC(__VA_ARGS__); \ +#define call_void_hook(HOOK, ...) \ + do { \ + LSM_LOOP_UNROLL(__CALL_STATIC_VOID, HOOK, __VA_ARGS__); \ } while (0)
-#define call_int_hook(FUNC, ...) ({ \ - int RC = LSM_RET_DEFAULT(FUNC); \ - do { \ - struct security_hook_list *P; \ - \ - hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \ - RC = P->hook.FUNC(__VA_ARGS__); \ - if (RC != LSM_RET_DEFAULT(FUNC)) \ - break; \ - } \ - } while (0); \ - RC; \ + +#define __CALL_STATIC_INT(NUM, R, HOOK, LABEL, ...) \ +do { \ + if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \ + R = static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \ + if (R != LSM_RET_DEFAULT(HOOK)) \ + goto LABEL; \ + } \ +} while (0); + +#define call_int_hook(HOOK, ...) \ +({ \ + __label__ OUT; \ + int RC = LSM_RET_DEFAULT(HOOK); \ + \ + LSM_LOOP_UNROLL(__CALL_STATIC_INT, RC, HOOK, OUT, __VA_ARGS__); \ +OUT: \ + RC; \ })
+#define lsm_for_each_hook(scall, NAME) \ + for (scall = static_calls_table.NAME; \ + scall - static_calls_table.NAME < MAX_LSM_COUNT; scall++) \ + if (static_key_enabled(&scall->active->key)) + /* Security operations */
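A behavioural sketch of the dispatch that call_int_hook() performs above, written as ordinary userspace C: each slot is consulted only if its guard is enabled, and iteration stops at the first hook returning something other than the default. The static keys and static calls are replaced by a plain flag and a function pointer, and the names (slot, register_hook, file_permission_slots) are invented, so this shows the control flow only, not the patched-call mechanics.

	#include <stdio.h>

	#define MAX_SLOTS	4
	#define DEFAULT_RET	0

	struct slot {
		int active;			/* stands in for the static key         */
		int (*hook)(int arg);		/* stands in for the static call target */
	};

	static struct slot file_permission_slots[MAX_SLOTS];

	/* call_int_hook() analogue: first non-default return value wins. */
	static int call_file_permission(int arg)
	{
		for (int i = 0; i < MAX_SLOTS; i++) {
			if (!file_permission_slots[i].active)
				continue;
			int rc = file_permission_slots[i].hook(arg);
			if (rc != DEFAULT_RET)
				return rc;
		}
		return DEFAULT_RET;
	}

	/* security_add_hooks()/lsm_static_call_init() analogue: fill the first free slot. */
	static void register_hook(int (*hook)(int))
	{
		for (int i = 0; i < MAX_SLOTS; i++) {
			if (!file_permission_slots[i].active) {
				file_permission_slots[i].hook = hook;
				file_permission_slots[i].active = 1;
				return;
			}
		}
	}

	static int allow_all(int arg) { (void)arg; return 0; }
	static int deny_odd(int arg)  { return (arg & 1) ? -13 /* -EACCES */ : 0; }

	int main(void)
	{
		register_hook(allow_all);
		register_hook(deny_odd);
		printf("%d %d\n", call_file_permission(2), call_file_permission(3)); /* 0 -13 */
		return 0;
	}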
/** @@@ -1216,19 -1110,20 +1216,19 @@@ int security_settime64(const struct tim */ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int cap_sys_admin = 1; int rc;
/* - * The module will respond with a positive value if - * it thinks the __vm_enough_memory() call should be - * made with the cap_sys_admin set. If all of the modules - * agree that it should be set it will. If any module - * thinks it should not be set it won't. + * The module will respond with 0 if it thinks the __vm_enough_memory() + * call should be made with the cap_sys_admin set. If all of the modules + * agree that it should be set it will. If any module thinks it should + * not be set it won't. */ - hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) { - rc = hp->hook.vm_enough_memory(mm, pages); - if (rc <= 0) { + lsm_for_each_hook(scall, vm_enough_memory) { + rc = scall->hl->hook.vm_enough_memory(mm, pages); + if (rc < 0) { cap_sys_admin = 0; break; } @@@ -1374,12 -1269,13 +1374,12 @@@ int security_fs_context_dup(struct fs_c int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int trc; int rc = -ENOPARAM;
- hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, - list) { - trc = hp->hook.fs_context_parse_param(fc, param); + lsm_for_each_hook(scall, fs_context_parse_param) { + trc = scall->hl->hook.fs_context_parse_param(fc, param); if (trc == 0) rc = 0; else if (trc != -ENOPARAM) @@@ -1609,11 -1505,12 +1609,11 @@@ int security_sb_set_mnt_opts(struct sup unsigned long kern_flags, unsigned long *set_kern_flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts);
- hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts, - list) { - rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags, + lsm_for_each_hook(scall, sb_set_mnt_opts) { + rc = scall->hl->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags, set_kern_flags); if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts)) break; @@@ -1699,8 -1596,9 +1699,8 @@@ int security_inode_alloc(struct inode *
static void inode_free_by_rcu(struct rcu_head *head) { - /* - * The rcu head is at the start of the inode blob - */ + /* The rcu head is at the start of the inode blob */ + call_void_hook(inode_free_security_rcu, head); kmem_cache_free(lsm_inode_cache, head); }
@@@ -1708,24 -1606,23 +1708,24 @@@ * security_inode_free() - Free an inode's LSM blob * @inode: the inode * - * Deallocate the inode security structure and set @inode->i_security to NULL. + * Release any LSM resources associated with @inode, although due to the + * inode's RCU protections it is possible that the resources will not be + * fully released until after the current RCU grace period has elapsed. + * + * It is important for LSMs to note that despite being present in a call to + * security_inode_free(), @inode may still be referenced in a VFS path walk + * and calls to security_inode_permission() may be made during, or after, + * a call to security_inode_free(). For this reason the inode->i_security + * field is released via a call_rcu() callback and any LSMs which need to + * retain inode state for use in security_inode_permission() should only + * release that state in the inode_free_security_rcu() LSM hook callback. */ void security_inode_free(struct inode *inode) { call_void_hook(inode_free_security, inode); - /* - * The inode may still be referenced in a path walk and - * a call to security_inode_permission() can be made - * after inode_free_security() is called. Ideally, the VFS - * wouldn't do this, but fixing that is a much harder - * job. For now, simply free the i_security via RCU, and - * leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - if (inode->i_security) - call_rcu((struct rcu_head *)inode->i_security, - inode_free_by_rcu); + if (!inode->i_security) + return; + call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu); }
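What the inode_free_security / inode_free_security_rcu split means for an individual module, sketched as a hypothetical LSM: state that security_inode_permission() may still dereference during an RCU walk is only torn down in the _rcu callback, after the grace period, while everything else can go immediately. This is a shape-of-the-code sketch, not a buildable module; mylsm_inode(), mylsm_inode_from_blob() and mylsm_label_put() are invented helpers.

	/* Hypothetical per-inode blob: the label is still dereferenced by the
	 * permission hook during RCU-walk, the audit cookie is not.
	 */
	struct mylsm_inode {
		struct mylsm_label *label;
		void *audit_cookie;
	};

	static void mylsm_inode_free_security(struct inode *inode)
	{
		struct mylsm_inode *isec = mylsm_inode(inode);	/* invented blob accessor */

		/* Safe to free now: nothing on the inode_permission() path uses it. */
		kfree(isec->audit_cookie);
		isec->audit_cookie = NULL;
		/* Do not drop isec->label here: a concurrent path walk may still end
		 * up in security_inode_permission() for this inode.
		 */
	}

	static void mylsm_inode_free_security_rcu(void *inode_security)
	{
		/* The RCU grace period has elapsed; no walker can reach the blob. */
		struct mylsm_inode *isec = mylsm_inode_from_blob(inode_security); /* invented */

		mylsm_label_put(isec->label);			/* invented refcount drop */
	}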
/** @@@ -1808,7 -1705,7 +1808,7 @@@ int security_inode_init_security(struc const struct qstr *qstr, const initxattrs initxattrs, void *fs_data) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct xattr *new_xattrs = NULL; int ret = -EOPNOTSUPP, xattr_count = 0;
@@@ -1826,8 -1723,9 +1826,8 @@@ return -ENOMEM; }
- hlist_for_each_entry(hp, &security_hook_heads.inode_init_security, - list) { - ret = hp->hook.inode_init_security(inode, dir, qstr, new_xattrs, + lsm_for_each_hook(scall, inode_init_security) { + ret = scall->hl->hook.inode_init_security(inode, dir, qstr, new_xattrs, &xattr_count); if (ret && ret != -EOPNOTSUPP) goto out; @@@ -2763,14 -2661,19 +2763,14 @@@ EXPORT_SYMBOL(security_inode_copy_up) * lower layer to the union/overlay layer. The caller is responsible for * reading and writing the xattrs, this hook is merely a filter. * - * Return: Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP - * if the security module does not know about attribute, or a negative - * error code to abort the copy up. + * Return: Returns 0 to accept the xattr, -ECANCELED to discard the xattr, + * -EOPNOTSUPP if the security module does not know about attribute, + * or a negative error code to abort the copy up. */ int security_inode_copy_up_xattr(struct dentry *src, const char *name) { int rc;
- /* - * The implementation can return 0 (accept the xattr), 1 (discard the - * xattr), -EOPNOTSUPP if it does not know anything about the xattr or - * any other error code in case of an error. - */ rc = call_int_hook(inode_copy_up_xattr, src, name); if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr)) return rc; @@@ -2779,26 -2682,6 +2779,26 @@@ } EXPORT_SYMBOL(security_inode_copy_up_xattr);
+/** + * security_inode_setintegrity() - Set the inode's integrity data + * @inode: inode + * @type: type of integrity, e.g. hash digest, signature, etc + * @value: the integrity value + * @size: size of the integrity value + * + * Register a verified integrity measurement of a inode with LSMs. + * LSMs should free the previously saved data if @value is NULL. + * + * Return: Returns 0 on success, negative values on failure. + */ +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size) +{ + return call_int_hook(inode_setintegrity, inode, type, value, size); +} +EXPORT_SYMBOL(security_inode_setintegrity); + /** * security_kernfs_init_security() - Init LSM context for a kernfs node * @kn_dir: parent kernfs node @@@ -3048,8 -2931,6 +3048,8 @@@ int security_file_fcntl(struct file *fi * Save owner security information (typically from current->security) in * file->f_security for later use by the send_sigiotask hook. * + * This hook is called with file->f_owner.lock held. + * * Return: Returns 0 on success. */ void security_file_set_fowner(struct file *file) @@@ -3676,10 -3557,10 +3676,10 @@@ int security_task_prctl(int option, uns { int thisrc; int rc = LSM_RET_DEFAULT(task_prctl); - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) { - thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5); + lsm_for_each_hook(scall, task_prctl) { + thisrc = scall->hl->hook.task_prctl(option, arg2, arg3, arg4, arg5); if (thisrc != LSM_RET_DEFAULT(task_prctl)) { rc = thisrc; if (thisrc != 0) @@@ -4085,7 -3966,7 +4085,7 @@@ EXPORT_SYMBOL(security_d_instantiate) int security_getselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 __user *size, u32 flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct lsm_ctx lctx = { .id = LSM_ID_UNDEF, }; u8 __user *base = (u8 __user *)uctx; u32 entrysize; @@@ -4123,13 -4004,13 +4123,13 @@@ * In the usual case gather all the data from the LSMs. * In the single case only get the data from the LSM specified. */ - hlist_for_each_entry(hp, &security_hook_heads.getselfattr, list) { - if (single && lctx.id != hp->lsmid->id) + lsm_for_each_hook(scall, getselfattr) { + if (single && lctx.id != scall->hl->lsmid->id) continue; entrysize = left; if (base) uctx = (struct lsm_ctx __user *)(base + total); - rc = hp->hook.getselfattr(attr, uctx, &entrysize, flags); + rc = scall->hl->hook.getselfattr(attr, uctx, &entrysize, flags); if (rc == -EOPNOTSUPP) { rc = 0; continue; @@@ -4178,7 -4059,7 +4178,7 @@@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 size, u32 flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct lsm_ctx *lctx; int rc = LSM_RET_DEFAULT(setselfattr); u64 required_len; @@@ -4201,9 -4082,9 +4201,9 @@@ goto free_out; }
- hlist_for_each_entry(hp, &security_hook_heads.setselfattr, list) - if ((hp->lsmid->id) == lctx->id) { - rc = hp->hook.setselfattr(attr, lctx, size, flags); + lsm_for_each_hook(scall, setselfattr) + if ((scall->hl->lsmid->id) == lctx->id) { + rc = scall->hl->hook.setselfattr(attr, lctx, size, flags); break; }
@@@ -4226,12 -4107,12 +4226,12 @@@ free_out int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { - if (lsmid != 0 && lsmid != hp->lsmid->id) + lsm_for_each_hook(scall, getprocattr) { + if (lsmid != 0 && lsmid != scall->hl->lsmid->id) continue; - return hp->hook.getprocattr(p, name, value); + return scall->hl->hook.getprocattr(p, name, value); } return LSM_RET_DEFAULT(getprocattr); } @@@ -4250,12 -4131,12 +4250,12 @@@ */ int security_setprocattr(int lsmid, const char *name, void *value, size_t size) { - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { - if (lsmid != 0 && lsmid != hp->lsmid->id) + lsm_for_each_hook(scall, setprocattr) { + if (lsmid != 0 && lsmid != scall->hl->lsmid->id) continue; - return hp->hook.setprocattr(name, value, size); + return scall->hl->hook.setprocattr(name, value, size); } return LSM_RET_DEFAULT(setprocattr); } @@@ -4792,20 -4673,6 +4792,20 @@@ int security_socket_getpeersec_dgram(st } EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+/** + * lsm_sock_alloc - allocate a composite sock blob + * @sock: the sock that needs a blob + * @gfp: allocation mode + * + * Allocate the sock blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_sock_alloc(struct sock *sock, gfp_t gfp) +{ + return lsm_blob_alloc(&sock->sk_security, blob_sizes.lbs_sock, gfp); +} + /** * security_sk_alloc() - Allocate and initialize a sock's LSM blob * @sk: sock @@@ -4819,14 -4686,7 +4819,14 @@@ */ int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { - return call_int_hook(sk_alloc_security, sk, family, priority); + int rc = lsm_sock_alloc(sk, priority); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(sk_alloc_security, sk, family, priority); + if (unlikely(rc)) + security_sk_free(sk); + return rc; }
/** @@@ -4838,8 -4698,6 +4838,8 @@@ void security_sk_free(struct sock *sk) { call_void_hook(sk_free_security, sk); + kfree(sk->sk_security); + sk->sk_security = NULL; }
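Since lsm_sock_alloc() above hands every module a zeroed slice of sk->sk_security and security_sk_free() releases the whole blob, an individual LSM's socket hooks shrink to pure initialization. A hypothetical example; struct mylsm_sock, mylsm_sock(), mylsm_current_sid() and mylsm_sock_class() are invented names, not any in-tree LSM's code.

	/* Hypothetical LSM: the framework has already zero-allocated the blob,
	 * so the alloc hook no longer needs its own kmalloc() or error path.
	 */
	struct mylsm_sock {
		u32 sid;
		u16 sclass;
	};

	static int mylsm_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
	{
		struct mylsm_sock *ssec = mylsm_sock(sk);	/* invented blob accessor */

		ssec->sid = mylsm_current_sid();		/* invented label lookup */
		ssec->sclass = mylsm_sock_class(family);	/* invented class mapping */
		return 0;
	}

	/* And no kfree() in the sk_free_security hook: security_sk_free() frees
	 * the blob itself after all modules have run their hooks.
	 */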
/** @@@ -4987,18 -4845,7 +4987,18 @@@ EXPORT_SYMBOL(security_secmark_refcount */ int security_tun_dev_alloc_security(void **security) { - return call_int_hook(tun_dev_alloc_security, security); + int rc; + + rc = lsm_blob_alloc(security, blob_sizes.lbs_tun_dev, GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(tun_dev_alloc_security, *security); + if (rc) { + kfree(*security); + *security = NULL; + } + return rc; } EXPORT_SYMBOL(security_tun_dev_alloc_security);
@@@ -5010,7 -4857,7 +5010,7 @@@ */ void security_tun_dev_free_security(void *security) { - call_void_hook(tun_dev_free_security, security); + kfree(security); } EXPORT_SYMBOL(security_tun_dev_free_security);
@@@ -5206,18 -5053,7 +5206,18 @@@ EXPORT_SYMBOL(security_ib_endport_manag */ int security_ib_alloc_security(void **sec) { - return call_int_hook(ib_alloc_security, sec); + int rc; + + rc = lsm_blob_alloc(sec, blob_sizes.lbs_ib, GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(ib_alloc_security, *sec); + if (rc) { + kfree(*sec); + *sec = NULL; + } + return rc; } EXPORT_SYMBOL(security_ib_alloc_security);
@@@ -5229,7 -5065,7 +5229,7 @@@ */ void security_ib_free_security(void *sec) { - call_void_hook(ib_free_security, sec); + kfree(sec); } EXPORT_SYMBOL(security_ib_free_security); #endif /* CONFIG_SECURITY_INFINIBAND */ @@@ -5387,7 -5223,7 +5387,7 @@@ int security_xfrm_state_pol_flow_match( struct xfrm_policy *xp, const struct flowi_common *flic) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match);
/* @@@ -5399,8 -5235,9 +5399,8 @@@ * For speed optimization, we explicitly break the loop rather than * using the macro */ - hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, - list) { - rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic); + lsm_for_each_hook(scall, xfrm_state_pol_flow_match) { + rc = scall->hl->hook.xfrm_state_pol_flow_match(x, xp, flic); break; } return rc; @@@ -5445,14 -5282,7 +5445,14 @@@ EXPORT_SYMBOL(security_skb_classify_flo int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - return call_int_hook(key_alloc, key, cred, flags); + int rc = lsm_key_alloc(key); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(key_alloc, key, cred, flags); + if (unlikely(rc)) + security_key_free(key); + return rc; }
/** @@@ -5463,8 -5293,7 +5463,8 @@@ */ void security_key_free(struct key *key) { - call_void_hook(key_free, key); + kfree(key->security); + key->security = NULL; }
/** @@@ -5681,7 -5510,7 +5681,7 @@@ int security_bpf_prog_load(struct bpf_p * Return: Returns 0 on success, error on failure. */ int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return call_int_hook(bpf_token_create, token, attr, path); } @@@ -5767,85 -5596,6 +5767,85 @@@ int security_locked_down(enum lockdown_ } EXPORT_SYMBOL(security_locked_down);
+/** + * security_bdev_alloc() - Allocate a block device LSM blob + * @bdev: block device + * + * Allocate and attach a security structure to @bdev->bd_security. The + * security field is initialized to NULL when the bdev structure is + * allocated. + * + * Return: Return 0 if operation was successful. + */ +int security_bdev_alloc(struct block_device *bdev) +{ + int rc = 0; + + rc = lsm_bdev_alloc(bdev); + if (unlikely(rc)) + return rc; + + rc = call_int_hook(bdev_alloc_security, bdev); + if (unlikely(rc)) + security_bdev_free(bdev); + + return rc; +} +EXPORT_SYMBOL(security_bdev_alloc); + +/** + * security_bdev_free() - Free a block device's LSM blob + * @bdev: block device + * + * Deallocate the bdev security structure and set @bdev->bd_security to NULL. + */ +void security_bdev_free(struct block_device *bdev) +{ + if (!bdev->bd_security) + return; + + call_void_hook(bdev_free_security, bdev); + + kfree(bdev->bd_security); + bdev->bd_security = NULL; +} +EXPORT_SYMBOL(security_bdev_free); + +/** + * security_bdev_setintegrity() - Set the device's integrity data + * @bdev: block device + * @type: type of integrity, e.g. hash digest, signature, etc + * @value: the integrity value + * @size: size of the integrity value + * + * Register a verified integrity measurement of a bdev with LSMs. + * LSMs should free the previously saved data if @value is NULL. + * Please note that the new hook should be invoked every time the security + * information is updated to keep these data current. For example, in dm-verity, + * if the mapping table is reloaded and configured to use a different dm-verity + * target with a new roothash and signing information, the previously stored + * data in the LSM blob will become obsolete. It is crucial to re-invoke the + * hook to refresh these data and ensure they are up to date. This necessity + * arises from the design of device-mapper, where a device-mapper device is + * first created, and then targets are subsequently loaded into it. These + * targets can be modified multiple times during the device's lifetime. + * Therefore, while the LSM blob is allocated during the creation of the block + * device, its actual contents are not initialized at this stage and can change + * substantially over time. This includes alterations from data that the LSMs + * 'trusts' to those they do not, making it essential to handle these changes + * correctly. Failure to address this dynamic aspect could potentially allow + * for bypassing LSM checks. + * + * Return: Returns 0 on success, negative values on failure. + */ +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size) +{ + return call_int_hook(bdev_setintegrity, bdev, type, value, size); +} +EXPORT_SYMBOL(security_bdev_setintegrity); + #ifdef CONFIG_PERF_EVENTS /** * security_perf_event_open() - Check if a perf event open is allowed @@@ -5871,19 -5621,7 +5871,19 @@@ int security_perf_event_open(struct per */ int security_perf_event_alloc(struct perf_event *event) { - return call_int_hook(perf_event_alloc, event); + int rc; + + rc = lsm_blob_alloc(&event->security, blob_sizes.lbs_perf_event, + GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(perf_event_alloc, event); + if (rc) { + kfree(event->security); + event->security = NULL; + } + return rc; }
/** @@@ -5894,8 -5632,7 +5894,8 @@@ */ void security_perf_event_free(struct perf_event *event) { - call_void_hook(perf_event_free, event); + kfree(event->security); + event->security = NULL; }
/** @@@ -5966,13 -5703,3 +5966,13 @@@ int security_uring_cmd(struct io_uring_ return call_int_hook(uring_cmd, ioucmd); } #endif /* CONFIG_IO_URING */ + +/** + * security_initramfs_populated() - Notify LSMs that initramfs has been loaded + * + * Tells the LSMs the initramfs has been unpacked into the rootfs. + */ +void security_initramfs_populated(void) +{ + call_void_hook(initramfs_populated); +} diff --combined security/selinux/hooks.c index 94c5231401259,0eec141a8f37e..fc926d3cac6e2 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@@ -282,13 -282,8 +282,13 @@@ static int __inode_security_revalidate(
might_sleep_if(may_sleep);
+ /* + * The check of isec->initialized below is racy but + * inode_doinit_with_dentry() will recheck with + * isec->lock held. + */ if (selinux_initialized() && - isec->initialized != LABEL_INITIALIZED) { + data_race(isec->initialized != LABEL_INITIALIZED)) { if (!may_sleep) return -ECHILD;
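(Editorial illustration, not part of the merged diff.) The comment added above describes the usual lockless check-then-recheck pattern: the unlocked read is wrapped in data_race() so KCSAN does not flag it, and the decision is repeated under the lock before any real work happens. A minimal sketch, where the lock usage and the do_label_init() helper are illustrative:

	if (data_race(isec->initialized != LABEL_INITIALIZED)) {
		spin_lock(&isec->lock);
		/* Re-check now that isec->lock serializes against updaters. */
		if (isec->initialized != LABEL_INITIALIZED)
			do_label_init(isec);	/* hypothetical helper */
		spin_unlock(&isec->lock);
	}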
@@@ -2207,16 -2202,23 +2207,16 @@@ static int selinux_syslog(int type }
/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 0 means there is enough memory for the allocation to - * succeed and -ENOMEM implies there is not. + * Check permission for allocating a new virtual mapping. Returns + * 0 if permission is granted, negative error code if not. * * Do not audit the selinux permission check, as this is applied to all * processes that allocate mappings. */ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { - int rc, cap_sys_admin = 0; - - rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN, - CAP_OPT_NOAUDIT, true); - if (rc == 0) - cap_sys_admin = 1; - - return cap_sys_admin; + return cred_has_capability(current_cred(), CAP_SYS_ADMIN, + CAP_OPT_NOAUDIT, true); }
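(Editorial illustration, not part of the merged diff.) For context, a hedged sketch of how memory-management code consumes the security_vm_enough_memory_mm() wrapper built on this hook: zero means the accounting request may proceed, non-zero (typically -ENOMEM) means it may not. The caller shown is illustrative, not quoted from this diff.

	/* Illustrative caller, e.g. when charging pages for a growing mapping. */
	if (security_vm_enough_memory_mm(mm, grow))
		return -ENOMEM;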
/* binprm security operations */ @@@ -3536,8 -3538,8 +3536,8 @@@ static int selinux_inode_copy_up_xattr( * xattrs up. Instead, filter out SELinux-related xattrs following * policy load. */ - if (selinux_initialized() && strcmp(name, XATTR_NAME_SELINUX) == 0) - return 1; /* Discard */ + if (selinux_initialized() && !strcmp(name, XATTR_NAME_SELINUX)) + return -ECANCELED; /* Discard */ /* * Any other attribute apart from SELINUX is not claimed, supported * by selinux. @@@ -3850,17 -3852,7 +3850,17 @@@ static int selinux_file_mprotect(struc if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || @@@ -3948,7 -3940,7 +3948,7 @@@ static int selinux_file_send_sigiotask( struct file_security_struct *fsec;
/* struct fown_struct is never outside the context of a struct file */ - file = container_of(fown, struct file, f_owner); + file = fown->file;
fsec = selinux_file(file);
@@@ -4592,7 -4584,7 +4592,7 @@@ static int socket_sockcreate_sid(const
static int sock_has_perm(struct sock *sk, u32 perms) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net;
@@@ -4660,7 -4652,7 +4660,7 @@@ static int selinux_socket_post_create(s isec->initialized = LABEL_INITIALIZED;
if (sock->sk) { - sksec = sock->sk->sk_security; + sksec = selinux_sock(sock->sk); sksec->sclass = sclass; sksec->sid = sid; /* Allows detection of the first association on this socket */ @@@ -4676,8 -4668,8 +4676,8 @@@ static int selinux_socket_socketpair(struct socket *socka, struct socket *sockb) { - struct sk_security_struct *sksec_a = socka->sk->sk_security; - struct sk_security_struct *sksec_b = sockb->sk->sk_security; + struct sk_security_struct *sksec_a = selinux_sock(socka->sk); + struct sk_security_struct *sksec_b = selinux_sock(sockb->sk);
sksec_a->peer_sid = sksec_b->sid; sksec_b->peer_sid = sksec_a->sid; @@@ -4692,7 -4684,7 +4692,7 @@@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 family; int err;
@@@ -4832,7 -4824,7 +4832,7 @@@ static int selinux_socket_connect_helpe struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); int err;
err = sock_has_perm(sk, SOCKET__CONNECT); @@@ -5010,9 -5002,9 +5010,9 @@@ static int selinux_socket_unix_stream_c struct sock *other, struct sock *newsk) { - struct sk_security_struct *sksec_sock = sock->sk_security; - struct sk_security_struct *sksec_other = other->sk_security; - struct sk_security_struct *sksec_new = newsk->sk_security; + struct sk_security_struct *sksec_sock = selinux_sock(sock); + struct sk_security_struct *sksec_other = selinux_sock(other); + struct sk_security_struct *sksec_new = selinux_sock(newsk); struct common_audit_data ad; struct lsm_network_audit net; int err; @@@ -5041,8 -5033,8 +5041,8 @@@ static int selinux_socket_unix_may_send(struct socket *sock, struct socket *other) { - struct sk_security_struct *ssec = sock->sk->sk_security; - struct sk_security_struct *osec = other->sk->sk_security; + struct sk_security_struct *ssec = selinux_sock(sock->sk); + struct sk_security_struct *osec = selinux_sock(other->sk); struct common_audit_data ad; struct lsm_network_audit net;
@@@ -5079,7 -5071,7 +5079,7 @@@ static int selinux_sock_rcv_skb_compat( u16 family) { int err = 0; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u32 sk_sid = sksec->sid; struct common_audit_data ad; struct lsm_network_audit net; @@@ -5108,7 -5100,7 +5108,7 @@@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err, peerlbl_active, secmark_active; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 family = sk->sk_family; u32 sk_sid = sksec->sid; struct common_audit_data ad; @@@ -5176,7 -5168,7 +5176,7 @@@ static int selinux_socket_getpeersec_st int err = 0; char *scontext = NULL; u32 scontext_len; - struct sk_security_struct *sksec = sock->sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sock->sk); u32 peer_sid = SECSID_NULL;
if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || @@@ -5236,27 -5228,34 +5236,27 @@@ static int selinux_socket_getpeersec_dg
static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) { - struct sk_security_struct *sksec; - - sksec = kzalloc(sizeof(*sksec), priority); - if (!sksec) - return -ENOMEM; + struct sk_security_struct *sksec = selinux_sock(sk);
sksec->peer_sid = SECINITSID_UNLABELED; sksec->sid = SECINITSID_UNLABELED; sksec->sclass = SECCLASS_SOCKET; selinux_netlbl_sk_security_reset(sksec); - sk->sk_security = sksec;
return 0; }
static void selinux_sk_free_security(struct sock *sk) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
- sk->sk_security = NULL; selinux_netlbl_sk_security_free(sksec); - kfree(sksec); }
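(Editorial illustration, not part of the merged diff.) The hunks above and below replace direct sk->sk_security dereferences and per-socket kzalloc()/kfree() with a framework-managed blob reached through a selinux_sock() accessor; the .lbs_sock entry added to selinux_blob_sizes later in this diff sizes that blob. A hedged paraphrase of what such an accessor typically looks like (the real definition lives in SELinux's objsec.h and may differ in detail):

	static inline struct sk_security_struct *selinux_sock(const struct sock *sk)
	{
		/* The SELinux portion sits at its registered offset in the blob. */
		return sk->sk_security + selinux_blob_sizes.lbs_sock;
	}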
static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) { - struct sk_security_struct *sksec = sk->sk_security; - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); + struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = sksec->sid; newsksec->peer_sid = sksec->peer_sid; @@@ -5270,7 -5269,7 +5270,7 @@@ static void selinux_sk_getsecid(const s if (!sk) *secid = SECINITSID_ANY_SOCKET; else { - const struct sk_security_struct *sksec = sk->sk_security; + const struct sk_security_struct *sksec = selinux_sock(sk);
*secid = sksec->sid; } @@@ -5280,7 -5279,7 +5280,7 @@@ static void selinux_sock_graft(struct s { struct inode_security_struct *isec = inode_security_novalidate(SOCK_INODE(parent)); - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || sk->sk_family == PF_UNIX) @@@ -5297,7 -5296,7 +5297,7 @@@ static int selinux_sctp_process_new_ass { struct sock *sk = asoc->base.sk; u16 family = sk->sk_family; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net; int err; @@@ -5352,7 -5351,7 +5352,7 @@@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(asoc->base.sk); u32 conn_sid; int err;
@@@ -5385,7 -5384,7 +5385,7 @@@ static int selinux_sctp_assoc_established(struct sctp_association *asoc, struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
if (!selinux_policycap_extsockclass()) return 0; @@@ -5484,8 -5483,8 +5484,8 @@@ static int selinux_sctp_bind_connect(st static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { - struct sk_security_struct *sksec = sk->sk_security; - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); + struct sk_security_struct *newsksec = selinux_sock(newsk);
/* If policy does not support SECCLASS_SCTP_SOCKET then call * the non-sctp clone version. @@@ -5501,8 -5500,8 +5501,8 @@@
static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk) { - struct sk_security_struct *ssksec = ssk->sk_security; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *ssksec = selinux_sock(ssk); + struct sk_security_struct *sksec = selinux_sock(sk);
ssksec->sclass = sksec->sclass; ssksec->sid = sksec->sid; @@@ -5517,7 -5516,7 +5517,7 @@@ static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); int err; u16 family = req->rsk_ops->family; u32 connsid; @@@ -5538,7 -5537,7 +5538,7 @@@ static void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) { - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = req->secid; newsksec->peer_sid = req->peer_secid; @@@ -5555,7 -5554,7 +5555,7 @@@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) { u16 family = sk->sk_family; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
/* handle mapped IPv4 packets arriving via IPv6 sockets */ if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) @@@ -5586,14 -5585,24 +5586,14 @@@ static void selinux_req_classify_flow(c flic->flowic_secid = req->secid; }
-static int selinux_tun_dev_alloc_security(void **security) +static int selinux_tun_dev_alloc_security(void *security) { - struct tun_security_struct *tunsec; + struct tun_security_struct *tunsec = selinux_tun_dev(security);
- tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL); - if (!tunsec) - return -ENOMEM; tunsec->sid = current_sid(); - - *security = tunsec; return 0; }
-static void selinux_tun_dev_free_security(void *security) -{ - kfree(security); -} - static int selinux_tun_dev_create(void) { u32 sid = current_sid(); @@@ -5611,7 -5620,7 +5611,7 @@@
static int selinux_tun_dev_attach_queue(void *security) { - struct tun_security_struct *tunsec = security; + struct tun_security_struct *tunsec = selinux_tun_dev(security);
return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__ATTACH_QUEUE, NULL); @@@ -5619,8 -5628,8 +5619,8 @@@
static int selinux_tun_dev_attach(struct sock *sk, void *security) { - struct tun_security_struct *tunsec = security; - struct sk_security_struct *sksec = sk->sk_security; + struct tun_security_struct *tunsec = selinux_tun_dev(security); + struct sk_security_struct *sksec = selinux_sock(sk);
/* we don't currently perform any NetLabel based labeling here and it * isn't clear that we would want to do so anyway; while we could apply @@@ -5637,7 -5646,7 +5637,7 @@@
static int selinux_tun_dev_open(void *security) { - struct tun_security_struct *tunsec = security; + struct tun_security_struct *tunsec = selinux_tun_dev(security); u32 sid = current_sid(); int err;
@@@ -5743,7 -5752,7 +5743,7 @@@ static unsigned int selinux_ip_output(v return NF_ACCEPT;
/* standard practice, label using the parent socket */ - sksec = sk->sk_security; + sksec = selinux_sock(sk); sid = sksec->sid; } else sid = SECINITSID_KERNEL; @@@ -5766,7 -5775,7 +5766,7 @@@ static unsigned int selinux_ip_postrout sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; - sksec = sk->sk_security; + sksec = selinux_sock(sk);
ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf); if (selinux_parse_skb(skb, &ad, NULL, 0, &proto)) @@@ -5855,7 -5864,7 +5855,7 @@@ static unsigned int selinux_ip_postrout u32 skb_sid; struct sk_security_struct *sksec;
- sksec = sk->sk_security; + sksec = selinux_sock(sk); if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) return NF_DROP; /* At this point, if the returned skb peerlbl is SECSID_NULL @@@ -5884,7 -5893,7 +5884,7 @@@ } else { /* Locally generated packet, fetch the security label from the * associated socket. */ - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); peer_sid = sksec->sid; secmark_perm = PACKET__SEND; } @@@ -5927,7 -5936,7 +5927,7 @@@ static int selinux_netlink_send(struct unsigned int data_len = skb->len; unsigned char *data = skb->data; struct nlmsghdr *nlh; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 sclass = sksec->sclass; u32 perm;
@@@ -6641,8 -6650,8 +6641,8 @@@ static int selinux_inode_notifysecctx(s */ static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { - return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, - ctx, ctxlen, 0); + return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, + ctx, ctxlen, 0, NULL); }
static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) @@@ -6661,7 -6670,11 +6661,7 @@@ static int selinux_key_alloc(struct ke unsigned long flags) { const struct task_security_struct *tsec; - struct key_security_struct *ksec; - - ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); - if (!ksec) - return -ENOMEM; + struct key_security_struct *ksec = selinux_key(k);
tsec = selinux_cred(cred); if (tsec->keycreate_sid) @@@ -6669,9 -6682,18 +6669,9 @@@ else ksec->sid = tsec->sid;
- k->security = ksec; return 0; }
-static void selinux_key_free(struct key *k) -{ - struct key_security_struct *ksec = k->security; - - k->security = NULL; - kfree(ksec); -} - static int selinux_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) @@@ -6712,14 -6734,14 +6712,14 @@@
sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); - ksec = key->security; + ksec = selinux_key(key);
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); }
static int selinux_key_getsecurity(struct key *key, char **_buffer) { - struct key_security_struct *ksec = key->security; + struct key_security_struct *ksec = selinux_key(key); char *context = NULL; unsigned len; int rc; @@@ -6735,7 -6757,7 +6735,7 @@@ #ifdef CONFIG_KEY_NOTIFICATIONS static int selinux_watch_key(struct key *key) { - struct key_security_struct *ksec = key->security; + struct key_security_struct *ksec = selinux_key(key); u32 sid = current_sid();
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, KEY__VIEW, NULL); @@@ -6789,13 -6811,23 +6789,13 @@@ static int selinux_ib_endport_manage_su INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad); }
-static int selinux_ib_alloc_security(void **ib_sec) +static int selinux_ib_alloc_security(void *ib_sec) { - struct ib_security_struct *sec; + struct ib_security_struct *sec = selinux_ib(ib_sec);
- sec = kzalloc(sizeof(*sec), GFP_KERNEL); - if (!sec) - return -ENOMEM; sec->sid = current_sid(); - - *ib_sec = sec; return 0; } - -static void selinux_ib_free_security(void *ib_sec) -{ - kfree(ib_sec); -} #endif
#ifdef CONFIG_BPF_SYSCALL @@@ -6933,7 -6965,7 +6933,7 @@@ static void selinux_bpf_prog_free(struc }
static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { struct bpf_security_struct *bpfsec;
@@@ -6961,16 -6993,9 +6961,16 @@@ struct lsm_blob_sizes selinux_blob_size .lbs_file = sizeof(struct file_security_struct), .lbs_inode = sizeof(struct inode_security_struct), .lbs_ipc = sizeof(struct ipc_security_struct), + .lbs_key = sizeof(struct key_security_struct), .lbs_msg_msg = sizeof(struct msg_security_struct), +#ifdef CONFIG_PERF_EVENTS + .lbs_perf_event = sizeof(struct perf_event_security_struct), +#endif + .lbs_sock = sizeof(struct sk_security_struct), .lbs_superblock = sizeof(struct superblock_security_struct), .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS, + .lbs_tun_dev = sizeof(struct tun_security_struct), + .lbs_ib = sizeof(struct ib_security_struct), };
#ifdef CONFIG_PERF_EVENTS @@@ -6997,12 -7022,24 +6997,12 @@@ static int selinux_perf_event_alloc(str { struct perf_event_security_struct *perfsec;
- perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); - if (!perfsec) - return -ENOMEM; - + perfsec = selinux_perf_event(event->security); perfsec->sid = current_sid(); - event->security = perfsec;
return 0; }
-static void selinux_perf_event_free(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec = event->security; - - event->security = NULL; - kfree(perfsec); -} - static int selinux_perf_event_read(struct perf_event *event) { struct perf_event_security_struct *perfsec = event->security; @@@ -7270,6 -7307,7 +7270,6 @@@ static struct security_hook_list selinu LSM_HOOK_INIT(secmark_refcount_inc, selinux_secmark_refcount_inc), LSM_HOOK_INIT(secmark_refcount_dec, selinux_secmark_refcount_dec), LSM_HOOK_INIT(req_classify_flow, selinux_req_classify_flow), - LSM_HOOK_INIT(tun_dev_free_security, selinux_tun_dev_free_security), LSM_HOOK_INIT(tun_dev_create, selinux_tun_dev_create), LSM_HOOK_INIT(tun_dev_attach_queue, selinux_tun_dev_attach_queue), LSM_HOOK_INIT(tun_dev_attach, selinux_tun_dev_attach), @@@ -7278,6 -7316,7 +7278,6 @@@ LSM_HOOK_INIT(ib_pkey_access, selinux_ib_pkey_access), LSM_HOOK_INIT(ib_endport_manage_subnet, selinux_ib_endport_manage_subnet), - LSM_HOOK_INIT(ib_free_security, selinux_ib_free_security), #endif #ifdef CONFIG_SECURITY_NETWORK_XFRM LSM_HOOK_INIT(xfrm_policy_free_security, selinux_xfrm_policy_free), @@@ -7291,6 -7330,7 +7291,6 @@@ #endif
#ifdef CONFIG_KEYS - LSM_HOOK_INIT(key_free, selinux_key_free), LSM_HOOK_INIT(key_permission, selinux_key_permission), LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity), #ifdef CONFIG_KEY_NOTIFICATIONS @@@ -7315,6 -7355,7 +7315,6 @@@
#ifdef CONFIG_PERF_EVENTS LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), - LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), #endif