The following commit has been merged in the master branch:
commit fa8380a06bd0523e51f826520aac1beb8c585521
Merge: 68e5c7d4cefb66de3953a874e670ec8f1ce86a24 37d3dd663f7485bf3e444f40abee3c68f53158cb
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue Sep 24 14:54:26 2024 -0700
Merge tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf 'struct fd' updates from Alexei Starovoitov: "This includes struct_fd BPF changes from Al and Andrii"
* tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf: convert bpf_token_create() to CLASS(fd, ...)
  security,bpf: constify struct path in bpf_token_create() LSM hook
  bpf: more trivial fdget() conversions
  bpf: trivial conversions for fdget()
  bpf: switch maps to CLASS(fd, ...)
  bpf: factor out fetching bpf_map from FD and adding it to used_maps list
  bpf: switch fdget_raw() uses to CLASS(fd_raw, ...)
  bpf: convert __bpf_prog_get() to CLASS(fd, ...)
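
For context, the conversions listed above all follow the same shape: open-coded fdget()/fdput() pairs are replaced with the scope-based CLASS(fd, ...) guard (declared via <linux/file.h> / <linux/cleanup.h>), which drops the file reference automatically when the variable goes out of scope. Below is a minimal before/after sketch paraphrasing the bpf_map_get() hunk further down in this diff; the wrapper names map_get_old()/map_get_new() are illustrative only and do not appear in the patch.

	#include <linux/file.h>	/* fdget(), fdput(), CLASS(fd, ...), fd_file(), fd_empty() */
	#include <linux/err.h>	/* IS_ERR(), ERR_PTR() */
	#include <linux/bpf.h>	/* __bpf_map_get(), bpf_map_inc() */

	/* Old style: every exit path has to pair the fdget() with an fdput(). */
	static struct bpf_map *map_get_old(u32 ufd)
	{
		struct fd f = fdget(ufd);
		struct bpf_map *map;

		map = __bpf_map_get(f);
		if (IS_ERR(map))
			return map;	/* old __bpf_map_get() released the fd on error */

		bpf_map_inc(map);
		fdput(f);		/* manual release on the success path */
		return map;
	}

	/* New style: CLASS(fd, f)(ufd) releases the fd automatically at scope exit. */
	static struct bpf_map *map_get_new(u32 ufd)
	{
		CLASS(fd, f)(ufd);
		struct bpf_map *map = __bpf_map_get(f);

		if (!IS_ERR(map))
			bpf_map_inc(map);
		return map;		/* no fdput() needed on any path */
	}

The same mechanical change is what removes the err_put:/fdput() ladders in map_lookup_elem(), map_update_elem(), map_freeze() and friends below; __bpf_map_get() itself becomes a plain accessor that no longer releases the fd on error, since the CLASS guard in the caller now owns that responsibility.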
diff --combined include/linux/bpf.h index 0c3893c471711,9f35df07e86d7..19d8ca8ac960f --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -695,11 -695,6 +695,11 @@@ enum bpf_type_flag /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+ /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@@ -737,6 -732,8 +737,6 @@@ enum bpf_arg_type ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ @@@ -747,7 -744,7 +747,7 @@@ ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX,
@@@ -811,12 -808,12 +811,12 @@@ struct bpf_func_proto bool gpl_only; bool pkt_access; bool might_sleep; - /* set to true if helper follows contract for gcc/llvm - * attribute no_caller_saved_registers: + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ - bool allow_nocsr; + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { @@@ -977,8 -974,6 +977,8 @@@ struct bpf_verifier_ops struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, @@@ -2246,7 -2241,16 +2246,16 @@@ void __bpf_obj_drop_impl(void *p, cons
struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); - struct bpf_map *__bpf_map_get(struct fd f); + + static inline struct bpf_map *__bpf_map_get(struct fd f) + { + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; + } + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@@ -3203,9 -3207,7 +3212,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; @@@ -3213,7 -3215,6 +3222,7 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; diff --combined include/linux/lsm_hook_defs.h index 1d59513bf2301,462b553782410..9eca013aa5e1f --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@@ -48,7 -48,7 +48,7 @@@ LSM_HOOK(int, 0, quota_on, struct dentr LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) @@@ -114,7 -114,6 +114,7 @@@ LSM_HOOK(int, 0, path_notify, const str unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) @@@ -180,8 -179,6 +180,8 @@@ LSM_HOOK(void, LSM_RET_VOID, inode_gets LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, + enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) @@@ -356,7 -353,8 +356,7 @@@ LSM_HOOK(void, LSM_RET_VOID, secmark_re LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const 
struct request_sock *req, struct flowi_common *flic) -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) @@@ -376,7 -374,8 +376,7 @@@ LSM_HOOK(int, 0, mptcp_add_subflow, str LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) -LSM_HOOK(int, 0, ib_alloc_security, void **sec) -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */
#ifdef CONFIG_SECURITY_NETWORK_XFRM @@@ -404,6 -403,7 +404,6 @@@ LSM_HOOK(int, 0, xfrm_decode_session, s #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) @@@ -431,7 -431,7 +431,7 @@@ LSM_HOOK(int, 0, bpf_prog_load, struct struct bpf_token *token) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) @@@ -442,6 -442,7 +442,6 @@@ LSM_HOOK(int, 0, locked_down, enum lock #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ @@@ -451,10 -452,3 +451,10 @@@ LSM_HOOK(int, 0, uring_override_creds, LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) + +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, + enum lsm_integrity_type type, const void *value, size_t size) diff --combined include/linux/security.h index c37c32ebbdcd8,31523a2c71c4d..b86ec2afc6910 --- a/include/linux/security.h +++ b/include/linux/security.h @@@ -83,18 -83,6 +83,18 @@@ enum lsm_event LSM_POLICY_CHANGE, };
+struct dm_verity_digest { + const char *alg; + const u8 *digest; + size_t digest_len; +}; + +enum lsm_integrity_type { + LSM_INT_DMVERITY_SIG_VALID, + LSM_INT_DMVERITY_ROOTHASH, + LSM_INT_FSVERITY_BUILTINSIG_VALID, +}; + /* * These are reasons that can be passed to the security_locked_down() * LSM hook. Lockdown reasons that protect kernel integrity (ie, the @@@ -411,9 -399,6 +411,9 @@@ int security_inode_listsecurity(struct void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); @@@ -524,11 -509,6 +524,11 @@@ int security_inode_getsecctx(struct ino int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); +int security_bdev_alloc(struct block_device *bdev); +void security_bdev_free(struct block_device *bdev); +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size); #else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@@ -654,7 -634,7 +654,7 @@@ static inline int security_settime64(co
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); + return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages)); }
static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) @@@ -1030,13 -1010,6 +1030,13 @@@ static inline int security_inode_copy_u return 0; }
+static inline int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { @@@ -1510,23 -1483,6 +1510,23 @@@ static inline int lsm_fill_user_ctx(str { return -EOPNOTSUPP; } + +static inline int security_bdev_alloc(struct block_device *bdev) +{ + return 0; +} + +static inline void security_bdev_free(struct block_device *bdev) +{ +} + +static inline int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + #endif /* CONFIG_SECURITY */
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@@ -2134,7 -2090,6 +2134,7 @@@ struct dentry *securityfs_create_symlin const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); +extern void securityfs_recursive_remove(struct dentry *dentry);
#else /* CONFIG_SECURITYFS */
@@@ -2182,7 -2137,7 +2182,7 @@@ extern int security_bpf_prog_load(struc struct bpf_token *token); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); + const struct path *path); extern void security_bpf_token_free(struct bpf_token *token); extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); extern int security_bpf_token_capable(const struct bpf_token *token, int cap); @@@ -2222,7 -2177,7 +2222,7 @@@ static inline void security_bpf_prog_fr { }
static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return 0; } @@@ -2301,12 -2256,4 +2301,12 @@@ static inline int security_uring_cmd(st #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */
+#ifdef CONFIG_SECURITY +extern void security_initramfs_populated(void); +#else +static inline void security_initramfs_populated(void) +{ +} +#endif /* CONFIG_SECURITY */ + #endif /* ! __LINUX_SECURITY_H */ diff --combined kernel/bpf/btf.c index 83bbf935c5628,c4506d788c858..75e4fe83c5091 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@@ -212,7 -212,7 +212,7 @@@ enum btf_kfunc_hook BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, @@@ -790,7 -790,7 +790,7 @@@ const char *btf_str_by_offset(const str return NULL; }
-static bool __btf_name_valid(const struct btf *btf, u32 offset) +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { /* offset must be valid */ const char *src = btf_str_by_offset(btf, offset); @@@ -811,6 -811,11 +811,6 @@@ return !*src; }
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) -{ - return __btf_name_valid(btf, offset); -} - /* Allow any printable character in DATASEC names */ static bool btf_name_valid_section(const struct btf *btf, u32 offset) { @@@ -818,11 -823,9 +818,11 @@@ const char *src = btf_str_by_offset(btf, offset); const char *src_limit;
+ if (!*src) + return false; + /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; - src++; while (*src && src < src_limit) { if (!isprint(*src)) return false; @@@ -3756,7 -3759,6 +3756,7 @@@ static int btf_find_field(const struct return -EINVAL; }
+/* Callers have to ensure the life cycle of btf if it is program BTF */ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { @@@ -3785,6 -3787,7 +3785,6 @@@ field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; - btf_get(kptr_btf); goto found_dtor; } if (id < 0) @@@ -4626,7 -4629,7 +4626,7 @@@ static s32 btf_var_check_meta(struct bt }
if (!t->name_off || - !__btf_name_valid(env->btf, t->name_off)) { + !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } @@@ -5514,72 -5517,36 +5514,72 @@@ static const char *alloc_obj_fields[] static struct btf_struct_metas * btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) { - union { - struct btf_id_set set; - struct { - u32 _cnt; - u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; - } _arr; - } aof; struct btf_struct_metas *tab = NULL; + struct btf_id_set *aof; int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
- memset(&aof, 0, sizeof(aof)); + aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN); + if (!aof) + return ERR_PTR(-ENOMEM); + aof->cnt = 0; + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { /* Try to find whether this special type exists in user BTF, and * if so remember its ID so we can easily find it among members * of structs that we iterate in the next loop. */ + struct btf_id_set *new_aof; + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); if (id < 0) continue; - aof.set.ids[aof.set.cnt++] = id; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = id; + } + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + /* Try to find if there are kptrs in user BTF and remember their ID */ + struct btf_id_set *new_aof; + struct btf_field_info tmp; + const struct btf_type *t; + + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free_aof; + } + + ret = btf_find_kptr(btf, t, 0, 0, &tmp); + if (ret != BTF_FIELD_FOUND) + continue; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = i; }
- if (!aof.set.cnt) + if (!aof->cnt) { + kfree(aof); return NULL; - sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); + } + sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
- n = btf_nr_types(btf); for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; @@@ -5589,13 -5556,17 +5589,13 @@@ int j, tab_cnt;
t = btf_type_by_id(btf, i); - if (!t) { - ret = -EINVAL; - goto free; - } if (!__btf_type_is_struct(t)) continue;
cond_resched();
for_each_member(j, t, member) { - if (btf_id_set_contains(&aof.set, member->type)) + if (btf_id_set_contains(aof, member->type)) goto parse; } continue; @@@ -5614,8 -5585,7 +5614,8 @@@ type = &tab->types[tab->cnt]; type->btf_id = i; record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | - BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT | + BPF_KPTR, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; @@@ -5624,12 -5594,9 +5624,12 @@@ type->record = record; tab->cnt++; } + kfree(aof); return tab; free: btf_struct_metas_free(tab); +free_aof: + kfree(aof); return ERR_PTR(ret); }
@@@ -6276,11 -6243,12 +6276,11 @@@ static struct btf *btf_parse_module(con btf->kernel_btf = true; snprintf(btf->name, sizeof(btf->name), "%s", module_name);
- btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); + btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN); if (!btf->data) { err = -ENOMEM; goto errout; } - memcpy(btf->data, data, data_size); btf->data_size = data_size;
err = btf_parse_hdr(env); @@@ -6315,7 -6283,7 +6315,7 @@@
errout: btf_verifier_env_free(env); - if (base_btf != vmlinux_btf) + if (!IS_ERR(base_btf) && base_btf != vmlinux_btf) btf_free(base_btf); if (btf) { kvfree(btf->data); @@@ -6558,9 -6526,6 +6558,9 @@@ bool btf_ctx_access(int off, int size, if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED;
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + info->reg_type |= PTR_MAYBE_NULL; + if (tgt_prog) { enum bpf_prog_type tgt_type;
@@@ -7711,21 -7676,16 +7711,16 @@@ int btf_new_fd(const union bpf_attr *at struct btf *btf_get_by_fd(int fd) { struct btf *btf; - struct fd f; + CLASS(fd, f)(fd);
- f = fdget(fd); - - if (!fd_file(f)) + if (fd_empty(f)) return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &btf_fops) { - fdput(f); + if (fd_file(f)->f_op != &btf_fops) return ERR_PTR(-EINVAL); - }
btf = fd_file(f)->private_data; refcount_inc(&btf->refcnt); - fdput(f);
return btf; } @@@ -8087,44 -8047,15 +8082,44 @@@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE
+/* Validate well-formedness of iter argument type. + * On success, return positive BTF ID of iter state's STRUCT type. + * On error, negative error is returned. + */ +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + const struct btf_param *arg; + const struct btf_type *t; + const char *name; + int btf_id; + + if (btf_type_vlen(func) <= arg_idx) + return -EINVAL; + + arg = &btf_params(func)[arg_idx]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, &btf_id); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + return btf_id; +} + static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, const struct btf_type *func, u32 func_flags) { u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); - const char *name, *sfx, *iter_name; - const struct btf_param *arg; + const char *sfx, *iter_name; const struct btf_type *t; char exp_name[128]; u32 nr_args; + int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ if (!flags || (flags & (flags - 1))) @@@ -8135,21 -8066,28 +8130,21 @@@ if (nr_args < 1) return -EINVAL;
- arg = &btf_params(func)[0]; - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!t || !btf_type_is_ptr(t)) - return -EINVAL; - t = btf_type_skip_modifiers(btf, t->type, NULL); - if (!t || !__btf_type_is_struct(t)) - return -EINVAL; - - name = btf_name_by_offset(btf, t->name_off); - if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) - return -EINVAL; + btf_id = btf_check_iter_arg(btf, func, 0); + if (btf_id < 0) + return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to * fit nicely in stack slots */ + t = btf_type_by_id(btf, btf_id); if (t->size == 0 || (t->size % 8)) return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *) * naming pattern */ - iter_name = name + sizeof(ITER_PREFIX) - 1; + iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1; if (flags & KF_ITER_NEW) sfx = "new"; else if (flags & KF_ITER_NEXT) @@@ -8364,19 -8302,13 +8359,19 @@@ static int bpf_prog_type_to_kfunc_hook( case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: @@@ -8952,7 -8884,6 +8947,7 @@@ int bpf_core_apply(struct bpf_core_ctx struct bpf_core_cand_list cands = {}; struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; + const struct btf_type *type; int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" @@@ -8962,13 -8893,6 +8957,13 @@@ if (!specs) return -ENOMEM;
+ type = btf_type_by_id(ctx->btf, relo->type_id); + if (!type) { + bpf_log(ctx->log, "relo #%u: bad type id %u\n", + relo_idx, relo->type_id); + return -EINVAL; + } + if (need_cands) { struct bpf_cand_cache *cc; int i; diff --combined kernel/bpf/syscall.c index 8386f25bc532c,65dcd92d0b2c5..a8f1808a1ca54 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@@ -550,8 -550,7 +550,8 @@@ void btf_record_free(struct btf_record case BPF_KPTR_PERCPU: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); - btf_put(rec->fields[i].kptr.btf); + if (btf_is_kernel(rec->fields[i].kptr.btf)) + btf_put(rec->fields[i].kptr.btf); break; case BPF_LIST_HEAD: case BPF_LIST_NODE: @@@ -597,8 -596,7 +597,8 @@@ struct btf_record *btf_record_dup(cons case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: - btf_get(fields[i].kptr.btf); + if (btf_is_kernel(fields[i].kptr.btf)) + btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { ret = -ENXIO; goto free; @@@ -735,11 -733,15 +735,11 @@@ void bpf_obj_free_fields(const struct b } }
-/* called from workqueue */ -static void bpf_map_free_deferred(struct work_struct *work) +static void bpf_map_free(struct bpf_map *map) { - struct bpf_map *map = container_of(work, struct bpf_map, work); struct btf_record *rec = map->record; struct btf *btf = map->btf;
- security_bpf_map_free(map); - bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); /* Delay freeing of btf_record for maps, as map_free @@@ -758,16 -760,6 +758,16 @@@ btf_put(btf); }
+/* called from workqueue */ +static void bpf_map_free_deferred(struct work_struct *work) +{ + struct bpf_map *map = container_of(work, struct bpf_map, work); + + security_bpf_map_free(map); + bpf_map_release_memcg(map); + bpf_map_free(map); +} + static void bpf_map_put_uref(struct bpf_map *map) { if (atomic64_dec_and_test(&map->usercnt)) { @@@ -1419,27 -1411,13 +1419,12 @@@ static int map_create(union bpf_attr *a free_map_sec: security_bpf_map_free(map); free_map: - btf_put(map->btf); - map->ops->map_free(map); + bpf_map_free(map); put_token: bpf_token_put(token); return err; }
- /* if error is returned, fd is released. - * On success caller should complete fd access with matching fdput() - */ - struct bpf_map *__bpf_map_get(struct fd f) - { - if (!fd_file(f)) - return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_map_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - return fd_file(f)->private_data; - } - void bpf_map_inc(struct bpf_map *map) { atomic64_inc(&map->refcnt); @@@ -1455,15 -1433,11 +1440,11 @@@ EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref
struct bpf_map *bpf_map_get(u32 ufd) { - struct fd f = fdget(ufd); - struct bpf_map *map; + CLASS(fd, f)(ufd); + struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f); - if (IS_ERR(map)) - return map; - - bpf_map_inc(map); - fdput(f); + if (!IS_ERR(map)) + bpf_map_inc(map);
return map; } @@@ -1471,15 -1445,11 +1452,11 @@@ EXPORT_SYMBOL(bpf_map_get)
struct bpf_map *bpf_map_get_with_uref(u32 ufd) { - struct fd f = fdget(ufd); - struct bpf_map *map; + CLASS(fd, f)(ufd); + struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f); - if (IS_ERR(map)) - return map; - - bpf_map_inc_with_uref(map); - fdput(f); + if (!IS_ERR(map)) + bpf_map_inc_with_uref(map);
return map; } @@@ -1544,11 -1514,9 +1521,9 @@@ static int map_lookup_elem(union bpf_at { void __user *ukey = u64_to_user_ptr(attr->key); void __user *uvalue = u64_to_user_ptr(attr->value); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) @@@ -1557,26 -1525,20 +1532,20 @@@ if (attr->flags & ~BPF_F_LOCK) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { - err = -EPERM; - goto err_put; - } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) + return -EPERM;
if ((attr->flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - err = -EINVAL; - goto err_put; - } + !btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL;
key = __bpf_copy_key(ukey, map->key_size); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto err_put; - } + if (IS_ERR(key)) + return PTR_ERR(key);
value_size = bpf_map_value_size(map);
@@@ -1607,8 -1569,6 +1576,6 @@@ free_value kvfree(value); free_key: kvfree(key); - err_put: - fdput(f); return err; }
@@@ -1619,17 -1579,15 +1586,15 @@@ static int map_update_elem(union bpf_at { bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1667,7 -1625,6 +1632,6 @@@ free_key kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -1676,16 -1633,14 +1640,14 @@@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) { bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); - int ufd = attr->map_fd; struct bpf_map *map; - struct fd f; void *key; int err;
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1722,7 -1677,6 +1684,6 @@@ out kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -1733,30 -1687,24 +1694,24 @@@ static int map_get_next_key(union bpf_a { void __user *ukey = u64_to_user_ptr(attr->key); void __user *unext_key = u64_to_user_ptr(attr->next_key); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *next_key; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { - err = -EPERM; - goto err_put; - } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) + return -EPERM;
if (ukey) { key = __bpf_copy_key(ukey, map->key_size); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto err_put; - } + if (IS_ERR(key)) + return PTR_ERR(key); } else { key = NULL; } @@@ -1788,8 -1736,6 +1743,6 @@@ free_next_key kvfree(next_key); free_key: kvfree(key); - err_put: - fdput(f); return err; }
@@@ -2018,11 -1964,9 +1971,9 @@@ static int map_lookup_and_delete_elem(u { void __user *ukey = u64_to_user_ptr(attr->key); void __user *uvalue = u64_to_user_ptr(attr->value); - int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; u32 value_size; - struct fd f; int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) @@@ -2031,7 -1975,7 +1982,7 @@@ if (attr->flags & ~BPF_F_LOCK) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -2101,7 -2045,6 +2052,6 @@@ free_key kvfree(key); err_put: bpf_map_write_active_dec(map); - fdput(f); return err; }
@@@ -2109,27 -2052,22 +2059,22 @@@
static int map_freeze(const union bpf_attr *attr) { - int err = 0, ufd = attr->map_fd; + int err = 0; struct bpf_map *map; - struct fd f;
if (CHECK_ATTR(BPF_MAP_FREEZE)) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map);
- if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) { - fdput(f); + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) return -ENOTSUPP; - }
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - fdput(f); + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) return -EPERM; - }
mutex_lock(&map->freeze_mutex); if (bpf_map_write_active(map)) { @@@ -2144,7 -2082,6 +2089,6 @@@ WRITE_ONCE(map->frozen, true); err_put: mutex_unlock(&map->freeze_mutex); - fdput(f); return err; }
@@@ -2414,18 -2351,6 +2358,6 @@@ int bpf_prog_new_fd(struct bpf_prog *pr O_RDWR | O_CLOEXEC); }
- static struct bpf_prog *____bpf_prog_get(struct fd f) - { - if (!fd_file(f)) - return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_prog_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - return fd_file(f)->private_data; - } - void bpf_prog_add(struct bpf_prog *prog, int i) { atomic64_add(i, &prog->aux->refcnt); @@@ -2481,20 -2406,19 +2413,19 @@@ bool bpf_prog_get_ok(struct bpf_prog *p static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, bool attach_drv) { - struct fd f = fdget(ufd); + CLASS(fd, f)(ufd); struct bpf_prog *prog;
- prog = ____bpf_prog_get(f); - if (IS_ERR(prog)) - return prog; - if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { - prog = ERR_PTR(-EINVAL); - goto out; - } + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (fd_file(f)->f_op != &bpf_prog_fops) + return ERR_PTR(-EINVAL); + + prog = fd_file(f)->private_data; + if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) + return ERR_PTR(-EINVAL);
bpf_prog_inc(prog); - out: - fdput(f); return prog; }
@@@ -3263,20 -3187,16 +3194,16 @@@ int bpf_link_new_fd(struct bpf_link *li
struct bpf_link *bpf_link_get_from_fd(u32 ufd) { - struct fd f = fdget(ufd); + CLASS(fd, f)(ufd); struct bpf_link *link;
- if (!fd_file(f)) + if (fd_empty(f)) return ERR_PTR(-EBADF); - if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) { - fdput(f); + if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) return ERR_PTR(-EINVAL); - }
link = fd_file(f)->private_data; bpf_link_inc(link); - fdput(f); - return link; } EXPORT_SYMBOL(bpf_link_get_from_fd); @@@ -4981,33 -4901,25 +4908,25 @@@ static int bpf_link_get_info_by_fd(stru static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, union bpf_attr __user *uattr) { - int ufd = attr->info.bpf_fd; - struct fd f; - int err; - if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) return -EINVAL;
- f = fdget(ufd); - if (!fd_file(f)) + CLASS(fd, f)(attr->info.bpf_fd); + if (fd_empty(f)) return -EBADFD;
if (fd_file(f)->f_op == &bpf_prog_fops) - err = bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, + return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &bpf_map_fops) - err = bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, + return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &btf_fops) - err = bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); + return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) - err = bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, + return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); - else - err = -EINVAL; - - fdput(f); - return err; + return -EINVAL; }
#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd @@@ -5195,14 -5107,13 +5114,13 @@@ static int bpf_map_do_batch(const unio cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; struct bpf_map *map; - int err, ufd; - struct fd f; + int err;
if (CHECK_ATTR(BPF_MAP_BATCH)) return -EINVAL;
- ufd = attr->batch.map_fd; - f = fdget(ufd); + CLASS(fd, f)(attr->batch.map_fd); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -5230,7 -5141,6 +5148,6 @@@ err_put maybe_wait_bpf_programs(map); bpf_map_write_active_dec(map); } - fdput(f); return err; }
@@@ -5675,7 -5585,7 +5592,7 @@@ static int token_create(union bpf_attr return bpf_token_create(attr); }
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; int err; @@@ -5939,7 -5849,6 +5856,7 @@@ static const struct bpf_func_proto bpf_
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) { + *res = 0; if (flags) return -EINVAL;
@@@ -5960,8 -5869,7 +5877,8 @@@ static const struct bpf_func_proto bpf_ .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), };
static const struct bpf_func_proto * diff --combined kernel/bpf/verifier.c index dd86282ccaa4a,e3932f8ce10a3..9a7ed527e47e3 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -28,8 -28,6 +28,8 @@@ #include <linux/cpumask.h> #include <linux/bpf_mem_alloc.h> #include <net/xdp.h> +#include <linux/trace_events.h> +#include <linux/kallsyms.h>
#include "disasm.h"
@@@ -385,6 -383,11 +385,6 @@@ static void verbose_invalid_scalar(stru verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); }
-static bool type_may_be_null(u32 type) -{ - return type & PTR_MAYBE_NULL; -} - static bool reg_not_null(const struct bpf_reg_state *reg) { enum bpf_reg_type type; @@@ -4576,28 -4579,28 +4576,28 @@@ static int get_reg_width(struct bpf_reg return fls64(reg->umax_value); }
-/* See comment for mark_nocsr_pattern_for_call() */ -static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state, - int insn_idx, int off) +/* See comment for mark_fastcall_pattern_for_call() */ +static void check_fastcall_stack_contract(struct bpf_verifier_env *env, + struct bpf_func_state *state, int insn_idx, int off) { struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; struct bpf_insn_aux_data *aux = env->insn_aux_data; int i;
- if (subprog->nocsr_stack_off <= off || aux[insn_idx].nocsr_pattern) + if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) return; - /* access to the region [max_stack_depth .. nocsr_stack_off) - * from something that is not a part of the nocsr pattern, - * disable nocsr rewrites for current subprogram by setting - * nocsr_stack_off to a value smaller than any possible offset. + /* access to the region [max_stack_depth .. fastcall_stack_off) + * from something that is not a part of the fastcall pattern, + * disable fastcall rewrites for current subprogram by setting + * fastcall_stack_off to a value smaller than any possible offset. */ - subprog->nocsr_stack_off = S16_MIN; - /* reset nocsr aux flags within subprogram, + subprog->fastcall_stack_off = S16_MIN; + /* reset fastcall aux flags within subprogram, * happens at most once per subprogram */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { - aux[i].nocsr_spills_num = 0; - aux[i].nocsr_pattern = 0; + aux[i].fastcall_spills_num = 0; + aux[i].fastcall_pattern = 0; } }
@@@ -4649,7 -4652,7 +4649,7 @@@ static int check_stack_write_fixed_off( if (err) return err;
- check_nocsr_stack_contract(env, state, insn_idx, off); + check_fastcall_stack_contract(env, state, insn_idx, off); mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { bool reg_value_fits; @@@ -4784,7 -4787,7 +4784,7 @@@ static int check_stack_write_var_off(st return err; }
- check_nocsr_stack_contract(env, state, insn_idx, min_off); + check_fastcall_stack_contract(env, state, insn_idx, min_off); /* Variable offset writes destroy any spilled pointers in range. */ for (i = min_off; i < max_off; i++) { u8 new_type, *stype; @@@ -4923,7 -4926,7 +4923,7 @@@ static int check_stack_read_fixed_off(s reg = ®_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi); - check_nocsr_stack_contract(env, state, env->insn_idx, off); + check_fastcall_stack_contract(env, state, env->insn_idx, off);
if (is_spilled_reg(®_state->stack[spi])) { u8 spill_size = 1; @@@ -5084,7 -5087,7 +5084,7 @@@ static int check_stack_read_var_off(str min_off = reg->smin_value + off; max_off = reg->smax_value + off; mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); - check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off); + check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); return 0; }
@@@ -6801,13 -6804,13 +6801,13 @@@ static int check_stack_slot_within_boun struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx]; int min_valid_off, max_bpf_stack;
- /* If accessing instruction is a spill/fill from nocsr pattern, + /* If accessing instruction is a spill/fill from bpf_fastcall pattern, * add room for all caller saved registers below MAX_BPF_STACK. - * In case if nocsr rewrite won't happen maximal stack depth + * In case if bpf_fastcall rewrite won't happen maximal stack depth * would be checked by check_max_stack_depth_subprog(). */ max_bpf_stack = MAX_BPF_STACK; - if (aux->nocsr_pattern) + if (aux->fastcall_pattern) max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
if (t == BPF_WRITE || env->allow_uninit_stack) @@@ -7800,38 -7803,29 +7800,38 @@@ static int process_kptr_func(struct bpf struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - struct bpf_map *map_ptr = reg->map_ptr; struct btf_field *kptr_field; + struct bpf_map *map_ptr; + struct btf_record *rec; u32 kptr_off;
+ if (type_is_ptr_alloc_obj(reg->type)) { + rec = reg_btf_record(reg); + } else { /* PTR_TO_MAP_VALUE */ + map_ptr = reg->map_ptr; + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + rec = map_ptr->record; + meta->map_ptr = map_ptr; + } + if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d doesn't have constant offset. kptr has to be at the constant offset\n", regno); return -EINVAL; } - if (!map_ptr->btf) { - verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", - map_ptr->name); - return -EINVAL; - } - if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) { - verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + + if (!btf_record_has_field(rec, BPF_KPTR)) { + verbose(env, "R%d has no valid kptr\n", regno); return -EINVAL; }
- meta->map_ptr = map_ptr; kptr_off = reg->off + reg->var_off.value; - kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR); + kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); if (!kptr_field) { verbose(env, "off=%d doesn't point to kptr\n", kptr_off); return -EACCES; @@@ -7976,17 -7970,12 +7976,17 @@@ static bool is_iter_destroy_kfunc(struc return meta->kfunc_flags & KF_ITER_DESTROY; }
-static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg) +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, + const struct btf_param *arg) { /* btf_check_iter_kfuncs() guarantees that first argument of any iter * kfunc is iter state pointer */ - return arg == 0 && is_iter_kfunc(meta); + if (is_iter_kfunc(meta)) + return arg_idx == 0; + + /* iter passed as an argument to a generic kfunc */ + return btf_param_match_suffix(meta->btf, arg, "__iter"); }
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, @@@ -7994,20 -7983,14 +7994,20 @@@ { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; const struct btf_type *t; - const struct btf_param *arg; - int spi, err, i, nr_slots; - u32 btf_id; + int spi, err, i, nr_slots, btf_id;
- /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */ - arg = &btf_params(meta->func_proto)[0]; - t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */ - t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ + /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() + * ensures struct convention, so we wouldn't need to do any BTF + * validation here. But given iter state can be passed as a parameter + * to any kfunc, if arg has "__iter" suffix, we need to be a bit more + * conservative here. + */ + btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); + if (btf_id < 0) { + verbose(env, "expected valid iter pointer as arg #%d\n", regno); + return -EINVAL; + } + t = btf_type_by_id(meta->btf, btf_id); nr_slots = t->size / BPF_REG_SIZE;
if (is_iter_new_kfunc(meta)) { @@@ -8029,9 -8012,7 +8029,9 @@@ if (err) return err; } else { - /* iter_next() or iter_destroy() expect initialized iter state*/ + /* iter_next() or iter_destroy(), as well as any kfunc + * accepting iter argument, expect initialized iter state + */ err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots); switch (err) { case 0: @@@ -8145,15 -8126,6 +8145,15 @@@ static int widen_imprecise_scalars(stru return 0; }
+static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st, + struct bpf_kfunc_call_arg_meta *meta) +{ + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; +} + /* process_iter_next_call() is called when verifier gets to iterator's next * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer * to it as just "iter_next()" in comments below. @@@ -8238,10 -8210,12 +8238,10 @@@ static int process_iter_next_call(struc struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; struct bpf_reg_state *cur_iter, *queued_iter; - int iter_frameno = meta->iter.frameno; - int iter_spi = meta->iter.spi;
BTF_TYPE_EMIT(struct bpf_iter);
- cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + cur_iter = get_iter_from_state(cur_st, meta);
if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { @@@ -8269,7 -8243,7 +8269,7 @@@ if (!queued_st) return -ENOMEM;
- queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter = get_iter_from_state(queued_st, meta); queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; queued_iter->iter.depth++; if (prev_st) @@@ -8293,12 -8267,6 +8293,12 @@@ static bool arg_type_is_mem_size(enum b type == ARG_CONST_SIZE_OR_ZERO; }
+static bool arg_type_is_raw_mem(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_MEM && + type & MEM_UNINIT; +} + static bool arg_type_is_release(enum bpf_arg_type type) { return type & OBJ_RELEASE; @@@ -8309,6 -8277,16 +8309,6 @@@ static bool arg_type_is_dynptr(enum bpf return base_type(type) == ARG_PTR_TO_DYNPTR; }
-static int int_ptr_type_to_size(enum bpf_arg_type type) -{ - if (type == ARG_PTR_TO_INT) - return sizeof(u32); - else if (type == ARG_PTR_TO_LONG) - return sizeof(u64); - - return -EINVAL; -} - static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_arg_type *arg_type) @@@ -8381,6 -8359,16 +8381,6 @@@ static const struct bpf_reg_types mem_t }, };
-static const struct bpf_reg_types int_ptr_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE, @@@ -8411,12 -8399,7 +8411,12 @@@ static const struct bpf_reg_types func_ static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_xchg_dest_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC + } +}; static const struct bpf_reg_types dynptr_types = { .types = { PTR_TO_STACK, @@@ -8441,12 -8424,14 +8441,12 @@@ static const struct bpf_reg_types *comp [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, - [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, [ARG_PTR_TO_DYNPTR] = &dynptr_types, };
@@@ -8485,8 -8470,7 +8485,8 @@@ static int check_reg_type(struct bpf_ve if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) { + /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { type &= ~MEM_ALLOC; type &= ~MEM_PERCPU; } @@@ -8579,8 -8563,7 +8579,8 @@@ found verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } - if (meta->func_id == BPF_FUNC_kptr_xchg) { + /* Check if local kptr in src arg matches kptr in dst arg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) return -EACCES; } @@@ -8891,7 -8874,7 +8891,7 @@@ skip_type_check meta->release_regno = regno; }
- if (reg->ref_obj_id) { + if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno, reg->ref_obj_id, @@@ -9003,11 -8986,9 +9003,11 @@@ */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, - fn->arg_size[arg], false, - meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); + if (err) + return err; + if (arg_type & MEM_ALIGNED) + err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); } break; case ARG_CONST_SIZE: @@@ -9032,6 -9013,17 +9032,6 @@@ if (err) return err; break; - case ARG_PTR_TO_INT: - case ARG_PTR_TO_LONG: - { - int size = int_ptr_type_to_size(arg_type); - - err = check_helper_mem_access(env, regno, size, false, meta); - if (err) - return err; - err = check_ptr_alignment(env, reg, 0, size, true); - break; - } case ARG_PTR_TO_CONST_STR: { err = check_reg_const_str(env, reg, regno); @@@ -9039,7 -9031,7 +9039,7 @@@ return err; break; } - case ARG_PTR_TO_KPTR: + case ARG_KPTR_XCHG_DEST: err = process_kptr_func(env, regno, meta); if (err) return err; @@@ -9348,15 -9340,15 +9348,15 @@@ static bool check_raw_mode_ok(const str { int count = 0;
- if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg1_type)) count++; - if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg2_type)) count++; - if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg3_type)) count++; - if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg4_type)) count++; - if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg5_type)) count++;
/* We only support one arg being in raw mode at the moment, @@@ -11390,7 -11382,7 +11390,7 @@@ get_kfunc_ptr_arg_type(struct bpf_verif if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR;
- if (is_kfunc_arg_iter(meta, argno)) + if (is_kfunc_arg_iter(meta, argno, &args[argno])) return KF_ARG_PTR_TO_ITER;
if (is_kfunc_arg_list_head(meta->btf, &args[argno])) @@@ -11492,7 -11484,8 +11492,7 @@@ static int process_kf_arg_ptr_to_btf_id * btf_struct_ids_match() to walk the struct at the 0th offset, and * resolve types. */ - if (is_kfunc_acquire(meta) || - (is_kfunc_release(meta) && reg->ref_obj_id) || + if ((is_kfunc_release(meta) && reg->ref_obj_id) || btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true;
@@@ -12109,8 -12102,7 +12109,8 @@@ static int check_kfunc_args(struct bpf_ switch (kf_arg_type) { case KF_ARG_PTR_TO_CTX: if (reg->type != PTR_TO_CTX) { - verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", + i, reg_type_str(env, reg->type)); return -EINVAL; }
@@@ -12833,17 -12825,6 +12833,17 @@@ static int check_kfunc_call(struct bpf_ regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + + if (is_iter_next_kfunc(&meta)) { + struct bpf_reg_state *cur_iter; + + cur_iter = get_iter_from_state(env->cur_state, &meta); + + if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */ + regs[BPF_REG_0].type |= MEM_RCU; + else + regs[BPF_REG_0].type |= PTR_TRUSTED; + } }
if (is_kfunc_ret_null(&meta)) { @@@ -16124,14 -16105,14 +16124,14 @@@ static int visit_func_call_insn(int t,
/* Return a bitmask specifying which caller saved registers are * clobbered by a call to a helper *as if* this helper follows - * no_caller_saved_registers contract: + * bpf_fastcall contract: * - includes R0 if function is non-void; * - includes R1-R5 if corresponding parameter has is described * in the function prototype. */ -static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn) +static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) { - u8 mask; + u32 mask; int i;
mask = 0; @@@ -16144,8 -16125,8 +16144,8 @@@ }
/* True if do_misc_fixups() replaces calls to helper number 'imm', - * replacement patch is presumed to follow no_caller_saved_registers contract - * (see mark_nocsr_pattern_for_call() below). + * replacement patch is presumed to follow bpf_fastcall contract + * (see mark_fastcall_pattern_for_call() below). */ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) { @@@ -16159,30 -16140,7 +16159,30 @@@ } }
-/* GCC and LLVM define a no_caller_saved_registers function attribute. +/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */ +static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) +{ + u32 vlen, i, mask; + + vlen = btf_type_vlen(meta->func_proto); + mask = 0; + if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type))) + mask |= BIT(BPF_REG_0); + for (i = 0; i < vlen; ++i) + mask |= BIT(BPF_REG_1 + i); + return mask; +} + +/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ +static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) +{ + if (meta->btf == btf_vmlinux) + return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || + meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]; + return false; +} + +/* LLVM define a bpf_fastcall function attribute. * This attribute means that function scratches only some of * the caller saved registers defined by ABI. * For BPF the set of such registers could be defined as follows: @@@ -16192,12 -16150,13 +16192,12 @@@ * * The contract between kernel and clang allows to simultaneously use * such functions and maintain backwards compatibility with old - * kernels that don't understand no_caller_saved_registers calls - * (nocsr for short): + * kernels that don't understand bpf_fastcall calls: * - * - for nocsr calls clang allocates registers as-if relevant r0-r5 + * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 * registers are not scratched by the call; * - * - as a post-processing step, clang visits each nocsr call and adds + * - as a post-processing step, clang visits each bpf_fastcall call and adds * spill/fill for every live r0-r5; * * - stack offsets used for the spill/fill are allocated as lowest @@@ -16205,11 -16164,11 +16205,11 @@@ * purposes; * * - when kernel loads a program, it looks for such patterns - * (nocsr function surrounded by spills/fills) and checks if - * spill/fill stack offsets are used exclusively in nocsr patterns; + * (bpf_fastcall function surrounded by spills/fills) and checks if + * spill/fill stack offsets are used exclusively in fastcall patterns; * * - if so, and if verifier or current JIT inlines the call to the - * nocsr function (e.g. a helper call), kernel removes unnecessary + * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary * spill/fill pairs; * * - when old kernel loads a program, presence of spill/fill pairs @@@ -16228,22 -16187,22 +16228,22 @@@ * r0 += r2; * exit; * - * The purpose of mark_nocsr_pattern_for_call is to: + * The purpose of mark_fastcall_pattern_for_call is to: * - look for such patterns; - * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern; - * - mark set env->insn_aux_data[*].nocsr_spills_num for call instruction; - * - update env->subprog_info[*]->nocsr_stack_off to find an offset - * at which nocsr spill/fill stack slots start; - * - update env->subprog_info[*]->keep_nocsr_stack. + * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; + * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; + * - update env->subprog_info[*]->fastcall_stack_off to find an offset + * at which bpf_fastcall spill/fill stack slots start; + * - update env->subprog_info[*]->keep_fastcall_stack. 
* - * The .nocsr_pattern and .nocsr_stack_off are used by - * check_nocsr_stack_contract() to check if every stack access to - * nocsr spill/fill stack slot originates from spill/fill - * instructions, members of nocsr patterns. + * The .fastcall_pattern and .fastcall_stack_off are used by + * check_fastcall_stack_contract() to check if every stack access to + * fastcall spill/fill stack slot originates from spill/fill + * instructions, members of fastcall patterns. * - * If such condition holds true for a subprogram, nocsr patterns could - * be rewritten by remove_nocsr_spills_fills(). - * Otherwise nocsr patterns are not changed in the subprogram + * If such condition holds true for a subprogram, fastcall patterns could + * be rewritten by remove_fastcall_spills_fills(). + * Otherwise bpf_fastcall patterns are not changed in the subprogram * (code, presumably, generated by an older clang version). * * For example, it is *not* safe to remove spill/fill below: @@@ -16256,9 -16215,9 +16256,9 @@@ * r0 += r1; exit; * exit; */ -static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env, - struct bpf_subprog_info *subprog, - int insn_idx, s16 lowest_off) +static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, + struct bpf_subprog_info *subprog, + int insn_idx, s16 lowest_off) { struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; struct bpf_insn *call = &env->prog->insnsi[insn_idx]; @@@ -16273,25 -16232,12 +16273,25 @@@ if (get_helper_proto(env, call->imm, &fn) < 0) /* error would be reported later */ return; - clobbered_regs_mask = helper_nocsr_clobber_mask(fn); - can_be_inlined = fn->allow_nocsr && + clobbered_regs_mask = helper_fastcall_clobber_mask(fn); + can_be_inlined = fn->allow_fastcall && (verifier_inlines_helper_call(env, call->imm) || bpf_jit_inlines_helper_call(call->imm)); }
+ if (bpf_pseudo_kfunc_call(call)) { + struct bpf_kfunc_call_arg_meta meta; + int err; + + err = fetch_kfunc_meta(env, call, &meta, NULL); + if (err < 0) + /* error would be reported later */ + return; + + clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta); + can_be_inlined = is_fastcall_kfunc_call(&meta); + } + if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS) return;
@@@ -16330,36 -16276,36 +16330,36 @@@ if (stx->off != off || ldx->off != off) break; expected_regs_mask &= ~BIT(stx->src_reg); - env->insn_aux_data[insn_idx - i].nocsr_pattern = 1; - env->insn_aux_data[insn_idx + i].nocsr_pattern = 1; + env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; + env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; } if (i == 1) return;
- /* Conditionally set 'nocsr_spills_num' to allow forward + /* Conditionally set 'fastcall_spills_num' to allow forward * compatibility when more helper functions are marked as - * nocsr at compile time than current kernel supports, e.g: + * bpf_fastcall at compile time than current kernel supports, e.g: * * 1: *(u64 *)(r10 - 8) = r1 - * 2: call A ;; assume A is nocsr for current kernel + * 2: call A ;; assume A is bpf_fastcall for current kernel * 3: r1 = *(u64 *)(r10 - 8) * 4: *(u64 *)(r10 - 8) = r1 - * 5: call B ;; assume B is not nocsr for current kernel + * 5: call B ;; assume B is not bpf_fastcall for current kernel * 6: r1 = *(u64 *)(r10 - 8) * - * There is no need to block nocsr rewrite for such program. - * Set 'nocsr_pattern' for both calls to keep check_nocsr_stack_contract() happy, - * don't set 'nocsr_spills_num' for call B so that remove_nocsr_spills_fills() + * There is no need to block bpf_fastcall rewrite for such program. + * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, + * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() * does not remove spill/fill pair {4,6}. */ if (can_be_inlined) - env->insn_aux_data[insn_idx].nocsr_spills_num = i - 1; + env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; else - subprog->keep_nocsr_stack = 1; - subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off); + subprog->keep_fastcall_stack = 1; + subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); }
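The clobber-mask convention used by helper_fastcall_clobber_mask()/kfunc_fastcall_clobber_mask() above can be illustrated with a small userspace sketch: a callee that returns a value and takes N arguments is assumed to scratch only r0 and r1..rN, so spill/fill pairs protecting the remaining caller-saved registers around the call are candidates for removal. The helper_desc structure below is invented for illustration; it is not the kernel's struct bpf_func_proto.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n) (1u << (n))
	enum { R0, R1, R2, R3, R4, R5 };	/* stand-ins for BPF_REG_0..BPF_REG_5 */

	/* Invented descriptor: whether the callee returns a value and how many
	 * arguments it takes, which is all the mask computation needs.
	 */
	struct helper_desc {
		bool has_ret;
		int nr_args;
	};

	static uint32_t fastcall_clobber_mask(const struct helper_desc *d)
	{
		uint32_t mask = 0;

		if (d->has_ret)
			mask |= BIT(R0);
		for (int i = 0; i < d->nr_args; i++)
			mask |= BIT(R1 + i);
		return mask;
	}

	int main(void)
	{
		/* A no-argument helper (e.g. bpf_get_smp_processor_id()) scratches
		 * only r0, so r1-r5 stay live across the call and the surrounding
		 * spill/fill pairs can be dropped once the call is known to be inlined.
		 */
		struct helper_desc no_arg_helper = { .has_ret = true, .nr_args = 0 };

		printf("clobber mask: 0x%x\n", fastcall_clobber_mask(&no_arg_helper));
		return 0;
	}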
-static int mark_nocsr_patterns(struct bpf_verifier_env *env) +static int mark_fastcall_patterns(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn; @@@ -16376,12 -16322,12 +16376,12 @@@ continue; lowest_off = min(lowest_off, insn->off); } - /* use this offset to find nocsr patterns */ + /* use this offset to find fastcall patterns */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { insn = env->prog->insnsi + i; if (insn->code != (BPF_JMP | BPF_CALL)) continue; - mark_nocsr_pattern_for_call(env, subprog, i, lowest_off); + mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); } } return 0; @@@ -17396,9 -17342,8 +17396,9 @@@ static bool stacksafe(struct bpf_verifi spi = i / BPF_REG_SIZE;
if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false;
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) @@@ -18920,6 -18865,53 +18920,53 @@@ static bool bpf_map_is_cgroup_storage(s map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); }
+ /* Add map behind fd to used maps list, if it's not already there, and return + * its index. Also set *reused to true if this map was already in the list of + * used maps. + * Returns <0 on error, or >= 0 index, on success. + */ + static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd, bool *reused) + { + CLASS(fd, f)(fd); + struct bpf_map *map; + int i; + + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); + return PTR_ERR(map); + } + + /* check whether we recorded this map already */ + for (i = 0; i < env->used_map_cnt; i++) { + if (env->used_maps[i] == map) { + *reused = true; + return i; + } + } + + if (env->used_map_cnt >= MAX_USED_MAPS) { + verbose(env, "The total number of maps per program has reached the limit of %u\n", + MAX_USED_MAPS); + return -E2BIG; + } + + if (env->prog->sleepable) + atomic64_inc(&map->sleepable_refcnt); + + /* hold the map. If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded + * and all maps are released in bpf_free_used_maps() + */ + bpf_map_inc(map); + + *reused = false; + env->used_maps[env->used_map_cnt++] = map; + + return env->used_map_cnt - 1; + } + /* find and rewrite pseudo imm in ld_imm64 instructions: * * 1. if it accesses map FD, replace it with actual map pointer. @@@ -18931,7 -18923,7 +18978,7 @@@ static int resolve_pseudo_ldimm64(struc { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; - int i, j, err; + int i, err;
err = bpf_prog_calc_tag(env->prog); if (err) @@@ -18948,9 -18940,10 +18995,10 @@@ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_insn_aux_data *aux; struct bpf_map *map; - struct fd f; + int map_idx; u64 addr; u32 fd; + bool reused;
if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@@ -19011,20 -19004,18 +19059,18 @@@ break; }
- f = fdget(fd); - map = __bpf_map_get(f); - if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); - return PTR_ERR(map); - } + map_idx = add_used_map_from_fd(env, fd, &reused); + if (map_idx < 0) + return map_idx; + map = env->used_maps[map_idx]; + + aux = &env->insn_aux_data[i]; + aux->map_index = map_idx;
err = check_map_prog_compatibility(env, map, env->prog); - if (err) { - fdput(f); + if (err) return err; - }
- aux = &env->insn_aux_data[i]; if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; @@@ -19033,13 -19024,11 +19079,11 @@@
if (off >= BPF_MAX_VAR_OFF) { verbose(env, "direct value offset of %u is not allowed\n", off); - fdput(f); return -EINVAL; }
if (!map->ops->map_direct_value_addr) { verbose(env, "no direct value access support for this map type\n"); - fdput(f); return -EINVAL; }
@@@ -19047,7 -19036,6 +19091,6 @@@ if (err) { verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off); - fdput(f); return err; }
@@@ -19058,70 -19046,39 +19101,39 @@@ insn[0].imm = (u32)addr; insn[1].imm = addr >> 32;
- /* check whether we recorded this map already */ - for (j = 0; j < env->used_map_cnt; j++) { - if (env->used_maps[j] == map) { - aux->map_index = j; - fdput(f); - goto next_insn; - } - } - - if (env->used_map_cnt >= MAX_USED_MAPS) { - verbose(env, "The total number of maps per program has reached the limit of %u\n", - MAX_USED_MAPS); - fdput(f); - return -E2BIG; - } - - if (env->prog->sleepable) - atomic64_inc(&map->sleepable_refcnt); - /* hold the map. If the program is rejected by verifier, - * the map will be released by release_maps() or it - * will be used by the valid program until it's unloaded - * and all maps are released in bpf_free_used_maps() - */ - bpf_map_inc(map); - - aux->map_index = env->used_map_cnt; - env->used_maps[env->used_map_cnt++] = map; + /* proceed with extra checks only if it's a newly added used map */ + if (reused) + goto next_insn;
if (bpf_map_is_cgroup_storage(map) && bpf_cgroup_storage_assign(env->prog->aux, map)) { verbose(env, "only one cgroup storage of each type is allowed\n"); - fdput(f); return -EBUSY; } if (map->map_type == BPF_MAP_TYPE_ARENA) { if (env->prog->aux->arena) { verbose(env, "Only one arena per program\n"); - fdput(f); return -EBUSY; } if (!env->allow_ptr_leaks || !env->bpf_capable) { verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); - fdput(f); return -EPERM; } if (!env->prog->jit_requested) { verbose(env, "JIT is required to use arena\n"); - fdput(f); return -EOPNOTSUPP; } if (!bpf_jit_supports_arena()) { verbose(env, "JIT doesn't support arena\n"); - fdput(f); return -EOPNOTSUPP; } env->prog->aux->arena = (void *)map; if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { verbose(env, "arena's user address must be set via map_extra or mmap()\n"); - fdput(f); return -EINVAL; } }
- fdput(f); next_insn: insn++; i++; @@@ -19277,9 -19234,6 +19289,9 @@@ static int adjust_jmp_off(struct bpf_pr for (i = 0; i < insn_cnt; i++, insn++) { u8 code = insn->code;
+ if (tgt_idx <= i && i < tgt_idx + delta) + continue; + if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) continue; @@@ -19668,39 -19622,14 +19680,39 @@@ apply_patch_buffer */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { + struct bpf_subprog_info *subprogs = env->subprog_info; const struct bpf_verifier_ops *ops = env->ops; - int i, cnt, size, ctx_field_size, delta = 0; + int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0; const int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16], *insn; + struct bpf_insn *epilogue_buf = env->epilogue_buf; + struct bpf_insn *insn_buf = env->insn_buf; + struct bpf_insn *insn; u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; + int epilogue_idx = 0; + + if (ops->gen_epilogue) { + epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog, + -(subprogs[0].stack_depth + 8)); + if (epilogue_cnt >= INSN_BUF_SIZE) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (epilogue_cnt) { + /* Save the ARG_PTR_TO_CTX for the epilogue to use */ + cnt = 0; + subprogs[0].stack_depth += 8; + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1, + -subprogs[0].stack_depth); + insn_buf[cnt++] = env->prog->insnsi[0]; + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + delta += cnt - 1; + } + }
if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@@ -19709,7 -19638,7 +19721,7 @@@ } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); - if (cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { @@@ -19722,9 -19651,6 +19734,9 @@@ } }
+ if (delta) + WARN_ON(adjust_jmp_off(env->prog, 0, delta)); + if (bpf_prog_is_offloaded(env->prog->aux)) return 0;
@@@ -19757,25 -19683,6 +19769,25 @@@ insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); env->prog->aux->num_exentries++; continue; + } else if (insn->code == (BPF_JMP | BPF_EXIT) && + epilogue_cnt && + i + delta < subprogs[1].start) { + /* Generate epilogue for the main prog */ + if (epilogue_idx) { + /* jump back to the earlier generated epilogue */ + insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1); + cnt = 1; + } else { + memcpy(insn_buf, epilogue_buf, + epilogue_cnt * sizeof(*epilogue_buf)); + cnt = epilogue_cnt; + /* epilogue_idx cannot be 0. It must have at + * least one ctx ptr saving insn before the + * epilogue. + */ + epilogue_idx = i + delta; + } + goto patch_insn_buf; } else { continue; } @@@ -19878,7 -19785,7 +19890,7 @@@ target_size = 0; cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; @@@ -19887,7 -19794,7 +19899,7 @@@ if (is_narrower_load && size < target_size) { u8 shift = bpf_ctx_narrow_access_offset( off, size, size_default) * 8; - if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) { + if (shift && cnt + 1 >= INSN_BUF_SIZE) { verbose(env, "bpf verifier narrow ctx load misconfigured\n"); return -EINVAL; } @@@ -19912,7 -19819,6 +19924,7 @@@ insn->dst_reg, insn->dst_reg, size * 8, 0);
+patch_insn_buf: new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@@ -20433,7 -20339,7 +20445,7 @@@ static int do_misc_fixups(struct bpf_ve const int insn_cnt = prog->len; const struct bpf_map_ops *ops; struct bpf_insn_aux_data *aux; - struct bpf_insn insn_buf[16]; + struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; struct bpf_map *map_ptr; int i, ret, cnt, delta = 0, cur_subprog = 0; @@@ -20476,46 -20382,13 +20488,46 @@@ /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
- /* Make divide-by-zero exceptions impossible. */ + /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ + if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || + insn->code == (BPF_ALU | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && + insn->off == 1 && insn->imm == -1) { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_sdiv[] = { + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + }; + struct bpf_insn chk_and_smod[] = { + BPF_MOV32_IMM(insn->dst_reg, 0), + }; + + patchlet = isdiv ? chk_and_sdiv : chk_and_smod; + cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod); + + new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + + /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; bool isdiv = BPF_OP(insn->code) == BPF_DIV; + bool is_sdiv = isdiv && insn->off == 1; + bool is_smod = !isdiv && insn->off == 1; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { /* [R,W]x div 0 -> 0 */ @@@ -20535,62 -20408,10 +20547,62 @@@ BPF_JMP_IMM(BPF_JA, 0, 0, 1), BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; + struct bpf_insn chk_and_sdiv[] = { + /* [R,W]x sdiv 0 -> 0 + * LLONG_MIN sdiv -1 -> LLONG_MIN + * INT_MIN sdiv -1 -> INT_MIN + */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 4, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 1, 0), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_MOV | BPF_K, insn->dst_reg, + 0, 0, 0), + /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + }; + struct bpf_insn chk_and_smod[] = { + /* [R,W]x mod 0 -> [R,W]x */ + /* [R,W]x mod -1 -> 0 */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 3, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 3 + (is64 ? 0 : 1), 1), + BPF_MOV32_IMM(insn->dst_reg, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), + };
- patchlet = isdiv ? chk_and_div : chk_and_mod; - cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + if (is_sdiv) { + patchlet = chk_and_sdiv; + cnt = ARRAY_SIZE(chk_and_sdiv); + } else if (is_smod) { + patchlet = chk_and_smod; + cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0); + } else { + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + }
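The extra chk_and_sdiv/chk_and_smod patchlets exist because native signed division cannot express the BPF-defined results: in C, division or modulo by zero and LLONG_MIN / -1 are undefined behaviour (typically a #DE trap on x86), while BPF defines x sdiv 0 == 0, x smod 0 == x, LLONG_MIN sdiv -1 == LLONG_MIN and x smod -1 == 0. A minimal userspace sketch of the 64-bit semantics the rewrites above emulate; this models the defined results only, not the emitted instruction sequence, and the 32-bit case is analogous.

	#include <stdint.h>
	#include <stdio.h>

	/* BPF signed division: divide-by-zero yields 0, INT64_MIN sdiv -1 stays INT64_MIN. */
	static int64_t bpf_sdiv64(int64_t x, int64_t y)
	{
		if (y == 0)
			return 0;
		if (y == -1)			/* mirrors the BPF_NEG-based patchlet */
			return x == INT64_MIN ? INT64_MIN : -x;
		return x / y;
	}

	/* BPF signed modulo: modulo-by-zero leaves the dividend, modulo -1 yields 0. */
	static int64_t bpf_smod64(int64_t x, int64_t y)
	{
		if (y == 0)
			return x;
		if (y == -1)
			return 0;
		return x % y;
	}

	int main(void)
	{
		printf("%lld\n", (long long)bpf_sdiv64(INT64_MIN, -1));	/* INT64_MIN, no trap */
		printf("%lld\n", (long long)bpf_smod64(7, -1));		/* 0 */
		printf("%lld\n", (long long)bpf_sdiv64(42, 0));		/* 0 */
		return 0;
	}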
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) @@@ -20637,7 -20458,7 +20649,7 @@@ (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { cnt = env->ops->gen_ld_abs(insn, insn_buf); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt == 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@@ -20930,7 -20751,7 +20942,7 @@@ cnt = ops->map_gen_lookup(map_ptr, insn_buf); if (cnt == -EOPNOTSUPP) goto patch_map_ops_generic; - if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@@ -21290,7 -21111,7 +21302,7 @@@ static struct bpf_prog *inline_bpf_loop int position, s32 stack_base, u32 callback_subprogno, - u32 *cnt) + u32 *total_cnt) { s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; @@@ -21299,56 -21120,55 +21311,56 @@@ int reg_loop_cnt = BPF_REG_7; int reg_loop_ctx = BPF_REG_8;
+ struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; u32 callback_start; u32 call_insn_offset; s32 callback_offset; + u32 cnt = 0;
/* This represents an inlined version of bpf_iter.c:bpf_loop, * be careful to modify this code in sync. */ - struct bpf_insn insn_buf[] = { - /* Return error and jump to the end of the patch if - * expected number of iterations is too big. - */ - BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), - BPF_MOV32_IMM(BPF_REG_0, -E2BIG), - BPF_JMP_IMM(BPF_JA, 0, 0, 16), - /* spill R6, R7, R8 to use these as loop vars */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), - /* initialize loop vars */ - BPF_MOV64_REG(reg_loop_max, BPF_REG_1), - BPF_MOV32_IMM(reg_loop_cnt, 0), - BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), - /* loop header, - * if reg_loop_cnt >= reg_loop_max skip the loop body - */ - BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), - /* callback call, - * correct callback offset would be set after patching - */ - BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), - BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), - BPF_CALL_REL(0), - /* increment loop counter */ - BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), - /* jump to loop header if callback returned 0 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), - /* return value of bpf_loop, - * set R0 to the number of iterations - */ - BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), - /* restore original values of R6, R7, R8 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), - };
- *cnt = ARRAY_SIZE(insn_buf); - new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2); + insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG); + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16); + /* spill R6, R7, R8 to use these as loop vars */ + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset); + /* initialize loop vars */ + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1); + insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0); + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3); + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5); + /* callback call, + * correct callback offset would be set after patching + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt); + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx); + insn_buf[cnt++] = BPF_CALL_REL(0); + /* increment loop counter */ + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1); + /* jump to loop header if callback returned 0 */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6); + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt); + /* restore original values of R6, R7, R8 */ + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset); + + *total_cnt = cnt; + new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt); if (!new_prog) return new_prog;
@@@ -21423,10 -21243,10 +21435,10 @@@ static int optimize_bpf_loop(struct bpf return 0; }
-/* Remove unnecessary spill/fill pairs, members of nocsr pattern, +/* Remove unnecessary spill/fill pairs, members of fastcall pattern, * adjust subprograms stack depth when possible. */ -static int remove_nocsr_spills_fills(struct bpf_verifier_env *env) +static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn_aux_data *aux = env->insn_aux_data; @@@ -21437,8 -21257,8 +21449,8 @@@ int i, j;
for (i = 0; i < insn_cnt; i++, insn++) { - if (aux[i].nocsr_spills_num > 0) { - spills_num = aux[i].nocsr_spills_num; + if (aux[i].fastcall_spills_num > 0) { + spills_num = aux[i].fastcall_spills_num; /* NOPs would be removed by opt_remove_nops() */ for (j = 1; j <= spills_num; ++j) { *(insn - j) = NOP; @@@ -21447,8 -21267,8 +21459,8 @@@ modified = true; } if ((subprog + 1)->start == i + 1) { - if (modified && !subprog->keep_nocsr_stack) - subprog->stack_depth = -subprog->nocsr_stack_off; + if (modified && !subprog->keep_fastcall_stack) + subprog->stack_depth = -subprog->fastcall_stack_off; subprog++; modified = false; } @@@ -21847,13 -21667,11 +21859,13 @@@ int bpf_check_attach_target(struct bpf_ { bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; + char trace_symbol[KSYM_SYMBOL_LEN]; const char prefix[] = "btf_trace_"; + struct bpf_raw_event_map *btp; int ret = 0, subprog = -1, i; const struct btf_type *t; bool conservative = true; - const char *tname; + const char *tname, *fname; struct btf *btf; long addr = 0; struct module *mod = NULL; @@@ -21984,34 -21802,10 +21996,34 @@@ return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf, t->type); - if (!btf_type_is_ptr(t)) - /* should never happen in valid vmlinux build */ + + /* The func_proto of "btf_trace_##tname" is generated from typedef without argument + * names. Thus using bpf_raw_event_map to get argument names. + */ + btp = bpf_get_raw_tracepoint(tname); + if (!btp) return -EINVAL; + fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, + trace_symbol); + bpf_put_raw_tracepoint(btp); + + if (fname) + ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); + + if (!fname || ret < 0) { + bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", + prefix, tname); + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_ptr(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } else { + t = btf_type_by_id(btf, ret); + if (!btf_type_is_func(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ @@@ -22397,7 -22191,7 +22409,7 @@@ int bpf_check(struct bpf_prog **prog, u if (ret < 0) goto skip_full_check;
- ret = mark_nocsr_patterns(env); + ret = mark_fastcall_patterns(env); if (ret < 0) goto skip_full_check;
@@@ -22414,7 -22208,7 +22426,7 @@@ skip_full_check * allocate additional slots. */ if (ret == 0) - ret = remove_nocsr_spills_fills(env); + ret = remove_fastcall_spills_fills(env);
if (ret == 0) ret = check_max_stack_depth(env); diff --combined net/core/sock_map.c index 724b6856fcc3e,0f5f80f44d520..242c91a6e3d38 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@@ -67,46 -67,39 +67,39 @@@ static struct bpf_map *sock_map_alloc(u
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { - u32 ufd = attr->target_fd; struct bpf_map *map; - struct fd f; int ret;
if (attr->attach_flags || attr->replace_bpf_fd) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); mutex_lock(&sockmap_mutex); ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type); mutex_unlock(&sockmap_mutex); - fdput(f); return ret; }
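The CLASS(fd, f)(attr->target_fd) conversions above work because the file reference is dropped automatically when f goes out of scope, which is what lets the explicit fdput() calls and the error-path labels disappear. A rough userspace analogue of the idea, assuming the kernel side is the scope-based cleanup machinery (CLASS() and the fd class); the scoped_fd macro and autoclose() helper below are invented for illustration.

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Close the guarded descriptor automatically when it leaves scope. */
	static void autoclose(int *fd)
	{
		if (*fd >= 0)
			close(*fd);
	}
	#define scoped_fd __attribute__((cleanup(autoclose))) int

	static long file_size(const char *path)
	{
		scoped_fd fd = open(path, O_RDONLY);

		if (fd < 0)
			return -1;		/* no close() needed on this path ... */
		return lseek(fd, 0, SEEK_END);	/* ... nor on this one */
	}

	int main(void)
	{
		printf("%ld\n", file_size("/etc/hostname"));
		return 0;
	}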
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { - u32 ufd = attr->target_fd; struct bpf_prog *prog; struct bpf_map *map; - struct fd f; int ret;
if (attr->attach_flags || attr->replace_bpf_fd) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map);
prog = bpf_prog_get(attr->attach_bpf_fd); - if (IS_ERR(prog)) { - ret = PTR_ERR(prog); - goto put_map; - } + if (IS_ERR(prog)) + return PTR_ERR(prog);
if (prog->type != ptype) { ret = -EINVAL; @@@ -118,8 -111,6 +111,6 @@@ mutex_unlock(&sockmap_mutex); put_prog: bpf_prog_put(prog); - put_map: - fdput(f); return ret; }
@@@ -1183,7 -1174,6 +1174,7 @@@ static void sock_hash_free(struct bpf_m sock_put(elem->sk); sock_hash_free_elem(htab, elem); } + cond_resched(); }
/* wait for psock readers accessing its map link */ @@@ -1551,18 -1541,17 +1542,17 @@@ int sock_map_bpf_prog_query(const unio union bpf_attr __user *uattr) { __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); - u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd; + u32 prog_cnt = 0, flags = 0; struct bpf_prog **pprog; struct bpf_prog *prog; struct bpf_map *map; - struct fd f; u32 id = 0; int ret;
if (attr->query.query_flags) return -EINVAL;
- f = fdget(ufd); + CLASS(fd, f)(attr->target_fd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@@ -1594,7 -1583,6 +1584,6 @@@ end copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) ret = -EFAULT;
- fdput(f); return ret; }
diff --combined security/security.c index 4564a0a1e4ef3,d8d0b67ced250..6875eb4a59fcc --- a/security/security.c +++ b/security/security.c @@@ -28,29 -28,30 +28,29 @@@ #include <linux/xattr.h> #include <linux/msg.h> #include <linux/overflow.h> +#include <linux/perf_event.h> +#include <linux/fs.h> #include <net/flow.h> +#include <net/sock.h>
-/* How many LSMs were built into the kernel? */ -#define LSM_COUNT (__end_lsm_info - __start_lsm_info) +#define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
/* - * How many LSMs are built into the kernel as determined at - * build time. Used to determine fixed array sizes. - * The capability module is accounted for by CONFIG_SECURITY - */ -#define LSM_CONFIG_COUNT ( \ - (IS_ENABLED(CONFIG_SECURITY) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_TOMOYO) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_YAMA) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LOADPIN) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \ - (IS_ENABLED(CONFIG_EVM) ? 1 : 0)) + * Identifier for the LSM static calls. + * HOOK is an LSM hook as defined in linux/lsm_hookdefs.h + * IDX is the index of the static call. 0 <= NUM < MAX_LSM_COUNT + */ +#define LSM_STATIC_CALL(HOOK, IDX) lsm_static_call_##HOOK##_##IDX + +/* + * Call the macro M for each LSM hook MAX_LSM_COUNT times. + */ +#define LSM_LOOP_UNROLL(M, ...) \ +do { \ + UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__) \ +} while (0) + +#define LSM_DEFINE_UNROLL(M, ...) UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__)
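For reference, a userspace illustration of the unrolling trick, assuming UNROLL(N, M, ...) expands to M(0, ...) M(1, ...) ... M(N-1, ...): every hook ends up with one fixed call site per potentially built-in LSM instead of a run-time list walk. UNROLL3 and PRINT_SLOT below are stand-ins invented for the example, with a hypothetical MAX_LSM_COUNT of 3.

	#include <stdio.h>

	/* Stand-in for the kernel's UNROLL(): expand M(0, ...), M(1, ...), M(2, ...). */
	#define UNROLL3(M, ...)	M(0, __VA_ARGS__) M(1, __VA_ARGS__) M(2, __VA_ARGS__)

	/* Stand-in for __CALL_STATIC_VOID/__CALL_STATIC_INT: one site per slot. */
	#define PRINT_SLOT(NUM, HOOK)	printf("slot %d for hook %s\n", NUM, #HOOK);

	int main(void)
	{
		/* Mirrors LSM_LOOP_UNROLL(M, HOOK) with MAX_LSM_COUNT == 3. */
		UNROLL3(PRINT_SLOT, file_permission)
		return 0;
	}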
/* * These are descriptions of the reasons that can be passed to the @@@ -91,6 -92,7 +91,6 @@@ const char *const lockdown_reasons[LOCK [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", };
-struct security_hook_heads security_hook_heads __ro_after_init; static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
static struct kmem_cache *lsm_file_cache; @@@ -106,58 -108,9 +106,58 @@@ static __initdata const char *chosen_ma static __initconst const char *const builtin_lsm_order = CONFIG_LSM;
/* Ordered list of LSMs to initialize. */ -static __initdata struct lsm_info **ordered_lsms; +static __initdata struct lsm_info *ordered_lsms[MAX_LSM_COUNT + 1]; static __initdata struct lsm_info *exclusive;
+#ifdef CONFIG_HAVE_STATIC_CALL +#define LSM_HOOK_TRAMP(NAME, NUM) \ + &STATIC_CALL_TRAMP(LSM_STATIC_CALL(NAME, NUM)) +#else +#define LSM_HOOK_TRAMP(NAME, NUM) NULL +#endif + +/* + * Define static calls and static keys for each LSM hook. + */ +#define DEFINE_LSM_STATIC_CALL(NUM, NAME, RET, ...) \ + DEFINE_STATIC_CALL_NULL(LSM_STATIC_CALL(NAME, NUM), \ + *((RET(*)(__VA_ARGS__))NULL)); \ + DEFINE_STATIC_KEY_FALSE(SECURITY_HOOK_ACTIVE_KEY(NAME, NUM)); + +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + LSM_DEFINE_UNROLL(DEFINE_LSM_STATIC_CALL, NAME, RET, __VA_ARGS__) +#include <linux/lsm_hook_defs.h> +#undef LSM_HOOK +#undef DEFINE_LSM_STATIC_CALL + +/* + * Initialise a table of static calls for each LSM hook. + * DEFINE_STATIC_CALL_NULL invocation above generates a key (STATIC_CALL_KEY) + * and a trampoline (STATIC_CALL_TRAMP) which are used to call + * __static_call_update when updating the static call. + * + * The static calls table is used by early LSMs, some architectures can fault on + * unaligned accesses and the fault handling code may not be ready by then. + * Thus, the static calls table should be aligned to avoid any unhandled faults + * in early init. + */ +struct lsm_static_calls_table + static_calls_table __ro_after_init __aligned(sizeof(u64)) = { +#define INIT_LSM_STATIC_CALL(NUM, NAME) \ + (struct lsm_static_call) { \ + .key = &STATIC_CALL_KEY(LSM_STATIC_CALL(NAME, NUM)), \ + .trampoline = LSM_HOOK_TRAMP(NAME, NUM), \ + .active = &SECURITY_HOOK_ACTIVE_KEY(NAME, NUM), \ + }, +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + .NAME = { \ + LSM_DEFINE_UNROLL(INIT_LSM_STATIC_CALL, NAME) \ + }, +#include <linux/lsm_hook_defs.h> +#undef LSM_HOOK +#undef INIT_LSM_STATIC_CALL + }; + static __initdata bool debug; #define init_debug(...) \ do { \ @@@ -218,7 -171,7 +218,7 @@@ static void __init append_ordered_lsm(s if (exists_ordered_lsm(lsm)) return;
- if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from)) + if (WARN(last_lsm == MAX_LSM_COUNT, "%s: out of LSM static calls!?\n", from)) return;
/* Enable this LSM, if it is not already set. */ @@@ -265,7 -218,6 +265,7 @@@ static void __init lsm_set_blob_sizes(s
lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); + lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib); /* * The inode blob gets an rcu_head in addition to * what the modules might need. @@@ -274,16 -226,11 +274,16 @@@ blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); + lsm_set_blob_size(&needed->lbs_key, &blob_sizes.lbs_key); lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); + lsm_set_blob_size(&needed->lbs_perf_event, &blob_sizes.lbs_perf_event); + lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock); lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock); lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); + lsm_set_blob_size(&needed->lbs_tun_dev, &blob_sizes.lbs_tun_dev); lsm_set_blob_size(&needed->lbs_xattr_count, &blob_sizes.lbs_xattr_count); + lsm_set_blob_size(&needed->lbs_bdev, &blob_sizes.lbs_bdev); }
/* Prepare LSM for initialization. */ @@@ -321,7 -268,7 +321,7 @@@ static void __init initialize_lsm(struc * Current index to use while initializing the lsm id list. */ u32 lsm_active_cnt __ro_after_init; -const struct lsm_id *lsm_idlist[LSM_CONFIG_COUNT]; +const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
/* Populate ordered LSMs list from comma-separated LSM name list. */ static void __init ordered_lsm_parse(const char *order, const char *origin) @@@ -403,25 -350,6 +403,25 @@@ kfree(sep); }
+static void __init lsm_static_call_init(struct security_hook_list *hl) +{ + struct lsm_static_call *scall = hl->scalls; + int i; + + for (i = 0; i < MAX_LSM_COUNT; i++) { + /* Update the first static call that is not used yet */ + if (!scall->hl) { + __static_call_update(scall->key, scall->trampoline, + hl->hook.lsm_func_addr); + scall->hl = hl; + static_branch_enable(scall->active); + return; + } + scall++; + } + panic("%s - Ran out of static slots.\n", __func__); +} + static void __init lsm_early_cred(struct cred *cred); static void __init lsm_early_task(struct task_struct *task);
@@@ -450,6 -378,9 +450,6 @@@ static void __init ordered_lsm_init(voi { struct lsm_info **lsm;
- ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms), - GFP_KERNEL); - if (chosen_lsm_order) { if (chosen_major_lsm) { pr_warn("security=%s is ignored because it is superseded by lsm=%s\n", @@@ -467,20 -398,12 +467,20 @@@
init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); + init_debug("ib blob size = %d\n", blob_sizes.lbs_ib); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); +#ifdef CONFIG_KEYS + init_debug("key blob size = %d\n", blob_sizes.lbs_key); +#endif /* CONFIG_KEYS */ init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); + init_debug("sock blob size = %d\n", blob_sizes.lbs_sock); init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock); + init_debug("perf event blob size = %d\n", blob_sizes.lbs_perf_event); init_debug("task blob size = %d\n", blob_sizes.lbs_task); + init_debug("tun device blob size = %d\n", blob_sizes.lbs_tun_dev); init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count); + init_debug("bdev blob size = %d\n", blob_sizes.lbs_bdev);
/* * Create any kmem_caches needed for blobs @@@ -498,12 -421,19 +498,12 @@@ lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); - - kfree(ordered_lsms); }
int __init early_security_init(void) { struct lsm_info *lsm;
-#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ - INIT_HLIST_HEAD(&security_hook_heads.NAME); -#include "linux/lsm_hook_defs.h" -#undef LSM_HOOK - for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; @@@ -624,14 -554,14 +624,14 @@@ void __init security_add_hooks(struct s * Look at the previous entry, if there is one, for duplication. */ if (lsm_active_cnt == 0 || lsm_idlist[lsm_active_cnt - 1] != lsmid) { - if (lsm_active_cnt >= LSM_CONFIG_COUNT) + if (lsm_active_cnt >= MAX_LSM_COUNT) panic("%s Too many LSMs registered.\n", __func__); lsm_idlist[lsm_active_cnt++] = lsmid; }
for (i = 0; i < count; i++) { hooks[i].lsmid = lsmid; - hlist_add_tail_rcu(&hooks[i].list, hooks[i].head); + lsm_static_call_init(&hooks[i]); }
/* @@@ -666,42 -596,27 +666,42 @@@ int unregister_blocking_lsm_notifier(st EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
/** - * lsm_cred_alloc - allocate a composite cred blob - * @cred: the cred that needs a blob + * lsm_blob_alloc - allocate a composite blob + * @dest: the destination for the blob + * @size: the size of the blob * @gfp: allocation type * - * Allocate the cred blob for all the modules + * Allocate a blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ -static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +static int lsm_blob_alloc(void **dest, size_t size, gfp_t gfp) { - if (blob_sizes.lbs_cred == 0) { - cred->security = NULL; + if (size == 0) { + *dest = NULL; return 0; }
- cred->security = kzalloc(blob_sizes.lbs_cred, gfp); - if (cred->security == NULL) + *dest = kzalloc(size, gfp); + if (*dest == NULL) return -ENOMEM; return 0; }
+/** + * lsm_cred_alloc - allocate a composite cred blob + * @cred: the cred that needs a blob + * @gfp: allocation type + * + * Allocate the cred blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +{ + return lsm_blob_alloc(&cred->security, blob_sizes.lbs_cred, gfp); +} + /** * lsm_early_cred - during initialization allocate a composite cred blob * @cred: the cred that needs a blob @@@ -745,7 -660,7 +745,7 @@@ static int lsm_file_alloc(struct file * * * Returns 0, or -ENOMEM if memory can't be allocated. */ -int lsm_inode_alloc(struct inode *inode) +static int lsm_inode_alloc(struct inode *inode) { if (!lsm_inode_cache) { inode->i_security = NULL; @@@ -768,7 -683,15 +768,7 @@@ */ static int lsm_task_alloc(struct task_struct *task) { - if (blob_sizes.lbs_task == 0) { - task->security = NULL; - return 0; - } - - task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL); - if (task->security == NULL) - return -ENOMEM; - return 0; + return lsm_blob_alloc(&task->security, blob_sizes.lbs_task, GFP_KERNEL); }
/** @@@ -781,23 -704,16 +781,23 @@@ */ static int lsm_ipc_alloc(struct kern_ipc_perm *kip) { - if (blob_sizes.lbs_ipc == 0) { - kip->security = NULL; - return 0; - } + return lsm_blob_alloc(&kip->security, blob_sizes.lbs_ipc, GFP_KERNEL); +}
- kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL); - if (kip->security == NULL) - return -ENOMEM; - return 0; +#ifdef CONFIG_KEYS +/** + * lsm_key_alloc - allocate a composite key blob + * @key: the key that needs a blob + * + * Allocate the key blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_key_alloc(struct key *key) +{ + return lsm_blob_alloc(&key->security, blob_sizes.lbs_key, GFP_KERNEL); } +#endif /* CONFIG_KEYS */
/** * lsm_msg_msg_alloc - allocate a composite msg_msg blob @@@ -809,29 -725,14 +809,29 @@@ */ static int lsm_msg_msg_alloc(struct msg_msg *mp) { - if (blob_sizes.lbs_msg_msg == 0) { - mp->security = NULL; + return lsm_blob_alloc(&mp->security, blob_sizes.lbs_msg_msg, + GFP_KERNEL); +} + +/** + * lsm_bdev_alloc - allocate a composite block_device blob + * @bdev: the block_device that needs a blob + * + * Allocate the block_device blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_bdev_alloc(struct block_device *bdev) +{ + if (blob_sizes.lbs_bdev == 0) { + bdev->bd_security = NULL; return 0; }
- mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL); - if (mp->security == NULL) + bdev->bd_security = kzalloc(blob_sizes.lbs_bdev, GFP_KERNEL); + if (!bdev->bd_security) return -ENOMEM; + return 0; }
@@@ -859,8 -760,15 +859,8 @@@ static void __init lsm_early_task(struc */ static int lsm_superblock_alloc(struct super_block *sb) { - if (blob_sizes.lbs_superblock == 0) { - sb->s_security = NULL; - return 0; - } - - sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL); - if (sb->s_security == NULL) - return -ENOMEM; - return 0; + return lsm_blob_alloc(&sb->s_security, blob_sizes.lbs_superblock, + GFP_KERNEL); }
/** @@@ -945,43 -853,29 +945,43 @@@ out * call_int_hook: * This is a hook that returns a value. */ +#define __CALL_STATIC_VOID(NUM, HOOK, ...) \ +do { \ + if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \ + static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \ + } \ +} while (0);
-#define call_void_hook(FUNC, ...) \ - do { \ - struct security_hook_list *P; \ - \ - hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \ - P->hook.FUNC(__VA_ARGS__); \ +#define call_void_hook(HOOK, ...) \ + do { \ + LSM_LOOP_UNROLL(__CALL_STATIC_VOID, HOOK, __VA_ARGS__); \ } while (0)
-#define call_int_hook(FUNC, ...) ({ \ - int RC = LSM_RET_DEFAULT(FUNC); \ - do { \ - struct security_hook_list *P; \ - \ - hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \ - RC = P->hook.FUNC(__VA_ARGS__); \ - if (RC != LSM_RET_DEFAULT(FUNC)) \ - break; \ - } \ - } while (0); \ - RC; \ + +#define __CALL_STATIC_INT(NUM, R, HOOK, LABEL, ...) \ +do { \ + if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \ + R = static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \ + if (R != LSM_RET_DEFAULT(HOOK)) \ + goto LABEL; \ + } \ +} while (0); + +#define call_int_hook(HOOK, ...) \ +({ \ + __label__ OUT; \ + int RC = LSM_RET_DEFAULT(HOOK); \ + \ + LSM_LOOP_UNROLL(__CALL_STATIC_INT, RC, HOOK, OUT, __VA_ARGS__); \ +OUT: \ + RC; \ })
+#define lsm_for_each_hook(scall, NAME) \ + for (scall = static_calls_table.NAME; \ + scall - static_calls_table.NAME < MAX_LSM_COUNT; scall++) \ + if (static_key_enabled(&scall->active->key)) + /* Security operations */
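A behavioural sketch of the dispatch that call_int_hook() performs above, written as ordinary userspace C: each slot is consulted only if its guard is enabled, and iteration stops at the first hook returning something other than the default. The static keys and static calls are replaced by a plain flag and a function pointer, and the names (slot, register_hook, file_permission_slots) are invented, so this shows the control flow only, not the patched-call mechanics.

	#include <stdio.h>

	#define MAX_SLOTS	4
	#define DEFAULT_RET	0

	struct slot {
		int active;			/* stands in for the static key         */
		int (*hook)(int arg);		/* stands in for the static call target */
	};

	static struct slot file_permission_slots[MAX_SLOTS];

	/* call_int_hook() analogue: first non-default return value wins. */
	static int call_file_permission(int arg)
	{
		for (int i = 0; i < MAX_SLOTS; i++) {
			if (!file_permission_slots[i].active)
				continue;
			int rc = file_permission_slots[i].hook(arg);
			if (rc != DEFAULT_RET)
				return rc;
		}
		return DEFAULT_RET;
	}

	/* security_add_hooks()/lsm_static_call_init() analogue: fill the first free slot. */
	static void register_hook(int (*hook)(int))
	{
		for (int i = 0; i < MAX_SLOTS; i++) {
			if (!file_permission_slots[i].active) {
				file_permission_slots[i].hook = hook;
				file_permission_slots[i].active = 1;
				return;
			}
		}
	}

	static int allow_all(int arg) { (void)arg; return 0; }
	static int deny_odd(int arg)  { return (arg & 1) ? -13 /* -EACCES */ : 0; }

	int main(void)
	{
		register_hook(allow_all);
		register_hook(deny_odd);
		printf("%d %d\n", call_file_permission(2), call_file_permission(3)); /* 0 -13 */
		return 0;
	}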
/** @@@ -1216,19 -1110,20 +1216,19 @@@ int security_settime64(const struct tim */ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int cap_sys_admin = 1; int rc;
/* - * The module will respond with a positive value if - * it thinks the __vm_enough_memory() call should be - * made with the cap_sys_admin set. If all of the modules - * agree that it should be set it will. If any module - * thinks it should not be set it won't. + * The module will respond with 0 if it thinks the __vm_enough_memory() + * call should be made with the cap_sys_admin set. If all of the modules + * agree that it should be set it will. If any module thinks it should + * not be set it won't. */ - hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) { - rc = hp->hook.vm_enough_memory(mm, pages); - if (rc <= 0) { + lsm_for_each_hook(scall, vm_enough_memory) { + rc = scall->hl->hook.vm_enough_memory(mm, pages); + if (rc < 0) { cap_sys_admin = 0; break; } @@@ -1374,12 -1269,13 +1374,12 @@@ int security_fs_context_dup(struct fs_c int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int trc; int rc = -ENOPARAM;
- hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, - list) { - trc = hp->hook.fs_context_parse_param(fc, param); + lsm_for_each_hook(scall, fs_context_parse_param) { + trc = scall->hl->hook.fs_context_parse_param(fc, param); if (trc == 0) rc = 0; else if (trc != -ENOPARAM) @@@ -1609,11 -1505,12 +1609,11 @@@ int security_sb_set_mnt_opts(struct sup unsigned long kern_flags, unsigned long *set_kern_flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts);
- hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts, - list) { - rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags, + lsm_for_each_hook(scall, sb_set_mnt_opts) { + rc = scall->hl->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags, set_kern_flags); if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts)) break; @@@ -1699,8 -1596,9 +1699,8 @@@ int security_inode_alloc(struct inode *
static void inode_free_by_rcu(struct rcu_head *head) { - /* - * The rcu head is at the start of the inode blob - */ + /* The rcu head is at the start of the inode blob */ + call_void_hook(inode_free_security_rcu, head); kmem_cache_free(lsm_inode_cache, head); }
@@@ -1708,24 -1606,23 +1708,24 @@@ * security_inode_free() - Free an inode's LSM blob * @inode: the inode * - * Deallocate the inode security structure and set @inode->i_security to NULL. + * Release any LSM resources associated with @inode, although due to the + * inode's RCU protections it is possible that the resources will not be + * fully released until after the current RCU grace period has elapsed. + * + * It is important for LSMs to note that despite being present in a call to + * security_inode_free(), @inode may still be referenced in a VFS path walk + * and calls to security_inode_permission() may be made during, or after, + * a call to security_inode_free(). For this reason the inode->i_security + * field is released via a call_rcu() callback and any LSMs which need to + * retain inode state for use in security_inode_permission() should only + * release that state in the inode_free_security_rcu() LSM hook callback. */ void security_inode_free(struct inode *inode) { call_void_hook(inode_free_security, inode); - /* - * The inode may still be referenced in a path walk and - * a call to security_inode_permission() can be made - * after inode_free_security() is called. Ideally, the VFS - * wouldn't do this, but fixing that is a much harder - * job. For now, simply free the i_security via RCU, and - * leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - if (inode->i_security) - call_rcu((struct rcu_head *)inode->i_security, - inode_free_by_rcu); + if (!inode->i_security) + return; + call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu); }
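What the inode_free_security / inode_free_security_rcu split means for an individual module, sketched as a hypothetical LSM: state that security_inode_permission() may still dereference during an RCU walk is only torn down in the _rcu callback, after the grace period, while everything else can go immediately. This is a shape-of-the-code sketch, not a buildable module; mylsm_inode(), mylsm_inode_from_blob() and mylsm_label_put() are invented helpers.

	/* Hypothetical per-inode blob: the label is still dereferenced by the
	 * permission hook during RCU-walk, the audit cookie is not.
	 */
	struct mylsm_inode {
		struct mylsm_label *label;
		void *audit_cookie;
	};

	static void mylsm_inode_free_security(struct inode *inode)
	{
		struct mylsm_inode *isec = mylsm_inode(inode);	/* invented blob accessor */

		/* Safe to free now: nothing on the inode_permission() path uses it. */
		kfree(isec->audit_cookie);
		isec->audit_cookie = NULL;
		/* Do not drop isec->label here: a concurrent path walk may still end
		 * up in security_inode_permission() for this inode.
		 */
	}

	static void mylsm_inode_free_security_rcu(void *inode_security)
	{
		/* The RCU grace period has elapsed; no walker can reach the blob. */
		struct mylsm_inode *isec = mylsm_inode_from_blob(inode_security); /* invented */

		mylsm_label_put(isec->label);			/* invented refcount drop */
	}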
/** @@@ -1808,7 -1705,7 +1808,7 @@@ int security_inode_init_security(struc const struct qstr *qstr, const initxattrs initxattrs, void *fs_data) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct xattr *new_xattrs = NULL; int ret = -EOPNOTSUPP, xattr_count = 0;
@@@ -1826,8 -1723,9 +1826,8 @@@ return -ENOMEM; }
- hlist_for_each_entry(hp, &security_hook_heads.inode_init_security, - list) { - ret = hp->hook.inode_init_security(inode, dir, qstr, new_xattrs, + lsm_for_each_hook(scall, inode_init_security) { + ret = scall->hl->hook.inode_init_security(inode, dir, qstr, new_xattrs, &xattr_count); if (ret && ret != -EOPNOTSUPP) goto out; @@@ -2763,14 -2661,19 +2763,14 @@@ EXPORT_SYMBOL(security_inode_copy_up) * lower layer to the union/overlay layer. The caller is responsible for * reading and writing the xattrs, this hook is merely a filter. * - * Return: Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP - * if the security module does not know about attribute, or a negative - * error code to abort the copy up. + * Return: Returns 0 to accept the xattr, -ECANCELED to discard the xattr, + * -EOPNOTSUPP if the security module does not know about attribute, + * or a negative error code to abort the copy up. */ int security_inode_copy_up_xattr(struct dentry *src, const char *name) { int rc;
- /* - * The implementation can return 0 (accept the xattr), 1 (discard the - * xattr), -EOPNOTSUPP if it does not know anything about the xattr or - * any other error code in case of an error. - */ rc = call_int_hook(inode_copy_up_xattr, src, name); if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr)) return rc; @@@ -2779,26 -2682,6 +2779,26 @@@ } EXPORT_SYMBOL(security_inode_copy_up_xattr);
+/** + * security_inode_setintegrity() - Set the inode's integrity data + * @inode: inode + * @type: type of integrity, e.g. hash digest, signature, etc + * @value: the integrity value + * @size: size of the integrity value + * + * Register a verified integrity measurement of a inode with LSMs. + * LSMs should free the previously saved data if @value is NULL. + * + * Return: Returns 0 on success, negative values on failure. + */ +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size) +{ + return call_int_hook(inode_setintegrity, inode, type, value, size); +} +EXPORT_SYMBOL(security_inode_setintegrity); + /** * security_kernfs_init_security() - Init LSM context for a kernfs node * @kn_dir: parent kernfs node @@@ -3048,8 -2931,6 +3048,8 @@@ int security_file_fcntl(struct file *fi * Save owner security information (typically from current->security) in * file->f_security for later use by the send_sigiotask hook. * + * This hook is called with file->f_owner.lock held. + * * Return: Returns 0 on success. */ void security_file_set_fowner(struct file *file) @@@ -3676,10 -3557,10 +3676,10 @@@ int security_task_prctl(int option, uns { int thisrc; int rc = LSM_RET_DEFAULT(task_prctl); - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) { - thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5); + lsm_for_each_hook(scall, task_prctl) { + thisrc = scall->hl->hook.task_prctl(option, arg2, arg3, arg4, arg5); if (thisrc != LSM_RET_DEFAULT(task_prctl)) { rc = thisrc; if (thisrc != 0) @@@ -4085,7 -3966,7 +4085,7 @@@ EXPORT_SYMBOL(security_d_instantiate) int security_getselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 __user *size, u32 flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct lsm_ctx lctx = { .id = LSM_ID_UNDEF, }; u8 __user *base = (u8 __user *)uctx; u32 entrysize; @@@ -4123,13 -4004,13 +4123,13 @@@ * In the usual case gather all the data from the LSMs. * In the single case only get the data from the LSM specified. */ - hlist_for_each_entry(hp, &security_hook_heads.getselfattr, list) { - if (single && lctx.id != hp->lsmid->id) + lsm_for_each_hook(scall, getselfattr) { + if (single && lctx.id != scall->hl->lsmid->id) continue; entrysize = left; if (base) uctx = (struct lsm_ctx __user *)(base + total); - rc = hp->hook.getselfattr(attr, uctx, &entrysize, flags); + rc = scall->hl->hook.getselfattr(attr, uctx, &entrysize, flags); if (rc == -EOPNOTSUPP) { rc = 0; continue; @@@ -4178,7 -4059,7 +4178,7 @@@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 size, u32 flags) { - struct security_hook_list *hp; + struct lsm_static_call *scall; struct lsm_ctx *lctx; int rc = LSM_RET_DEFAULT(setselfattr); u64 required_len; @@@ -4201,9 -4082,9 +4201,9 @@@ goto free_out; }
- hlist_for_each_entry(hp, &security_hook_heads.setselfattr, list) - if ((hp->lsmid->id) == lctx->id) { - rc = hp->hook.setselfattr(attr, lctx, size, flags); + lsm_for_each_hook(scall, setselfattr) + if ((scall->hl->lsmid->id) == lctx->id) { + rc = scall->hl->hook.setselfattr(attr, lctx, size, flags); break; }
@@@ -4226,12 -4107,12 +4226,12 @@@ free_out int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { - if (lsmid != 0 && lsmid != hp->lsmid->id) + lsm_for_each_hook(scall, getprocattr) { + if (lsmid != 0 && lsmid != scall->hl->lsmid->id) continue; - return hp->hook.getprocattr(p, name, value); + return scall->hl->hook.getprocattr(p, name, value); } return LSM_RET_DEFAULT(getprocattr); } @@@ -4250,12 -4131,12 +4250,12 @@@ */ int security_setprocattr(int lsmid, const char *name, void *value, size_t size) { - struct security_hook_list *hp; + struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { - if (lsmid != 0 && lsmid != hp->lsmid->id) + lsm_for_each_hook(scall, setprocattr) { + if (lsmid != 0 && lsmid != scall->hl->lsmid->id) continue; - return hp->hook.setprocattr(name, value, size); + return scall->hl->hook.setprocattr(name, value, size); } return LSM_RET_DEFAULT(setprocattr); } @@@ -4792,20 -4673,6 +4792,20 @@@ int security_socket_getpeersec_dgram(st } EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+/** + * lsm_sock_alloc - allocate a composite sock blob + * @sock: the sock that needs a blob + * @gfp: allocation mode + * + * Allocate the sock blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_sock_alloc(struct sock *sock, gfp_t gfp) +{ + return lsm_blob_alloc(&sock->sk_security, blob_sizes.lbs_sock, gfp); +} + /** * security_sk_alloc() - Allocate and initialize a sock's LSM blob * @sk: sock @@@ -4819,14 -4686,7 +4819,14 @@@ */ int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { - return call_int_hook(sk_alloc_security, sk, family, priority); + int rc = lsm_sock_alloc(sk, priority); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(sk_alloc_security, sk, family, priority); + if (unlikely(rc)) + security_sk_free(sk); + return rc; }
/** @@@ -4838,8 -4698,6 +4838,8 @@@ void security_sk_free(struct sock *sk) { call_void_hook(sk_free_security, sk); + kfree(sk->sk_security); + sk->sk_security = NULL; }
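Since lsm_sock_alloc() above hands every module a zeroed slice of sk->sk_security and security_sk_free() releases the whole blob, an individual LSM's socket hooks shrink to pure initialization. A hypothetical example; struct mylsm_sock, mylsm_sock(), mylsm_current_sid() and mylsm_sock_class() are invented names, not any in-tree LSM's code.

	/* Hypothetical LSM: the framework has already zero-allocated the blob,
	 * so the alloc hook no longer needs its own kmalloc() or error path.
	 */
	struct mylsm_sock {
		u32 sid;
		u16 sclass;
	};

	static int mylsm_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
	{
		struct mylsm_sock *ssec = mylsm_sock(sk);	/* invented blob accessor */

		ssec->sid = mylsm_current_sid();		/* invented label lookup */
		ssec->sclass = mylsm_sock_class(family);	/* invented class mapping */
		return 0;
	}

	/* And no kfree() in the sk_free_security hook: security_sk_free() frees
	 * the blob itself after all modules have run their hooks.
	 */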
/** @@@ -4987,18 -4845,7 +4987,18 @@@ EXPORT_SYMBOL(security_secmark_refcount */ int security_tun_dev_alloc_security(void **security) { - return call_int_hook(tun_dev_alloc_security, security); + int rc; + + rc = lsm_blob_alloc(security, blob_sizes.lbs_tun_dev, GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(tun_dev_alloc_security, *security); + if (rc) { + kfree(*security); + *security = NULL; + } + return rc; } EXPORT_SYMBOL(security_tun_dev_alloc_security);
@@@ -5010,7 -4857,7 +5010,7 @@@ */ void security_tun_dev_free_security(void *security) { - call_void_hook(tun_dev_free_security, security); + kfree(security); } EXPORT_SYMBOL(security_tun_dev_free_security);
@@@ -5206,18 -5053,7 +5206,18 @@@ EXPORT_SYMBOL(security_ib_endport_manag */ int security_ib_alloc_security(void **sec) { - return call_int_hook(ib_alloc_security, sec); + int rc; + + rc = lsm_blob_alloc(sec, blob_sizes.lbs_ib, GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(ib_alloc_security, *sec); + if (rc) { + kfree(*sec); + *sec = NULL; + } + return rc; } EXPORT_SYMBOL(security_ib_alloc_security);
@@@ -5229,7 -5065,7 +5229,7 @@@ */ void security_ib_free_security(void *sec) { - call_void_hook(ib_free_security, sec); + kfree(sec); } EXPORT_SYMBOL(security_ib_free_security); #endif /* CONFIG_SECURITY_INFINIBAND */ @@@ -5387,7 -5223,7 +5387,7 @@@ int security_xfrm_state_pol_flow_match( struct xfrm_policy *xp, const struct flowi_common *flic) { - struct security_hook_list *hp; + struct lsm_static_call *scall; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match);
/* @@@ -5399,8 -5235,9 +5399,8 @@@ * For speed optimization, we explicitly break the loop rather than * using the macro */ - hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, - list) { - rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic); + lsm_for_each_hook(scall, xfrm_state_pol_flow_match) { + rc = scall->hl->hook.xfrm_state_pol_flow_match(x, xp, flic); break; } return rc; @@@ -5445,14 -5282,7 +5445,14 @@@ EXPORT_SYMBOL(security_skb_classify_flo int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - return call_int_hook(key_alloc, key, cred, flags); + int rc = lsm_key_alloc(key); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(key_alloc, key, cred, flags); + if (unlikely(rc)) + security_key_free(key); + return rc; }
/** @@@ -5463,8 -5293,7 +5463,8 @@@ */ void security_key_free(struct key *key) { - call_void_hook(key_free, key); + kfree(key->security); + key->security = NULL; }
/** @@@ -5681,7 -5510,7 +5681,7 @@@ int security_bpf_prog_load(struct bpf_p * Return: Returns 0 on success, error on failure. */ int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return call_int_hook(bpf_token_create, token, attr, path); } @@@ -5767,85 -5596,6 +5767,85 @@@ int security_locked_down(enum lockdown_ } EXPORT_SYMBOL(security_locked_down);
+/** + * security_bdev_alloc() - Allocate a block device LSM blob + * @bdev: block device + * + * Allocate and attach a security structure to @bdev->bd_security. The + * security field is initialized to NULL when the bdev structure is + * allocated. + * + * Return: Return 0 if operation was successful. + */ +int security_bdev_alloc(struct block_device *bdev) +{ + int rc = 0; + + rc = lsm_bdev_alloc(bdev); + if (unlikely(rc)) + return rc; + + rc = call_int_hook(bdev_alloc_security, bdev); + if (unlikely(rc)) + security_bdev_free(bdev); + + return rc; +} +EXPORT_SYMBOL(security_bdev_alloc); + +/** + * security_bdev_free() - Free a block device's LSM blob + * @bdev: block device + * + * Deallocate the bdev security structure and set @bdev->bd_security to NULL. + */ +void security_bdev_free(struct block_device *bdev) +{ + if (!bdev->bd_security) + return; + + call_void_hook(bdev_free_security, bdev); + + kfree(bdev->bd_security); + bdev->bd_security = NULL; +} +EXPORT_SYMBOL(security_bdev_free); + +/** + * security_bdev_setintegrity() - Set the device's integrity data + * @bdev: block device + * @type: type of integrity, e.g. hash digest, signature, etc + * @value: the integrity value + * @size: size of the integrity value + * + * Register a verified integrity measurement of a bdev with LSMs. + * LSMs should free the previously saved data if @value is NULL. + * Please note that the new hook should be invoked every time the security + * information is updated to keep these data current. For example, in dm-verity, + * if the mapping table is reloaded and configured to use a different dm-verity + * target with a new roothash and signing information, the previously stored + * data in the LSM blob will become obsolete. It is crucial to re-invoke the + * hook to refresh these data and ensure they are up to date. This necessity + * arises from the design of device-mapper, where a device-mapper device is + * first created, and then targets are subsequently loaded into it. These + * targets can be modified multiple times during the device's lifetime. + * Therefore, while the LSM blob is allocated during the creation of the block + * device, its actual contents are not initialized at this stage and can change + * substantially over time. This includes alterations from data that the LSMs + * 'trusts' to those they do not, making it essential to handle these changes + * correctly. Failure to address this dynamic aspect could potentially allow + * for bypassing LSM checks. + * + * Return: Returns 0 on success, negative values on failure. + */ +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size) +{ + return call_int_hook(bdev_setintegrity, bdev, type, value, size); +} +EXPORT_SYMBOL(security_bdev_setintegrity); + #ifdef CONFIG_PERF_EVENTS /** * security_perf_event_open() - Check if a perf event open is allowed @@@ -5871,19 -5621,7 +5871,19 @@@ int security_perf_event_open(struct per */ int security_perf_event_alloc(struct perf_event *event) { - return call_int_hook(perf_event_alloc, event); + int rc; + + rc = lsm_blob_alloc(&event->security, blob_sizes.lbs_perf_event, + GFP_KERNEL); + if (rc) + return rc; + + rc = call_int_hook(perf_event_alloc, event); + if (rc) { + kfree(event->security); + event->security = NULL; + } + return rc; }
/** @@@ -5894,8 -5632,7 +5894,8 @@@ */ void security_perf_event_free(struct perf_event *event) { - call_void_hook(perf_event_free, event); + kfree(event->security); + event->security = NULL; }
/** @@@ -5966,13 -5703,3 +5966,13 @@@ int security_uring_cmd(struct io_uring_ return call_int_hook(uring_cmd, ioucmd); } #endif /* CONFIG_IO_URING */ + +/** + * security_initramfs_populated() - Notify LSMs that initramfs has been loaded + * + * Tells the LSMs the initramfs has been unpacked into the rootfs. + */ +void security_initramfs_populated(void) +{ + call_void_hook(initramfs_populated); +} diff --combined security/selinux/hooks.c index 94c5231401259,0eec141a8f37e..fc926d3cac6e2 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@@ -282,13 -282,8 +282,13 @@@ static int __inode_security_revalidate(
might_sleep_if(may_sleep);
+ /* + * The check of isec->initialized below is racy but + * inode_doinit_with_dentry() will recheck with + * isec->lock held. + */ if (selinux_initialized() && - isec->initialized != LABEL_INITIALIZED) { + data_race(isec->initialized != LABEL_INITIALIZED)) { if (!may_sleep) return -ECHILD;
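(Editorial illustration, not part of the merged diff.) The comment added above describes the usual lockless check-then-recheck pattern: the unlocked read is wrapped in data_race() so KCSAN does not flag it, and the decision is repeated under the lock before any real work happens. A minimal sketch, where the lock usage and the do_label_init() helper are illustrative:

	if (data_race(isec->initialized != LABEL_INITIALIZED)) {
		spin_lock(&isec->lock);
		/* Re-check now that isec->lock serializes against updaters. */
		if (isec->initialized != LABEL_INITIALIZED)
			do_label_init(isec);	/* hypothetical helper */
		spin_unlock(&isec->lock);
	}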
@@@ -2207,16 -2202,23 +2207,16 @@@ static int selinux_syslog(int type }
/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 0 means there is enough memory for the allocation to - * succeed and -ENOMEM implies there is not. + * Check permission for allocating a new virtual mapping. Returns + * 0 if permission is granted, negative error code if not. * * Do not audit the selinux permission check, as this is applied to all * processes that allocate mappings. */ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { - int rc, cap_sys_admin = 0; - - rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN, - CAP_OPT_NOAUDIT, true); - if (rc == 0) - cap_sys_admin = 1; - - return cap_sys_admin; + return cred_has_capability(current_cred(), CAP_SYS_ADMIN, + CAP_OPT_NOAUDIT, true); }
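(Editorial illustration, not part of the merged diff.) For context, a hedged sketch of how memory-management code consumes the security_vm_enough_memory_mm() wrapper built on this hook: zero means the accounting request may proceed, non-zero (typically -ENOMEM) means it may not. The caller shown is illustrative, not quoted from this diff.

	/* Illustrative caller, e.g. when charging pages for a growing mapping. */
	if (security_vm_enough_memory_mm(mm, grow))
		return -ENOMEM;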
/* binprm security operations */ @@@ -3536,8 -3538,8 +3536,8 @@@ static int selinux_inode_copy_up_xattr( * xattrs up. Instead, filter out SELinux-related xattrs following * policy load. */ - if (selinux_initialized() && strcmp(name, XATTR_NAME_SELINUX) == 0) - return 1; /* Discard */ + if (selinux_initialized() && !strcmp(name, XATTR_NAME_SELINUX)) + return -ECANCELED; /* Discard */ /* * Any other attribute apart from SELINUX is not claimed, supported * by selinux. @@@ -3850,17 -3852,7 +3850,17 @@@ static int selinux_file_mprotect(struc if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || @@@ -3948,7 -3940,7 +3948,7 @@@ static int selinux_file_send_sigiotask( struct file_security_struct *fsec;
/* struct fown_struct is never outside the context of a struct file */ - file = container_of(fown, struct file, f_owner); + file = fown->file;
fsec = selinux_file(file);
@@@ -4592,7 -4584,7 +4592,7 @@@ static int socket_sockcreate_sid(const
static int sock_has_perm(struct sock *sk, u32 perms) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net;
@@@ -4660,7 -4652,7 +4660,7 @@@ static int selinux_socket_post_create(s isec->initialized = LABEL_INITIALIZED;
if (sock->sk) { - sksec = sock->sk->sk_security; + sksec = selinux_sock(sock->sk); sksec->sclass = sclass; sksec->sid = sid; /* Allows detection of the first association on this socket */ @@@ -4676,8 -4668,8 +4676,8 @@@ static int selinux_socket_socketpair(struct socket *socka, struct socket *sockb) { - struct sk_security_struct *sksec_a = socka->sk->sk_security; - struct sk_security_struct *sksec_b = sockb->sk->sk_security; + struct sk_security_struct *sksec_a = selinux_sock(socka->sk); + struct sk_security_struct *sksec_b = selinux_sock(sockb->sk);
sksec_a->peer_sid = sksec_b->sid; sksec_b->peer_sid = sksec_a->sid; @@@ -4692,7 -4684,7 +4692,7 @@@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 family; int err;
@@@ -4832,7 -4824,7 +4832,7 @@@ static int selinux_socket_connect_helpe struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); int err;
err = sock_has_perm(sk, SOCKET__CONNECT); @@@ -5010,9 -5002,9 +5010,9 @@@ static int selinux_socket_unix_stream_c struct sock *other, struct sock *newsk) { - struct sk_security_struct *sksec_sock = sock->sk_security; - struct sk_security_struct *sksec_other = other->sk_security; - struct sk_security_struct *sksec_new = newsk->sk_security; + struct sk_security_struct *sksec_sock = selinux_sock(sock); + struct sk_security_struct *sksec_other = selinux_sock(other); + struct sk_security_struct *sksec_new = selinux_sock(newsk); struct common_audit_data ad; struct lsm_network_audit net; int err; @@@ -5041,8 -5033,8 +5041,8 @@@ static int selinux_socket_unix_may_send(struct socket *sock, struct socket *other) { - struct sk_security_struct *ssec = sock->sk->sk_security; - struct sk_security_struct *osec = other->sk->sk_security; + struct sk_security_struct *ssec = selinux_sock(sock->sk); + struct sk_security_struct *osec = selinux_sock(other->sk); struct common_audit_data ad; struct lsm_network_audit net;
@@@ -5079,7 -5071,7 +5079,7 @@@ static int selinux_sock_rcv_skb_compat( u16 family) { int err = 0; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u32 sk_sid = sksec->sid; struct common_audit_data ad; struct lsm_network_audit net; @@@ -5108,7 -5100,7 +5108,7 @@@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err, peerlbl_active, secmark_active; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 family = sk->sk_family; u32 sk_sid = sksec->sid; struct common_audit_data ad; @@@ -5176,7 -5168,7 +5176,7 @@@ static int selinux_socket_getpeersec_st int err = 0; char *scontext = NULL; u32 scontext_len; - struct sk_security_struct *sksec = sock->sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sock->sk); u32 peer_sid = SECSID_NULL;
if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || @@@ -5236,27 -5228,34 +5236,27 @@@ static int selinux_socket_getpeersec_dg
static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) { - struct sk_security_struct *sksec; - - sksec = kzalloc(sizeof(*sksec), priority); - if (!sksec) - return -ENOMEM; + struct sk_security_struct *sksec = selinux_sock(sk);
sksec->peer_sid = SECINITSID_UNLABELED; sksec->sid = SECINITSID_UNLABELED; sksec->sclass = SECCLASS_SOCKET; selinux_netlbl_sk_security_reset(sksec); - sk->sk_security = sksec;
return 0; }
static void selinux_sk_free_security(struct sock *sk) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
- sk->sk_security = NULL; selinux_netlbl_sk_security_free(sksec); - kfree(sksec); }
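(Editorial illustration, not part of the merged diff.) The hunks above and below replace direct sk->sk_security dereferences and per-socket kzalloc()/kfree() with a framework-managed blob reached through a selinux_sock() accessor; the .lbs_sock entry added to selinux_blob_sizes later in this diff sizes that blob. A hedged paraphrase of what such an accessor typically looks like (the real definition lives in SELinux's objsec.h and may differ in detail):

	static inline struct sk_security_struct *selinux_sock(const struct sock *sk)
	{
		/* The SELinux portion sits at its registered offset in the blob. */
		return sk->sk_security + selinux_blob_sizes.lbs_sock;
	}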
static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) { - struct sk_security_struct *sksec = sk->sk_security; - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); + struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = sksec->sid; newsksec->peer_sid = sksec->peer_sid; @@@ -5270,7 -5269,7 +5270,7 @@@ static void selinux_sk_getsecid(const s if (!sk) *secid = SECINITSID_ANY_SOCKET; else { - const struct sk_security_struct *sksec = sk->sk_security; + const struct sk_security_struct *sksec = selinux_sock(sk);
*secid = sksec->sid; } @@@ -5280,7 -5279,7 +5280,7 @@@ static void selinux_sock_graft(struct s { struct inode_security_struct *isec = inode_security_novalidate(SOCK_INODE(parent)); - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || sk->sk_family == PF_UNIX) @@@ -5297,7 -5296,7 +5297,7 @@@ static int selinux_sctp_process_new_ass { struct sock *sk = asoc->base.sk; u16 family = sk->sk_family; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net; int err; @@@ -5352,7 -5351,7 +5352,7 @@@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(asoc->base.sk); u32 conn_sid; int err;
@@@ -5385,7 -5384,7 +5385,7 @@@ static int selinux_sctp_assoc_established(struct sctp_association *asoc, struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
if (!selinux_policycap_extsockclass()) return 0; @@@ -5484,8 -5483,8 +5484,8 @@@ static int selinux_sctp_bind_connect(st static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { - struct sk_security_struct *sksec = sk->sk_security; - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); + struct sk_security_struct *newsksec = selinux_sock(newsk);
/* If policy does not support SECCLASS_SCTP_SOCKET then call * the non-sctp clone version. @@@ -5501,8 -5500,8 +5501,8 @@@
static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk) { - struct sk_security_struct *ssksec = ssk->sk_security; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *ssksec = selinux_sock(ssk); + struct sk_security_struct *sksec = selinux_sock(sk);
ssksec->sclass = sksec->sclass; ssksec->sid = sksec->sid; @@@ -5517,7 -5516,7 +5517,7 @@@ static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); int err; u16 family = req->rsk_ops->family; u32 connsid; @@@ -5538,7 -5537,7 +5538,7 @@@ static void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) { - struct sk_security_struct *newsksec = newsk->sk_security; + struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = req->secid; newsksec->peer_sid = req->peer_secid; @@@ -5555,7 -5554,7 +5555,7 @@@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) { u16 family = sk->sk_family; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk);
/* handle mapped IPv4 packets arriving via IPv6 sockets */ if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) @@@ -5586,14 -5585,24 +5586,14 @@@ static void selinux_req_classify_flow(c flic->flowic_secid = req->secid; }
-static int selinux_tun_dev_alloc_security(void **security) +static int selinux_tun_dev_alloc_security(void *security) { - struct tun_security_struct *tunsec; + struct tun_security_struct *tunsec = selinux_tun_dev(security);
- tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL); - if (!tunsec) - return -ENOMEM; tunsec->sid = current_sid(); - - *security = tunsec; return 0; }
-static void selinux_tun_dev_free_security(void *security) -{ - kfree(security); -} - static int selinux_tun_dev_create(void) { u32 sid = current_sid(); @@@ -5611,7 -5620,7 +5611,7 @@@
static int selinux_tun_dev_attach_queue(void *security) { - struct tun_security_struct *tunsec = security; + struct tun_security_struct *tunsec = selinux_tun_dev(security);
return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__ATTACH_QUEUE, NULL); @@@ -5619,8 -5628,8 +5619,8 @@@
static int selinux_tun_dev_attach(struct sock *sk, void *security) { - struct tun_security_struct *tunsec = security; - struct sk_security_struct *sksec = sk->sk_security; + struct tun_security_struct *tunsec = selinux_tun_dev(security); + struct sk_security_struct *sksec = selinux_sock(sk);
/* we don't currently perform any NetLabel based labeling here and it * isn't clear that we would want to do so anyway; while we could apply @@@ -5637,7 -5646,7 +5637,7 @@@
static int selinux_tun_dev_open(void *security) { - struct tun_security_struct *tunsec = security; + struct tun_security_struct *tunsec = selinux_tun_dev(security); u32 sid = current_sid(); int err;
@@@ -5743,7 -5752,7 +5743,7 @@@ static unsigned int selinux_ip_output(v return NF_ACCEPT;
/* standard practice, label using the parent socket */ - sksec = sk->sk_security; + sksec = selinux_sock(sk); sid = sksec->sid; } else sid = SECINITSID_KERNEL; @@@ -5766,7 -5775,7 +5766,7 @@@ static unsigned int selinux_ip_postrout sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; - sksec = sk->sk_security; + sksec = selinux_sock(sk);
ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf); if (selinux_parse_skb(skb, &ad, NULL, 0, &proto)) @@@ -5855,7 -5864,7 +5855,7 @@@ static unsigned int selinux_ip_postrout u32 skb_sid; struct sk_security_struct *sksec;
- sksec = sk->sk_security; + sksec = selinux_sock(sk); if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) return NF_DROP; /* At this point, if the returned skb peerlbl is SECSID_NULL @@@ -5884,7 -5893,7 +5884,7 @@@ } else { /* Locally generated packet, fetch the security label from the * associated socket. */ - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); peer_sid = sksec->sid; secmark_perm = PACKET__SEND; } @@@ -5927,7 -5936,7 +5927,7 @@@ static int selinux_netlink_send(struct unsigned int data_len = skb->len; unsigned char *data = skb->data; struct nlmsghdr *nlh; - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); u16 sclass = sksec->sclass; u32 perm;
@@@ -6641,8 -6650,8 +6641,8 @@@ static int selinux_inode_notifysecctx(s */ static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { - return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, - ctx, ctxlen, 0); + return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, + ctx, ctxlen, 0, NULL); }
static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) @@@ -6661,7 -6670,11 +6661,7 @@@ static int selinux_key_alloc(struct ke unsigned long flags) { const struct task_security_struct *tsec; - struct key_security_struct *ksec; - - ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); - if (!ksec) - return -ENOMEM; + struct key_security_struct *ksec = selinux_key(k);
tsec = selinux_cred(cred); if (tsec->keycreate_sid) @@@ -6669,9 -6682,18 +6669,9 @@@ else ksec->sid = tsec->sid;
- k->security = ksec; return 0; }
-static void selinux_key_free(struct key *k) -{ - struct key_security_struct *ksec = k->security; - - k->security = NULL; - kfree(ksec); -} - static int selinux_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) @@@ -6712,14 -6734,14 +6712,14 @@@
sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); - ksec = key->security; + ksec = selinux_key(key);
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); }
static int selinux_key_getsecurity(struct key *key, char **_buffer) { - struct key_security_struct *ksec = key->security; + struct key_security_struct *ksec = selinux_key(key); char *context = NULL; unsigned len; int rc; @@@ -6735,7 -6757,7 +6735,7 @@@ #ifdef CONFIG_KEY_NOTIFICATIONS static int selinux_watch_key(struct key *key) { - struct key_security_struct *ksec = key->security; + struct key_security_struct *ksec = selinux_key(key); u32 sid = current_sid();
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, KEY__VIEW, NULL); @@@ -6789,13 -6811,23 +6789,13 @@@ static int selinux_ib_endport_manage_su INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad); }
-static int selinux_ib_alloc_security(void **ib_sec) +static int selinux_ib_alloc_security(void *ib_sec) { - struct ib_security_struct *sec; + struct ib_security_struct *sec = selinux_ib(ib_sec);
- sec = kzalloc(sizeof(*sec), GFP_KERNEL); - if (!sec) - return -ENOMEM; sec->sid = current_sid(); - - *ib_sec = sec; return 0; } - -static void selinux_ib_free_security(void *ib_sec) -{ - kfree(ib_sec); -} #endif
#ifdef CONFIG_BPF_SYSCALL @@@ -6933,7 -6965,7 +6933,7 @@@ static void selinux_bpf_prog_free(struc }
static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { struct bpf_security_struct *bpfsec;
@@@ -6961,16 -6993,9 +6961,16 @@@ struct lsm_blob_sizes selinux_blob_size .lbs_file = sizeof(struct file_security_struct), .lbs_inode = sizeof(struct inode_security_struct), .lbs_ipc = sizeof(struct ipc_security_struct), + .lbs_key = sizeof(struct key_security_struct), .lbs_msg_msg = sizeof(struct msg_security_struct), +#ifdef CONFIG_PERF_EVENTS + .lbs_perf_event = sizeof(struct perf_event_security_struct), +#endif + .lbs_sock = sizeof(struct sk_security_struct), .lbs_superblock = sizeof(struct superblock_security_struct), .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS, + .lbs_tun_dev = sizeof(struct tun_security_struct), + .lbs_ib = sizeof(struct ib_security_struct), };
#ifdef CONFIG_PERF_EVENTS @@@ -6997,12 -7022,24 +6997,12 @@@ static int selinux_perf_event_alloc(str { struct perf_event_security_struct *perfsec;
- perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); - if (!perfsec) - return -ENOMEM; - + perfsec = selinux_perf_event(event->security); perfsec->sid = current_sid(); - event->security = perfsec;
return 0; }
-static void selinux_perf_event_free(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec = event->security; - - event->security = NULL; - kfree(perfsec); -} - static int selinux_perf_event_read(struct perf_event *event) { struct perf_event_security_struct *perfsec = event->security; @@@ -7270,6 -7307,7 +7270,6 @@@ static struct security_hook_list selinu LSM_HOOK_INIT(secmark_refcount_inc, selinux_secmark_refcount_inc), LSM_HOOK_INIT(secmark_refcount_dec, selinux_secmark_refcount_dec), LSM_HOOK_INIT(req_classify_flow, selinux_req_classify_flow), - LSM_HOOK_INIT(tun_dev_free_security, selinux_tun_dev_free_security), LSM_HOOK_INIT(tun_dev_create, selinux_tun_dev_create), LSM_HOOK_INIT(tun_dev_attach_queue, selinux_tun_dev_attach_queue), LSM_HOOK_INIT(tun_dev_attach, selinux_tun_dev_attach), @@@ -7278,6 -7316,7 +7278,6 @@@ LSM_HOOK_INIT(ib_pkey_access, selinux_ib_pkey_access), LSM_HOOK_INIT(ib_endport_manage_subnet, selinux_ib_endport_manage_subnet), - LSM_HOOK_INIT(ib_free_security, selinux_ib_free_security), #endif #ifdef CONFIG_SECURITY_NETWORK_XFRM LSM_HOOK_INIT(xfrm_policy_free_security, selinux_xfrm_policy_free), @@@ -7291,6 -7330,7 +7291,6 @@@ #endif
#ifdef CONFIG_KEYS - LSM_HOOK_INIT(key_free, selinux_key_free), LSM_HOOK_INIT(key_permission, selinux_key_permission), LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity), #ifdef CONFIG_KEY_NOTIFICATIONS @@@ -7315,6 -7355,7 +7315,6 @@@
#ifdef CONFIG_PERF_EVENTS LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), - LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), #endif