The following commit has been merged in the master branch:

commit a49468240e89628236b738b5ab9416eae8f90c15
Merge: 8c06da67d0bd3139a97f301b4aa9c482b9d4f29e 2c9e5d4a008293407836d29d35dfd4353615bd2f
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed May 15 14:05:08 2024 -0700
Merge tag 'modules-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux
Pull modules updates from Luis Chamberlain: "Finally something fun. Mike Rapoport does some cleanup to allow us to take module_alloc() out of modules and into the new, paint-shedded execmem_alloc() and execmem_free() helpers, to emphasize that these helpers are actually used outside of modules.
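As a rough caller-side sketch of what that rename means in practice (this is not a hunk from the merge; EXECMEM_MODULE_TEXT is one of the enum execmem_type values the series introduces), code that used to pair module_alloc() with module_memfree() ends up doing roughly:

    #include <linux/execmem.h>

    /* Sketch: allocate and release a chunk of executable memory. */
    static void *alloc_exec_buf(size_t size)
    {
            return execmem_alloc(EXECMEM_MODULE_TEXT, size);  /* was module_alloc(size) */
    }

    static void free_exec_buf(void *buf)
    {
            execmem_free(buf);                                /* was module_memfree(buf) */
    }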
It starts with non-functional changes (an API rename and placeholders) that then allow architectures to define their requirements in a shiny new struct execmem_info, with ranges and the requirements for those ranges.
Archs can now initialize this execmem_info as the last part of mm_core_init() if they have to diverge from the norm. Each range is a known type, clearly articulated and spelled out in enum execmem_type.
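For illustration only (this sketch is not taken from the diff below; the field and constant names follow the execmem API the series adds, and include/linux/execmem.h is the authoritative layout), an architecture that diverges from the defaults might describe its ranges roughly like this:

    #include <linux/execmem.h>
    #include <linux/mm.h>

    static struct execmem_info execmem_info __ro_after_init;

    /* The execmem core asks for this during mm_core_init() to learn the ranges. */
    struct execmem_info __init *execmem_arch_setup(void)
    {
            execmem_info = (struct execmem_info){
                    .ranges = {
                            /* module text, and the fallback for types without their own entry */
                            [EXECMEM_DEFAULT] = {
                                    .start     = MODULES_VADDR,
                                    .end       = MODULES_END,
                                    .pgprot    = PAGE_KERNEL,
                                    .alignment = 1,
                            },
                            /* generated code such as kprobe insn slots */
                            [EXECMEM_KPROBES] = {
                                    .start     = VMALLOC_START,
                                    .end       = VMALLOC_END,
                                    .pgprot    = PAGE_KERNEL_ROX,
                                    .alignment = 1,
                            },
                    },
            };

            return &execmem_info;
    }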
Although a lot of this is major cleanup and prep work for future enhancements, an immediate, clear gain is that we now get to enable KPROBES without MODULES. That is ultimately what motivated picking this work up again, now with a smaller goal as a concrete stepping stone"
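Concretely, the KPROBES-without-MODULES gain comes down to the kprobe insn-slot allocator no longer needing the module allocator; the shape it takes with execmem looks roughly like this (a sketch, not quoted from the series):

    #include <linux/execmem.h>
    #include <linux/kprobes.h>

    /* Sketch: kprobe instruction slots come from execmem, not module_alloc(). */
    void __weak *alloc_insn_page(void)
    {
            return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
    }

    static void free_insn_page(void *page)
    {
            execmem_free(page);
    }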
* tag 'modules-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux:
  bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of
  kprobes: remove dependency on CONFIG_MODULES
  powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate
  x86/ftrace: enable dynamic ftrace without CONFIG_MODULES
  arch: make execmem setup available regardless of CONFIG_MODULES
  powerpc: extend execmem_params for kprobes allocations
  arm64: extend execmem_info for generated code allocations
  riscv: extend execmem_params for generated code allocations
  mm/execmem, arch: convert remaining overrides of module_alloc to execmem
  mm/execmem, arch: convert simple overrides of module_alloc to execmem
  mm: introduce execmem_alloc() and execmem_free()
  module: make module_memory_{alloc,free} more self-contained
  sparc: simplify module_alloc()
  nios2: define virtual address space for modules
  mips: module: rename MODULE_START to MODULES_VADDR
  arm64: module: remove unneeded call to kasan_alloc_module_shadow()
  kallsyms: replace deprecated strncpy with strscpy
  module: allow UNUSED_KSYMS_WHITELIST to be relative against objtree.
diff --combined arch/arm64/Kconfig index a04059c31abab,74b34a78b7ac1..2df0818c3ca96 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@@ -105,6 -105,7 +105,7 @@@ config ARM6 select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_EXECMEM_LATE if EXECMEM select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN @@@ -255,11 -256,9 +256,11 @@@ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK + select USER_STACKTRACE_SUPPORT help ARM 64-bit (AArch64) Linux support.
diff --combined arch/arm64/net/bpf_jit_comp.c index 1d88711467bb8,074b1d156223f..720336d288568 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@@ -29,7 -29,6 +29,7 @@@ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) +#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@@ -68,8 -67,6 +68,8 @@@ static const int bpf2a64[] = /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), [FP_BOTTOM] = A64_R(27), + /* callee saved register for kern_vm_start address */ + [ARENA_VM_START] = A64_R(28), };
struct jit_ctx { @@@ -82,7 -79,6 +82,7 @@@ __le32 *ro_image; u32 stack_size; int fpb_offset; + u64 user_vm_start; };
struct bpf_plt { @@@ -299,7 -295,7 +299,7 @@@ static bool is_lsi_offset(int offset, i #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, - bool is_exception_cb) + bool is_exception_cb, u64 arena_vm_start) { const struct bpf_prog *prog = ctx->prog; const bool is_main_prog = !bpf_is_subprog(prog); @@@ -310,7 -306,6 +310,7 @@@ const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; const u8 fpb = bpf2a64[FP_BOTTOM]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const int idx0 = ctx->idx; int cur_offset;
@@@ -416,10 -411,6 +416,10 @@@
/* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + + if (arena_vm_start) + emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx); + return 0; }
@@@ -494,26 -485,20 +494,26 @@@ static int emit_bpf_tail_call(struct ji static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 dst = bpf2a64[insn->dst_reg]; const u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const bool isdw = BPF_SIZE(code) == BPF_DW; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; const s16 off = insn->off; - u8 reg; + u8 reg = dst;
- if (!off) { - reg = dst; - } else { - emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_ADD(1, tmp, tmp, dst), ctx); - reg = tmp; + if (off || arena) { + if (off) { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); + reg = tmp; + } + if (arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } }
switch (insn->imm) { @@@ -582,12 -567,6 +582,12 @@@ static int emit_ll_sc_atomic(const stru u8 reg; s32 jmp_offset;
+ if (BPF_MODE(code) == BPF_PROBE_ATOMIC) { + /* ll_sc based atomics don't support unsafe pointers yet. */ + pr_err_once("unknown atomic opcode %02x\n", code); + return -EINVAL; + } + if (!off) { reg = dst; } else { @@@ -759,7 -738,6 +759,7 @@@ static void build_epilogue(struct jit_c
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@@ -767,8 -745,7 +767,8 @@@ off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
- regs->regs[dst_reg] = 0; + if (dst_reg != DONT_CLEAR) + regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; return true; } @@@ -788,9 -765,7 +788,9 @@@ static int add_exception_handler(const return 0;
if (BPF_MODE(insn->code) != BPF_PROBE_MEM && - BPF_MODE(insn->code) != BPF_PROBE_MEMSX) + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32 && + BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) return 0;
if (!ctx->prog->aux->extable || @@@ -835,9 -810,6 +835,9 @@@
ex->insn = ins_offset;
+ if (BPF_CLASS(insn->code) != BPF_LDX) + dst_reg = DONT_CLEAR; + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
@@@ -857,13 -829,12 +857,13 @@@ static int build_insn(const struct bpf_ bool extra_pass) { const u8 code = insn->code; - const u8 dst = bpf2a64[insn->dst_reg]; - const u8 src = bpf2a64[insn->src_reg]; + u8 dst = bpf2a64[insn->dst_reg]; + u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fpb = bpf2a64[FP_BOTTOM]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@@ -882,24 -853,6 +882,24 @@@ /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits + emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); + emit(A64_LSL(1, dst, dst, 32), ctx); + emit(A64_CBZ(1, tmp, 2), ctx); + emit(A64_ORR(1, tmp, dst, tmp), ctx); + emit(A64_MOV(1, dst, tmp), ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (dst != src) + emit(A64_MOV(1, dst, src), ctx); + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) + emit(A64_MRS_TPIDR_EL2(tmp), ctx); + else + emit(A64_MRS_TPIDR_EL1(tmp), ctx); + emit(A64_ADD(1, dst, dst, tmp), ctx); + break; + } switch (insn->off) { case 0: emit(A64_MOV(is64, dst, src), ctx); @@@ -1228,21 -1181,6 +1228,21 @@@ emit_cond_jmp const u8 r0 = bpf2a64[BPF_REG_0]; bool func_addr_fixed; u64 func_addr; + u32 cpu_offset; + + /* Implement helper call to bpf_get_smp_processor_id() inline */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + cpu_offset = offsetof(struct thread_info, cpu); + + emit(A64_MRS_SP_EL0(tmp), ctx); + if (is_lsi_offset(cpu_offset, 2)) { + emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); + } else { + emit_a64_mov_i(1, tmp2, cpu_offset, ctx); + emit(A64_LDR32(r0, tmp, tmp2), ctx); + } + break; + }
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); @@@ -1299,15 -1237,7 +1299,15 @@@ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: - if (ctx->fpb_offset > 0 && src == fp) { + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); + src = tmp2; + } + if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { src_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@@ -1392,15 -1322,7 +1392,15 @@@ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: - if (ctx->fpb_offset > 0 && dst == fp) { + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { dst_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@@ -1443,10 -1365,6 +1443,10 @@@ } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break;
/* STX: *(size *)(dst + off) = src */ @@@ -1454,15 -1372,7 +1454,15 @@@ case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - if (ctx->fpb_offset > 0 && dst == fp) { + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { dst_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@@ -1503,26 -1413,16 +1503,26 @@@ } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break;
case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) ret = emit_lse_atomic(insn, ctx); else ret = emit_ll_sc_atomic(insn, ctx); if (ret) return ret; + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break;
default: @@@ -1694,7 -1594,6 +1694,7 @@@ struct bpf_prog *bpf_int_jit_compile(st bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; + u64 arena_vm_start; u8 *image_ptr; u8 *ro_image_ptr;
@@@ -1712,7 -1611,6 +1712,7 @@@ prog = tmp; }
+ arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); @@@ -1743,7 -1641,6 +1743,7 @@@ }
ctx.fpb_offset = find_fpb_offset(prog); + ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
/* * 1. Initial fake pass to compute ctx->idx and ctx->offset. @@@ -1751,8 -1648,7 +1751,8 @@@ * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. */ - if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) { + if (build_prologue(&ctx, was_classic, prog->aux->exception_cb, + arena_vm_start)) { prog = orig_prog; goto out_off; } @@@ -1800,7 -1696,7 +1800,7 @@@ skip_init_ctx ctx.idx = 0; ctx.exentry_idx = 0;
- build_prologue(&ctx, was_classic, prog->aux->exception_cb); + build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);
if (build_body(&ctx, extra_pass)) { prog = orig_prog; @@@ -1897,17 -1793,6 +1897,6 @@@ u64 bpf_jit_alloc_exec_limit(void return VMALLOC_END - VMALLOC_START; }
- void *bpf_jit_alloc_exec(unsigned long size) - { - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(vmalloc(size)); - } - - void bpf_jit_free_exec(void *addr) - { - return vfree(addr); - } - /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ bool bpf_jit_supports_subprog_tailcalls(void) { @@@ -2280,9 -2165,12 +2269,9 @@@ void arch_free_bpf_trampoline(void *ima bpf_prog_pack_free(image, size); }
-void arch_protect_bpf_trampoline(void *image, unsigned int size) -{ -} - -void arch_unprotect_bpf_trampoline(void *image, unsigned int size) +int arch_protect_bpf_trampoline(void *image, unsigned int size) { + return 0; }
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, @@@ -2565,39 -2453,6 +2554,39 @@@ bool bpf_jit_supports_exceptions(void return true; }
+bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + return false; + } + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} + void bpf_jit_free(struct bpf_prog *prog) { if (prog->jited) { diff --combined arch/riscv/net/bpf_jit_core.c index 8a69d6d81e322,e238fdbd5dbc5..0a96abdaca65f --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@@ -80,8 -80,6 +80,8 @@@ struct bpf_prog *bpf_int_jit_compile(st goto skip_init_ctx; }
+ ctx->arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx->prog = prog; ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (!ctx->offset) { @@@ -221,19 -219,6 +221,6 @@@ u64 bpf_jit_alloc_exec_limit(void return BPF_JIT_REGION_SIZE; }
- void *bpf_jit_alloc_exec(unsigned long size) - { - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - void bpf_jit_free_exec(void *addr) - { - return vfree(addr); - } - void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; diff --combined arch/x86/Kconfig index 9d16fee6bdb89,965d65edbae04..bc47bc9841ff8 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@@ -34,6 -34,7 +34,7 @@@ config X86_6 select SWIOTLB select ARCH_HAS_ELFCORE_COMPAT select ZONE_DMA32 + select EXECMEM if DYNAMIC_FTRACE
config FORCE_DYNAMIC_FTRACE def_bool y @@@ -169,7 -170,6 +170,7 @@@ config X8 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS + select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 @@@ -466,17 -466,6 +467,17 @@@ config X86_X2API
If you don't know what to do here, say N.
+config X86_POSTED_MSI + bool "Enable MSI and MSI-x delivery by posted interrupts" + depends on X86_64 && IRQ_REMAP + help + This enables MSIs that are under interrupt remapping to be delivered as + posted interrupts to the host kernel. Interrupt throughput can + potentially be improved by coalescing CPU notifications during high + frequency bursts. + + If you don't know what to do here, say N. + config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --combined kernel/bpf/core.c index 733fca2634b78,892e50afda593..1a6c3faa6e4a1 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@@ -22,11 -22,9 +22,10 @@@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/random.h> - #include <linux/moduleloader.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/objtool.h> +#include <linux/overflow.h> #include <linux/rbtree_latch.h> #include <linux/kallsyms.h> #include <linux/rcupdate.h> @@@ -38,6 -36,7 +37,7 @@@ #include <linux/nospec.h> #include <linux/bpf_mem_alloc.h> #include <linux/memcontrol.h> + #include <linux/execmem.h>
#include <asm/barrier.h> #include <asm/unaligned.h> @@@ -748,7 -747,7 +748,7 @@@ const char *__bpf_address_lookup(unsign unsigned long symbol_start = ksym->start; unsigned long symbol_end = ksym->end;
- strncpy(sym, ksym->name, KSYM_NAME_LEN); + strscpy(sym, ksym->name, KSYM_NAME_LEN);
ret = sym; if (size) @@@ -814,7 -813,7 +814,7 @@@ int bpf_get_kallsym(unsigned int symnum if (it++ != symnum) continue;
- strncpy(sym, ksym->name, KSYM_NAME_LEN); + strscpy(sym, ksym->name, KSYM_NAME_LEN);
*value = ksym->start; *type = BPF_SYM_ELF_TYPE; @@@ -850,7 -849,7 +850,7 @@@ int bpf_jit_add_poke_descriptor(struct return -EINVAL; }
- tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL); + tab = krealloc_array(tab, size, sizeof(*poke), GFP_KERNEL); if (!tab) return -ENOMEM;
@@@ -909,30 -908,23 +909,30 @@@ static LIST_HEAD(pack_list) static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; + int err;
pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)), GFP_KERNEL); if (!pack) return NULL; pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE); - if (!pack->ptr) { - kfree(pack); - return NULL; - } + if (!pack->ptr) + goto out; bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); - list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr); - set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + err = set_memory_rox((unsigned long)pack->ptr, + BPF_PROG_PACK_SIZE / PAGE_SIZE); + if (err) + goto out; + list_add_tail(&pack->list, &pack_list); return pack; + +out: + bpf_jit_free_exec(pack->ptr); + kfree(pack); + return NULL; }
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) @@@ -947,16 -939,9 +947,16 @@@ size = round_up(size, PAGE_SIZE); ptr = bpf_jit_alloc_exec(size); if (ptr) { + int err; + bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); + err = set_memory_rox((unsigned long)ptr, + size / PAGE_SIZE); + if (err) { + bpf_jit_free_exec(ptr); + ptr = NULL; + } } goto out; } @@@ -1065,12 -1050,12 +1065,12 @@@ void bpf_jit_uncharge_modmem(u32 size
void *__weak bpf_jit_alloc_exec(unsigned long size) { - return module_alloc(size); + return execmem_alloc(EXECMEM_BPF, size); }
void __weak bpf_jit_free_exec(void *addr) { - module_memfree(addr); + execmem_free(addr); }
struct bpf_binary_header * @@@ -2219,7 -2204,6 +2219,7 @@@ static unsigned int PROG_NAME(stack_siz u64 stack[stack_size / sizeof(u64)]; \ u64 regs[MAX_BPF_EXT_REG] = {}; \ \ + kmsan_unpoison_memory(stack, sizeof(stack)); \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ return ___bpf_prog_run(regs, insn); \ @@@ -2233,7 -2217,6 +2233,7 @@@ static u64 PROG_NAME_ARGS(stack_size)(u u64 stack[stack_size / sizeof(u64)]; \ u64 regs[MAX_BPF_EXT_REG]; \ \ + kmsan_unpoison_memory(stack, sizeof(stack)); \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ BPF_R1 = r1; \ BPF_R2 = r2; \ @@@ -2420,9 -2403,7 +2420,9 @@@ struct bpf_prog *bpf_prog_select_runtim }
finalize: - bpf_prog_lock_ro(fp); + *err = bpf_prog_lock_ro(fp); + if (*err) + return fp;
/* The tail call compatibility check can only be done at * this late stage as we need to determine, if we deal @@@ -2456,14 -2437,13 +2456,14 @@@ EXPORT_SYMBOL(bpf_empty_prog_array)
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { + struct bpf_prog_array *p; + if (prog_cnt) - return kzalloc(sizeof(struct bpf_prog_array) + - sizeof(struct bpf_prog_array_item) * - (prog_cnt + 1), - flags); + p = kzalloc(struct_size(p, items, prog_cnt + 1), flags); + else + p = &bpf_empty_prog_array.hdr;
- return &bpf_empty_prog_array.hdr; + return p; }
void bpf_prog_array_free(struct bpf_prog_array *progs) @@@ -2816,7 -2796,7 +2816,7 @@@ void bpf_prog_free(struct bpf_prog *fp } EXPORT_SYMBOL_GPL(bpf_prog_free);
-/* RNG for unpriviledged user space with separated state from prandom_u32(). */ +/* RNG for unprivileged user space with separated state from prandom_u32(). */ static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
void bpf_user_rnd_init_once(void) @@@ -2941,28 -2921,12 +2941,28 @@@ bool __weak bpf_jit_needs_zext(void return false; }
+/* Return true if the JIT inlines the call to the helper corresponding to + * the imm. + * + * The verifier will not patch the insn->imm for the call to the helper if + * this returns true. + */ +bool __weak bpf_jit_inlines_helper_call(s32 imm) +{ + return false; +} + /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */ bool __weak bpf_jit_supports_subprog_tailcalls(void) { return false; }
+bool __weak bpf_jit_supports_percpu_insn(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; @@@ -2978,11 -2942,6 +2978,11 @@@ bool __weak bpf_jit_supports_arena(void return false; }
+bool __weak bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + return false; +} + u64 __weak bpf_arch_uaddress_limit(void) { #if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)