The following commit has been merged in the master branch: commit 4276c819751a4d930359db6f72a1ab11bb646615 Merge: a5c208600eaf0e6b4cd5648e925418dd53e19bf9 e7b813b32a42a3a6281a4fd9ae7700a0257c1d50 Author: Stephen Rothwell sfr@canb.auug.org.au Date: Fri Nov 25 13:09:34 2022 +1100
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
diff --combined Documentation/admin-guide/kernel-parameters.txt index a6b45f721435,78493797460f..bcc6bf53797e --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@@ -703,17 -703,6 +703,17 @@@ condev= [HW,S390] console device conmode=
+ con3215_drop= [S390] 3215 console drop mode. + Format: y|n|Y|N|1|0 + When set to true, drop data on the 3215 console when + the console buffer is full. In this case the + operator using a 3270 terminal emulator (for example + x3270) does not have to enter the clear key for the + console output to advance and the kernel to continue. + This leads to a much faster boot time when a 3270 + terminal emulator is active. If no 3270 terminal + emulator is used, this parameter has no effect. + console= [KNL] Output console device and options.
tty<n> Use the virtual console device <n>. @@@ -842,7 -831,7 +842,7 @@@ memory region [offset, offset + size] for that kernel image. If '@offset' is omitted, then a suitable offset is selected automatically. - [KNL, X86-64] Select a region under 4G first, and + [KNL, X86-64, ARM64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. @@@ -862,23 -851,26 +862,23 @@@ available. It will be ignored if crashkernel=X is specified. crashkernel=size[KMG],low - [KNL, X86-64] range under 4G. When crashkernel=X,high + [KNL, X86-64, ARM64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate - at least 256M below 4G automatically. + default size of memory below 4G automatically. The default + size is platform dependent. + --> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB) + --> arm64: 128MiB This one lets the user specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G.
- [KNL, ARM64] range in low memory. - This one lets the user specify a low range in the - DMA zone for the crash dump kernel. - It will be ignored when crashkernel=X,high is not used - or memory reserved is located in the DMA zones. - cryptomgr.notests [KNL] Disable crypto self-tests
@@@ -892,10 -884,8 +892,10 @@@ handling. When switched on, additional debug data is printed to the console in case a hanging CPU is detected, and that CPU is pinged again in order to try - to resolve the hang situation. - 0: disable csdlock debugging (default) + to resolve the hang situation. The default value of + this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT + Kconfig option. + 0: disable csdlock debugging 1: enable basic csdlock debugging (minor impact) ext: enable extended csdlock debugging (more impact, but more data) @@@ -2310,13 -2300,7 +2310,13 @@@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@@ -2328,13 -2312,7 +2328,13 @@@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@@ -2345,20 -2323,15 +2345,20 @@@ ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted.
For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5
- By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0
js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. @@@ -3804,15 -3777,12 +3804,15 @@@ shutdown the other cpus. Instead use the REBOOT_VECTOR irq.
- nomodeset Disable kernel modesetting. DRM drivers will not perform - display-mode changes or accelerated rendering. Only the - system framebuffer will be available for use if this was - set-up by the firmware or boot loader. + nomodeset Disable kernel modesetting. Most systems' firmware + sets up a display mode and provides framebuffer memory + for output. With nomodeset, DRM and fbdev drivers will + not load if they could possibly displace the pre- + initialized output. Only the system framebuffer will + be available for use. The respective drivers will not + perform display-mode changes or accelerated rendering.
- Useful as fallback, or for testing and debugging. + Useful as error fallback, or for testing and debugging.
nomodule Disable module load
@@@ -4596,17 -4566,15 +4596,15 @@@
ramdisk_start= [RAM] RAM disk image start address
- random.trust_cpu={on,off} - [KNL] Enable or disable trusting the use of the - CPU's random number generator (if available) to - fully seed the kernel's CRNG. Default is controlled - by CONFIG_RANDOM_TRUST_CPU. - - random.trust_bootloader={on,off} - [KNL] Enable or disable trusting the use of a - seed passed by the bootloader (if available) to - fully seed the kernel's CRNG. Default is controlled - by CONFIG_RANDOM_TRUST_BOOTLOADER. + random.trust_cpu=off + [KNL] Disable trusting the use of the CPU's + random number generator (if available) to + initialize the kernel's RNG. + + random.trust_bootloader=off + [KNL] Disable trusting the use of the a seed + passed by the bootloader (if available) to + initialize the kernel's RNG.
randomize_kstack_offset= [KNL] Enable or disable kernel stack offset @@@ -5112,12 -5080,6 +5110,12 @@@ rcupdate.rcu_cpu_stall_timeout to be used (after conversion from seconds to milliseconds).
+ rcupdate.rcu_cpu_stall_cputime= [KNL] + Provide statistics on the cputime and count of + interrupts and tasks during the sampling period. For + multiple continuous RCU stalls, all sampling periods + begin at half of the first RCU stall timeout. + rcupdate.rcu_expedited= [KNL] Use expedited grace-period primitives, for example, synchronize_rcu_expedited() instead @@@ -6293,25 -6255,6 +6291,25 @@@ See also Documentation/trace/ftrace.rst "trace options" section.
+ trace_trigger=[trigger-list] + [FTRACE] Add a event trigger on specific events. + Set a trigger on top of a specific event, with an optional + filter. + + The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..." + Where more than one trigger may be specified that are comma deliminated. + + For example: + + trace_trigger="sched_switch.stacktrace if prev_state == 2" + + The above will enable the "stacktrace" trigger on the "sched_switch" + event but only trigger it if the "prev_state" of the "sched_switch" + event is "2" (TASK_UNINTERUPTIBLE). + + See also "Event triggers" in Documentation/trace/events.rst + + traceoff_on_warning [FTRACE] enable this option to disable tracing when a warning is hit. This turns off "tracing_on". Tracing can @@@ -7014,14 -6957,3 +7012,14 @@@ memory, and other data can't be written using xmon commands. off xmon is disabled. + + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) diff --combined arch/loongarch/kernel/process.c index ddb8ba4eb399,77952d814bad..d61c9f465b95 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@@ -152,7 -152,7 +152,7 @@@ int copy_thread(struct task_struct *p, childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; }
/* user thread */ @@@ -171,15 -171,14 +171,15 @@@ */ childregs->csr_euen = 0;
+ if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
- if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; }
@@@ -294,7 -293,7 +294,7 @@@ unsigned long stack_top(void unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= prandom_u32_max(PAGE_SIZE); + sp -= get_random_u32_below(PAGE_SIZE);
return sp & STACK_ALIGN; } diff --combined arch/powerpc/kernel/process.c index e3e1feaa536a,fcf604370c66..77bfaa528911 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@@ -1359,7 -1359,7 +1359,7 @@@ static void show_instructions(struct pt unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- printk("Instruction dump:"); + printk("Code: ");
/* * If we were executing with the MMU off for instructions, adjust pc @@@ -1373,6 -1373,9 +1373,6 @@@ for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr;
- if (!(i % 8)) - pr_cont("\n"); - if (!__kernel_text_address(pc) || get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); @@@ -2300,6 -2303,6 +2300,6 @@@ void notrace __ppc64_runlatch_off(void unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= prandom_u32_max(PAGE_SIZE); + sp -= get_random_u32_below(PAGE_SIZE); return sp & ~0xf; } diff --combined arch/x86/entry/vdso/vma.c index 6976416b2c9f,d45c5fcfeac2..cbed790551db --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@@ -210,10 -210,11 +210,10 @@@ static vm_fault_t vvar_fault(const stru pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { - struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); + pfn = hv_get_tsc_pfn();
- if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) - return vmf_insert_pfn(vma, vmf->address, - virt_to_phys(tsc_pg) >> PAGE_SHIFT); + if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_timens_page) { struct page *timens_page = find_timens_vvar_page(vma);
@@@ -326,7 -327,7 +326,7 @@@ static unsigned long vdso_addr(unsigne end -= len;
if (end > start) { - offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); + offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1); addr = start + (offset << PAGE_SHIFT); } else { addr = start; diff --combined arch/x86/kernel/cpu/common.c index 73cc546e024d,3f66dd03c091..9cfca3d7d0e2 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@@ -22,9 -22,9 +22,9 @@@ #include <linux/io.h> #include <linux/syscore_ops.h> #include <linux/pgtable.h> + #include <linux/stackprotector.h>
#include <asm/cmdline.h> - #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> #include <asm/doublefault.h> @@@ -52,7 -52,6 +52,7 @@@ #include <asm/cpu.h> #include <asm/mce.h> #include <asm/msr.h> +#include <asm/cacheinfo.h> #include <asm/memtype.h> #include <asm/microcode.h> #include <asm/microcode_intel.h> @@@ -610,7 -609,6 +610,7 @@@ static __always_inline void setup_cet(s
if (!ibt_selftest()) { pr_err("IBT selftest: Failed!\n"); + wrmsrl(MSR_IA32_S_CET, 0); setup_clear_cpu_cap(X86_FEATURE_IBT); return; } @@@ -703,6 -701,16 +703,6 @@@ static const char *table_lookup_model(s __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
-void load_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#endif -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); @@@ -730,45 -738,16 +730,45 @@@ void load_fixmap_gdt(int cpu } EXPORT_SYMBOL_GPL(load_fixmap_gdt);
-/* - * Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. +/** + * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base + * @cpu: The CPU number for which this is invoked + * + * Invoked during early boot to switch from early GDT and early per CPU to + * the direct GDT and the runtime per CPU area. On 32-bit the percpu base + * switch is implicit by loading the direct GDT. On 64bit this requires + * to update GSBASE. */ -void switch_to_new_gdt(int cpu) +void __init switch_gdt_and_percpu_base(int cpu) { - /* Load the original GDT */ load_direct_gdt(cpu); - /* Reload the per-cpu base */ - load_percpu_segment(cpu); + +#ifdef CONFIG_X86_64 + /* + * No need to load %gs. It is already correct. + * + * Writing %gs on 64bit would zero GSBASE which would make any per + * CPU operation up to the point of the wrmsrl() fault. + * + * Set GSBASE to the new offset. Until the wrmsrl() happens the + * early mapping is still valid. That means the GSBASE update will + * lose any prior per CPU data which was not copied over in + * setup_per_cpu_areas(). + * + * This works even with stackprotector enabled because the + * per CPU stack canary is 0 in both per CPU areas. + */ + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); +#else + /* + * %fs is already set to __KERNEL_PERCPU, but after switching GDT + * it is required to load FS again so that the 'hidden' part is + * updated from the new GDT. Up to this point the early per CPU + * translation is active. Any content of the early per CPU data + * which was not copied over in setup_per_cpu_areas() is lost. + */ + loadsegment(fs, __KERNEL_PERCPU); +#endif }
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; @@@ -1969,6 -1948,7 +1969,6 @@@ void identify_secondary_cpu(struct cpui #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - mtrr_ap_init(); validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); @@@ -2013,18 -1993,27 +2013,18 @@@ static __init int setup_clearcpuid(cha } __setup("clearcpuid=", setup_clearcpuid);
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { + .current_task = &init_task, + .preempt_count = INIT_PREEMPT_COUNT, + .top_of_stack = TOP_OF_INIT_STACK, +}; +EXPORT_PER_CPU_SYMBOL(pcpu_hot); + #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-/* - * The following percpu variables are hot. Align current_task to - * cacheline size such that they fall in the same cacheline. - */ -DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = - &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(void *, hardirq_stack_ptr); -DEFINE_PER_CPU(bool, hardirq_stack_inuse); - -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; - static void wrmsrl_cstar(unsigned long val) { /* @@@ -2075,6 -2064,20 +2075,6 @@@ void syscall_init(void
#else /* CONFIG_X86_64 */
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -/* - * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find - * the top of the kernel stack. Use an extra percpu variable to track the - * top of the kernel stack directly. - */ -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); - #ifdef CONFIG_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, __stack_chk_guard); EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); @@@ -2245,6 -2248,12 +2245,6 @@@ void cpu_init(void boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - switch_to_new_gdt(cpu); - if (IS_ENABLED(CONFIG_X86_64)) { loadsegment(fs, 0); memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --combined arch/x86/kernel/module.c index 9f9626af2175,a98687642dd0..705fb2a41d7d --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@@ -53,7 -53,7 +53,7 @@@ static unsigned long int get_module_loa */ if (module_load_offset == 0) module_load_offset = - (prandom_u32_max(1024) + 1) * PAGE_SIZE; + get_random_u32_inclusive(1, 1024) * PAGE_SIZE; mutex_unlock(&module_kaslr_mutex); } return module_load_offset; @@@ -74,11 -74,10 +74,11 @@@ void *module_alloc(unsigned long size return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, - PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, - __builtin_return_address(0)); + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; @@@ -252,13 -251,14 +252,13 @@@ int module_finalize(const Elf_Ehdr *hdr const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + const Elf_Shdr *s, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, + *calls = NULL, *cfi = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) @@@ -273,10 -273,6 +273,10 @@@ retpolines = s; if (!strcmp(".return_sites", secstrings + s->sh_name)) returns = s; + if (!strcmp(".call_sites", secstrings + s->sh_name)) + calls = s; + if (!strcmp(".cfi_sites", secstrings + s->sh_name)) + cfi = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@@ -289,22 -285,6 +289,22 @@@ void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + if (retpolines || cfi) { + void *rseg = NULL, *cseg = NULL; + unsigned int rsize = 0, csize = 0; + + if (retpolines) { + rseg = (void *)retpolines->sh_addr; + rsize = retpolines->sh_size; + } + + if (cfi) { + cseg = (void *)cfi->sh_addr; + csize = cfi->sh_size; + } + + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@@ -318,32 -298,16 +318,32 @@@ void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (calls || para) { + struct callthunk_sites cs = {}; + + if (calls) { + cs.call_start = (void *)calls->sh_addr; + cs.call_end = (void *)calls->sh_addr + calls->sh_size; + } + + if (para) { + cs.pv_start = (void *)para->sh_addr; + cs.pv_end = (void *)para->sh_addr + para->sh_size; + } + + callthunks_patch_module_calls(&cs, me); + } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); } - if (locks && text) { + if (locks) { void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; + void *text = me->core_layout.base; + void *text_end = text + me->core_layout.text_size; alternatives_smp_module_add(me, me->name, lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); + text, text_end); }
if (orc && orc_ip) diff --combined arch/x86/kernel/process.c index d1e83ba21130,62671ccf0404..26e8f57c75ad --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@@ -47,7 -47,6 +47,7 @@@ #include <asm/frame.h> #include <asm/unwind.h> #include <asm/tdx.h> +#include <asm/mmu_context.h>
#include "process.h"
@@@ -368,8 -367,6 +368,8 @@@ void arch_setup_new_exec(void task_clear_spec_ssb_noexec(current); speculation_ctrl_update(read_thread_flags()); } + + mm_reset_untag_mask(current->mm); }
#ifdef CONFIG_X86_IOPL_IOPERM @@@ -968,7 -965,7 +968,7 @@@ early_param("idle", idle_setup) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= prandom_u32_max(8192); + sp -= get_random_u32_below(8192); return sp & ~0xf; }
diff --combined arch/x86/kernel/setup_percpu.c index c2fc4c41c164,b26123c90b4f..c242dc47e9cb --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@@ -11,6 -11,7 +11,7 @@@ #include <linux/smp.h> #include <linux/topology.h> #include <linux/pfn.h> + #include <linux/stackprotector.h> #include <asm/sections.h> #include <asm/processor.h> #include <asm/desc.h> @@@ -21,8 -22,10 +22,7 @@@ #include <asm/proto.h> #include <asm/cpumask.h> #include <asm/cpu.h> - #include <asm/stackprotector.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - #ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) #else @@@ -169,7 -172,7 +169,7 @@@ void __init setup_per_cpu_areas(void for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(cpu_number, cpu) = cpu; + per_cpu(pcpu_hot.cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the @@@ -208,7 -211,7 +208,7 @@@ * area. Reload any changed state for the boot CPU. */ if (!cpu) - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); }
/* indicate the early static arrays will soon be gone */ diff --combined arch/x86/kernel/smpboot.c index 26937f28000b,5a742b6ec46d..55cad72715d9 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@@ -56,9 -56,9 +56,10 @@@ #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> + #include <linux/stackprotector.h>
#include <asm/acpi.h> +#include <asm/cacheinfo.h> #include <asm/desc.h> #include <asm/nmi.h> #include <asm/irq.h> @@@ -1047,7 -1047,7 +1048,7 @@@ int common_cpu_up(unsigned int cpu, str /* Just in case we booted with a single CPU. */ alternatives_enable_smp();
- per_cpu(current_task, cpu) = idle; + per_cpu(pcpu_hot.current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle);
/* Initialize the interrupt stack(s) */ @@@ -1057,7 -1057,7 +1058,7 @@@
#ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif @@@ -1429,6 -1429,8 +1430,6 @@@ void __init native_smp_prepare_cpus(uns
uv_system_init();
- set_mtrr_aps_delayed_init(); - smp_quirk_init_udelay();
speculative_store_bypass_ht_init(); @@@ -1438,12 -1440,12 +1439,12 @@@
void arch_thaw_secondary_cpus_begin(void) { - set_mtrr_aps_delayed_init(); + set_cache_aps_delayed_init(true); }
void arch_thaw_secondary_cpus_end(void) { - mtrr_aps_init(); + cache_aps_init(); }
/* @@@ -1452,11 -1454,7 +1453,11 @@@ void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(me); + + /* SMP handles this from setup_per_cpu_areas() */ + if (!IS_ENABLED(CONFIG_SMP)) + switch_gdt_and_percpu_base(me); + /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); @@@ -1490,7 -1488,7 +1491,7 @@@ void __init native_smp_cpus_done(unsign
nmi_selftest(); impress_friends(); - mtrr_aps_init(); + cache_aps_init(); }
static int __initdata setup_possible_cpus = -1; diff --combined arch/x86/xen/enlighten_pv.c index ebb6958ebb05,745420853a7c..9e1ac6a281e4 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@@ -23,7 -23,6 +23,7 @@@ #include <linux/start_kernel.h> #include <linux/sched.h> #include <linux/kprobes.h> +#include <linux/kstrtox.h> #include <linux/memblock.h> #include <linux/export.h> #include <linux/mm.h> @@@ -33,6 -32,7 +33,7 @@@ #include <linux/edd.h> #include <linux/reboot.h> #include <linux/virtio_anchor.h> + #include <linux/stackprotector.h>
#include <xen/xen.h> #include <xen/events.h> @@@ -65,7 -65,6 +66,6 @@@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/reboot.h> - #include <asm/stackprotector.h> #include <asm/hypervisor.h> #include <asm/mach_traps.h> #include <asm/mwait.h> @@@ -114,7 -113,7 +114,7 @@@ static __read_mostly bool xen_msr_safe static int __init parse_xen_msr_safe(char *str) { if (str) - return strtobool(str, &xen_msr_safe); + return kstrtobool(str, &xen_msr_safe); return -EINVAL; } early_param("xen_msr_safe", parse_xen_msr_safe); @@@ -1210,7 -1209,7 +1210,7 @@@ static void __init xen_setup_gdt(int cp pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; pv_ops.cpu.load_gdt = xen_load_gdt_boot;
- switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu);
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; pv_ops.cpu.load_gdt = xen_load_gdt; diff --combined crypto/testmgr.c index e2806ef044fd,e669acd2ebdd..5492520cdabf --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@@ -855,9 -855,9 +855,9 @@@ static int prepare_keybuf(const u8 *key /* Generate a random length in range [0, max_len], but prefer smaller values */ static unsigned int generate_random_length(unsigned int max_len) { - unsigned int len = prandom_u32_max(max_len + 1); + unsigned int len = get_random_u32_below(max_len + 1);
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: return len % 64; case 1: @@@ -874,14 -874,14 +874,14 @@@ static void flip_random_bit(u8 *buf, si { size_t bitpos;
- bitpos = prandom_u32_max(size * 8); + bitpos = get_random_u32_below(size * 8); buf[bitpos / 8] ^= 1 << (bitpos % 8); }
/* Flip a random byte in the given nonempty data buffer */ static void flip_random_byte(u8 *buf, size_t size) { - buf[prandom_u32_max(size)] ^= 0xff; + buf[get_random_u32_below(size)] ^= 0xff; }
/* Sometimes make some random changes to the given nonempty data buffer */ @@@ -891,15 -891,15 +891,15 @@@ static void mutate_buffer(u8 *buf, size size_t i;
/* Sometimes flip some bits */ - if (prandom_u32_max(4) == 0) { - num_flips = min_t(size_t, 1 << prandom_u32_max(8), size * 8); + if (get_random_u32_below(4) == 0) { + num_flips = min_t(size_t, 1 << get_random_u32_below(8), size * 8); for (i = 0; i < num_flips; i++) flip_random_bit(buf, size); }
/* Sometimes flip some bytes */ - if (prandom_u32_max(4) == 0) { - num_flips = min_t(size_t, 1 << prandom_u32_max(8), size); + if (get_random_u32_below(4) == 0) { + num_flips = min_t(size_t, 1 << get_random_u32_below(8), size); for (i = 0; i < num_flips; i++) flip_random_byte(buf, size); } @@@ -915,11 -915,11 +915,11 @@@ static void generate_random_bytes(u8 *b if (count == 0) return;
- switch (prandom_u32_max(8)) { /* Choose a generation strategy */ + switch (get_random_u32_below(8)) { /* Choose a generation strategy */ case 0: case 1: /* All the same byte, plus optional mutations */ - switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: b = 0x00; break; @@@ -959,24 -959,24 +959,24 @@@ static char *generate_random_sgl_divisi unsigned int this_len; const char *flushtype_str;
- if (div == &divs[max_divs - 1] || prandom_u32_max(2) == 0) + if (div == &divs[max_divs - 1] || get_random_u32_below(2) == 0) this_len = remaining; else - this_len = 1 + prandom_u32_max(remaining); + this_len = get_random_u32_inclusive(1, remaining); div->proportion_of_total = this_len;
- if (prandom_u32_max(4) == 0) - div->offset = (PAGE_SIZE - 128) + prandom_u32_max(128); - else if (prandom_u32_max(2) == 0) - div->offset = prandom_u32_max(32); + if (get_random_u32_below(4) == 0) + div->offset = get_random_u32_inclusive(PAGE_SIZE - 128, PAGE_SIZE - 1); + else if (get_random_u32_below(2) == 0) + div->offset = get_random_u32_below(32); else - div->offset = prandom_u32_max(PAGE_SIZE); - if (prandom_u32_max(8) == 0) + div->offset = get_random_u32_below(PAGE_SIZE); + if (get_random_u32_below(8) == 0) div->offset_relative_to_alignmask = true;
div->flush_type = FLUSH_TYPE_NONE; if (gen_flushes) { - switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: div->flush_type = FLUSH_TYPE_REIMPORT; break; @@@ -988,7 -988,7 +988,7 @@@
if (div->flush_type != FLUSH_TYPE_NONE && !(req_flags & CRYPTO_TFM_REQ_MAY_SLEEP) && - prandom_u32_max(2) == 0) + get_random_u32_below(2) == 0) div->nosimd = true;
switch (div->flush_type) { @@@ -1035,7 -1035,7 +1035,7 @@@ static void generate_random_testvec_con
p += scnprintf(p, end - p, "random:");
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: case 1: cfg->inplace_mode = OUT_OF_PLACE; @@@ -1050,12 -1050,12 +1050,12 @@@ break; }
- if (prandom_u32_max(2) == 0) { + if (get_random_u32_below(2) == 0) { cfg->req_flags |= CRYPTO_TFM_REQ_MAY_SLEEP; p += scnprintf(p, end - p, " may_sleep"); }
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: cfg->finalization_type = FINALIZATION_TYPE_FINAL; p += scnprintf(p, end - p, " use_final"); @@@ -1071,7 -1071,7 +1071,7 @@@ }
if (!(cfg->req_flags & CRYPTO_TFM_REQ_MAY_SLEEP) && - prandom_u32_max(2) == 0) { + get_random_u32_below(2) == 0) { cfg->nosimd = true; p += scnprintf(p, end - p, " nosimd"); } @@@ -1084,7 -1084,7 +1084,7 @@@ cfg->req_flags); p += scnprintf(p, end - p, "]");
- if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32_max(2) == 0) { + if (cfg->inplace_mode == OUT_OF_PLACE && get_random_u32_below(2) == 0) { p += scnprintf(p, end - p, " dst_divs=["); p = generate_random_sgl_divisions(cfg->dst_divs, ARRAY_SIZE(cfg->dst_divs), @@@ -1093,13 -1093,13 +1093,13 @@@ p += scnprintf(p, end - p, "]"); }
- if (prandom_u32_max(2) == 0) { - cfg->iv_offset = 1 + prandom_u32_max(MAX_ALGAPI_ALIGNMASK); + if (get_random_u32_below(2) == 0) { + cfg->iv_offset = get_random_u32_inclusive(1, MAX_ALGAPI_ALIGNMASK); p += scnprintf(p, end - p, " iv_offset=%u", cfg->iv_offset); }
- if (prandom_u32_max(2) == 0) { - cfg->key_offset = 1 + prandom_u32_max(MAX_ALGAPI_ALIGNMASK); + if (get_random_u32_below(2) == 0) { + cfg->key_offset = get_random_u32_inclusive(1, MAX_ALGAPI_ALIGNMASK); p += scnprintf(p, end - p, " key_offset=%u", cfg->key_offset); }
@@@ -1652,8 -1652,8 +1652,8 @@@ static void generate_random_hash_testve vec->ksize = 0; if (maxkeysize) { vec->ksize = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->ksize = 1 + prandom_u32_max(maxkeysize); + if (get_random_u32_below(4) == 0) + vec->ksize = get_random_u32_inclusive(1, maxkeysize); generate_random_bytes((u8 *)vec->key, vec->ksize);
vec->setkey_error = crypto_shash_setkey(desc->tfm, vec->key, @@@ -2218,13 -2218,13 +2218,13 @@@ static void mutate_aead_message(struct const unsigned int aad_tail_size = aad_iv ? ivsize : 0; const unsigned int authsize = vec->clen - vec->plen;
- if (prandom_u32_max(2) == 0 && vec->alen > aad_tail_size) { + if (get_random_u32_below(2) == 0 && vec->alen > aad_tail_size) { /* Mutate the AAD */ flip_random_bit((u8 *)vec->assoc, vec->alen - aad_tail_size); - if (prandom_u32_max(2) == 0) + if (get_random_u32_below(2) == 0) return; } - if (prandom_u32_max(2) == 0) { + if (get_random_u32_below(2) == 0) { /* Mutate auth tag (assuming it's at the end of ciphertext) */ flip_random_bit((u8 *)vec->ctext + vec->plen, authsize); } else { @@@ -2249,7 -2249,7 +2249,7 @@@ static void generate_aead_message(struc const unsigned int ivsize = crypto_aead_ivsize(tfm); const unsigned int authsize = vec->clen - vec->plen; const bool inauthentic = (authsize >= MIN_COLLISION_FREE_AUTHSIZE) && - (prefer_inauthentic || prandom_u32_max(4) == 0); + (prefer_inauthentic || get_random_u32_below(4) == 0);
/* Generate the AAD. */ generate_random_bytes((u8 *)vec->assoc, vec->alen); @@@ -2257,7 -2257,7 +2257,7 @@@ /* Avoid implementation-defined behavior. */ memcpy((u8 *)vec->assoc + vec->alen - ivsize, vec->iv, ivsize);
- if (inauthentic && prandom_u32_max(2) == 0) { + if (inauthentic && get_random_u32_below(2) == 0) { /* Generate a random ciphertext. */ generate_random_bytes((u8 *)vec->ctext, vec->clen); } else { @@@ -2321,8 -2321,8 +2321,8 @@@ static void generate_random_aead_testve
/* Key: length in [0, maxkeysize], but usually choose maxkeysize */ vec->klen = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->klen = prandom_u32_max(maxkeysize + 1); + if (get_random_u32_below(4) == 0) + vec->klen = get_random_u32_below(maxkeysize + 1); generate_random_bytes((u8 *)vec->key, vec->klen); vec->setkey_error = crypto_aead_setkey(tfm, vec->key, vec->klen);
@@@ -2331,8 -2331,8 +2331,8 @@@
/* Tag length: in [0, maxauthsize], but usually choose maxauthsize */ authsize = maxauthsize; - if (prandom_u32_max(4) == 0) - authsize = prandom_u32_max(maxauthsize + 1); + if (get_random_u32_below(4) == 0) + authsize = get_random_u32_below(maxauthsize + 1); if (prefer_inauthentic && authsize < MIN_COLLISION_FREE_AUTHSIZE) authsize = MIN_COLLISION_FREE_AUTHSIZE; if (WARN_ON(authsize > maxdatasize)) @@@ -2342,7 -2342,7 +2342,7 @@@
/* AAD, plaintext, and ciphertext lengths */ total_len = generate_random_length(maxdatasize); - if (prandom_u32_max(4) == 0) + if (get_random_u32_below(4) == 0) vec->alen = 0; else vec->alen = generate_random_length(total_len); @@@ -2958,8 -2958,8 +2958,8 @@@ static void generate_random_cipher_test
/* Key: length in [0, maxkeysize], but usually choose maxkeysize */ vec->klen = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->klen = prandom_u32_max(maxkeysize + 1); + if (get_random_u32_below(4) == 0) + vec->klen = get_random_u32_below(maxkeysize + 1); generate_random_bytes((u8 *)vec->key, vec->klen); vec->setkey_error = crypto_skcipher_setkey(tfm, vec->key, vec->klen);
@@@ -4712,12 -4712,6 +4712,12 @@@ static const struct alg_test_desc alg_t .alg = "cts(cbc(paes))", .test = alg_test_null, .fips_allowed = 1, + }, { + .alg = "cts(cbc(sm4))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_cts_tv_template) + } }, { .alg = "curve25519", .test = alg_test_kpp, @@@ -5592,12 -5586,6 +5592,12 @@@ .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xcbc(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_xcbc128_tv_template) + } }, { .alg = "xchacha12", .test = alg_test_skcipher, @@@ -5652,13 -5640,6 +5652,13 @@@ .suite = { .cipher = __VECS(serpent_xts_tv_template) } + }, { + .alg = "xts(sm4)", + .generic_driver = "xts(ecb(sm4-generic))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_xts_tv_template) + } }, { .alg = "xts(twofish)", .generic_driver = "xts(ecb(twofish-generic))", diff --combined drivers/block/drbd/drbd_receiver.c index e045fb55f3bf,3eccc6cd5004..3686c8e1d961 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@@ -1,4 -1,4 +1,4 @@@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_receiver.c
@@@ -781,7 -781,7 +781,7 @@@ static struct socket *drbd_wait_for_con
timeo = connect_int * HZ; /* 28.5% random jitter */ - timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7; + timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); if (err <= 0) @@@ -1004,7 -1004,7 +1004,7 @@@ retry drbd_warn(connection, "Error receiving initial packet\n"); sock_release(s); randomize: - if (prandom_u32_max(2)) + if (get_random_u32_below(2)) goto retry; } } @@@ -1603,19 -1603,9 +1603,19 @@@ static void drbd_issue_peer_discard_or_ drbd_endio_write_sec_final(peer_req); }
+static int peer_request_fault_type(struct drbd_peer_request *peer_req) +{ + if (peer_req_op(peer_req) == REQ_OP_READ) { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD; + } else { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR; + } +} + /** * drbd_submit_peer_request() - * @device: DRBD device. * @peer_req: peer request * * May spread the pages to multiple bios, @@@ -1629,9 -1619,10 +1629,9 @@@ * on certain Xen deployments. */ /* TODO allocate from our own bio_set. */ -int drbd_submit_peer_request(struct drbd_device *device, - struct drbd_peer_request *peer_req, - const blk_opf_t opf, const int fault_type) +int drbd_submit_peer_request(struct drbd_peer_request *peer_req) { + struct drbd_device *device = peer_req->peer_device->device; struct bio *bios = NULL; struct bio *bio; struct page *page = peer_req->pages; @@@ -1676,18 -1667,7 +1676,18 @@@ * generated bio, but a bio allocated on behalf of the peer. */ next_bio: - bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO); + /* _DISCARD, _WRITE_ZEROES handled above. + * REQ_OP_FLUSH (empty flush) not expected, + * should have been mapped to a "drbd protocol barrier". + * REQ_OP_SECURE_ERASE: I don't see how we could ever support that. + */ + if (!(peer_req_op(peer_req) == REQ_OP_WRITE || + peer_req_op(peer_req) == REQ_OP_READ)) { + drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf); + return -EINVAL; + } + + bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO); /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio->bi_private = peer_req; @@@ -1717,7 -1697,7 +1717,7 @@@ bios = bios->bi_next; bio->bi_next = NULL;
- drbd_submit_bio_noacct(device, fault_type, bio); + drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio); } while (bios); return 0; } @@@ -2071,7 -2051,6 +2071,7 @@@ static int recv_resync_read(struct drbd * respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_WRITE; peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock); @@@ -2079,7 -2058,8 +2079,7 @@@ spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, - DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0;
/* don't care for the reason here */ @@@ -2395,6 -2375,16 +2395,6 @@@ static int wait_for_and_update_peer_seq return ret; }
-/* see also bio_flags_to_wire() - * DRBD_REQ_*, because we need to semantically map the flags to data packet - * flags and back. We may replicate to other kernel versions. */ -static blk_opf_t wire_flags_to_bio_flags(u32 dpf) -{ - return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | - (dpf & DP_FUA ? REQ_FUA : 0) | - (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); -} - static enum req_op wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_ZEROES) @@@ -2405,15 -2395,6 +2405,15 @@@ return REQ_OP_WRITE; }
+/* see also bio_flags_to_wire() */ +static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) +{ + return wire_flags_to_bio_op(dpf) | + (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | + (dpf & DP_FUA ? REQ_FUA : 0) | + (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); +} + static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { @@@ -2557,6 -2538,8 +2557,6 @@@ static int receive_Data(struct drbd_con struct drbd_peer_request *peer_req; struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); - enum req_op op; - blk_opf_t op_flags; u32 dp_flags; int err, tp;
@@@ -2595,10 -2578,11 +2595,10 @@@ peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags); - op = wire_flags_to_bio_op(dp_flags); - op_flags = wire_flags_to_bio_flags(dp_flags); + peer_req->opf = wire_flags_to_bio(connection, dp_flags); if (pi->cmd == P_TRIM) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_DISCARD); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); /* need to play safe: an older DRBD sender * may mean zero-out while sending P_TRIM. */ @@@ -2606,7 -2590,7 +2606,7 @@@ peer_req->flags |= EE_ZEROOUT; } else if (pi->cmd == P_ZEROES) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES); D_ASSERT(peer_device, peer_req->pages == NULL); /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */ if (dp_flags & DP_DISCARD) @@@ -2693,7 -2677,8 +2693,7 @@@ peer_req->flags |= EE_CALL_AL_COMPLETE_IO; }
- err = drbd_submit_peer_request(device, peer_req, op | op_flags, - DRBD_FAULT_DT_WR); + err = drbd_submit_peer_request(peer_req); if (!err) return 0;
@@@ -2804,6 -2789,7 +2804,6 @@@ static int receive_DataRequest(struct d struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; - unsigned int fault_type; struct p_block_req *p = pi->data;
peer_device = conn_peer_device(connection, pi->vnr); @@@ -2863,11 -2849,11 +2863,11 @@@ put_ldev(device); return -ENOMEM; } + peer_req->opf = REQ_OP_READ;
switch (pi->cmd) { case P_DATA_REQUEST: peer_req->w.cb = w_e_end_data_req; - fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ peer_req->flags |= EE_APPLICATION; goto submit; @@@ -2881,12 -2867,14 +2881,12 @@@ fallthrough; case P_RS_DATA_REQUEST: peer_req->w.cb = w_e_end_rsdata_req; - fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); break;
case P_OV_REPLY: case P_CSUM_RS_REQUEST: - fault_type = DRBD_FAULT_RS_RD; di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); if (!di) goto out_free_e; @@@ -2935,6 -2923,7 +2935,6 @@@ (unsigned long long)sector); } peer_req->w.cb = w_e_end_ov_req; - fault_type = DRBD_FAULT_RS_RD; break;
default: @@@ -2986,7 -2975,8 +2986,7 @@@ submit_for_resync submit: update_receiver_timing_details(connection, drbd_submit_peer_request); inc_unacked(device); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, - fault_type) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0;
/* don't care for the reason here */ @@@ -4957,6 -4947,7 +4957,6 @@@ static int receive_rs_deallocated(struc
if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const enum req_op op = REQ_OP_WRITE_ZEROES;
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); @@@ -4966,7 -4957,6 +4966,7 @@@ }
peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_DISCARD; peer_req->submit_jif = jiffies; peer_req->flags |= EE_TRIM;
@@@ -4975,7 -4965,8 +4975,7 @@@ spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev); - err = drbd_submit_peer_request(device, peer_req, op, - DRBD_FAULT_RS_WR); + err = drbd_submit_peer_request(peer_req);
if (err) { spin_lock_irq(&device->resource->req_lock); diff --combined drivers/char/hw_random/core.c index afde685f5e0a,63a0a8e4505d..f34d356fe2c0 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@@ -41,14 -41,14 +41,14 @@@ static DEFINE_MUTEX(reading_mutex) static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; -static unsigned short default_quality; /* = 0; default to "off" */ +static unsigned short default_quality = 1024; /* default to maximum */
module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, - "default entropy content of hwrng per 1024 bits of input"); + "default maximum entropy content of hwrng per 1024 bits of input");
static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); @@@ -69,8 -69,10 +69,10 @@@ static void add_early_randomness(struc mutex_lock(&reading_mutex); bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0); mutex_unlock(&reading_mutex); - if (bytes_read > 0) - add_device_randomness(rng_fillbuf, bytes_read); + if (bytes_read > 0) { + size_t entropy = bytes_read * 8 * rng->quality / 1024; + add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false); + } }
static inline void cleanup_rng(struct kref *kref) @@@ -170,7 -172,10 +172,7 @@@ static int hwrng_init(struct hwrng *rng reinit_completion(&rng->cleanup_done);
skip_init: - if (!rng->quality) - rng->quality = default_quality; - if (rng->quality > 1024) - rng->quality = 1024; + rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024); current_quality = rng->quality; /* obsolete */
return 0; @@@ -525,7 -530,7 +527,7 @@@ static int hwrng_fillfn(void *unused
/* Outside lock, sure, but y'know: randomness. */ add_hwgenerator_randomness((void *)rng_fillbuf, rc, - entropy >> 10); + entropy >> 10, true); } hwrng_fill = NULL; return 0; diff --combined drivers/firmware/efi/efi.c index 297bf3fd9acc,16dae588f0e3..31a4090c66b3 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@@ -58,8 -58,6 +58,8 @@@ static unsigned long __initdata mem_res static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR; static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR;
+extern unsigned long screen_info_table; + struct mm_struct efi_mm = { .mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock), .mm_users = ATOMIC_INIT(2), @@@ -339,6 -337,24 +339,24 @@@ static void __init efi_debugfs_init(voi static inline void efi_debugfs_init(void) {} #endif
+ static void refresh_nv_rng_seed(struct work_struct *work) + { + u8 seed[EFI_RANDOM_SEED_SIZE]; + + get_random_bytes(seed, sizeof(seed)); + efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed); + memzero_explicit(seed, sizeof(seed)); + } + static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data) + { + static DECLARE_WORK(work, refresh_nv_rng_seed); + schedule_work(&work); + return NOTIFY_DONE; + } + static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification }; + /* * We register the efi subsystem with the firmware subsystem and the * efivars subsystem with the efi subsystem, if the system was booted with @@@ -396,6 -412,10 +414,6 @@@ static int __init efisubsys_init(void goto err_unregister; }
- error = efi_runtime_map_init(efi_kobj); - if (error) - goto err_remove_group; - /* and the standard mountpoint for efivarfs */ error = sysfs_create_mount_point(efi_kobj, "efivars"); if (error) { @@@ -411,6 -431,7 +429,7 @@@ platform_device_register_simple("efi_secret", 0, NULL, 0); #endif
+ execute_with_initialized_rng(&refresh_nv_rng_seed_nb); return 0;
err_remove_group: @@@ -421,7 -442,6 +440,7 @@@ err_unregister generic_ops_unregister(); err_put: kobject_put(efi_kobj); + efi_kobj = NULL; destroy_workqueue(efi_rts_wq); return error; } @@@ -545,9 -565,6 +564,9 @@@ static const efi_config_table_type_t co #endif #ifdef CONFIG_EFI_COCO_SECRET {LINUX_EFI_COCO_SECRET_AREA_GUID, &efi.coco_secret, "CocoSecret" }, +#endif +#ifdef CONFIG_EFI_GENERIC_STUB + {LINUX_EFI_SCREEN_INFO_TABLE_GUID, &screen_info_table }, #endif {}, }; @@@ -613,7 -630,7 +632,7 @@@ int __init efi_config_parse_tables(cons
seed = early_memremap(efi_rng_seed, sizeof(*seed)); if (seed != NULL) { - size = min(seed->size, EFI_RANDOM_SEED_SIZE); + size = min_t(u32, seed->size, SZ_1K); // sanity check early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); @@@ -622,8 -639,8 +641,8 @@@ seed = early_memremap(efi_rng_seed, sizeof(*seed) + size); if (seed != NULL) { - pr_notice("seeding entropy pool\n"); add_bootloader_randomness(seed->bits, size); + memzero_explicit(seed->bits, size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); diff --combined drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 29e9e8d5b6fe,29d2459bcc90..da09767fda07 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@@ -30,7 -30,6 +30,7 @@@ #include "i915_gem_context.h" #include "i915_gem_evict.h" #include "i915_gem_ioctls.h" +#include "i915_reg.h" #include "i915_trace.h" #include "i915_user_extensions.h"
@@@ -54,13 -53,13 +54,13 @@@ enum #define DBG_FORCE_RELOC 0 /* choose one of the above! */ };
-/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */ -#define __EXEC_OBJECT_HAS_PIN BIT(30) -#define __EXEC_OBJECT_HAS_FENCE BIT(29) -#define __EXEC_OBJECT_USERPTR_INIT BIT(28) -#define __EXEC_OBJECT_NEEDS_MAP BIT(27) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(26) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */ +/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */ +#define __EXEC_OBJECT_HAS_PIN BIT(29) +#define __EXEC_OBJECT_HAS_FENCE BIT(28) +#define __EXEC_OBJECT_USERPTR_INIT BIT(27) +#define __EXEC_OBJECT_NEEDS_MAP BIT(26) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(25) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31) @@@ -2102,8 -2101,7 +2102,8 @@@ static int eb_move_to_gpu(struct i915_e eb->composite_fence ? eb->composite_fence : &eb->requests[j]->fence, - flags | __EXEC_OBJECT_NO_RESERVE); + flags | __EXEC_OBJECT_NO_RESERVE | + __EXEC_OBJECT_NO_REQUEST_AWAIT); } }
@@@ -2150,8 -2148,7 +2150,8 @@@ err_skip return err; }
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static int i915_gem_check_execbuffer(struct drm_i915_private *i915, + struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) return -EINVAL; @@@ -2164,7 -2161,7 +2164,7 @@@ }
if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n"); exec->DR4 = 0; } if (exec->DR1 || exec->DR4) @@@ -2427,7 -2424,7 +2427,7 @@@ gen8_dispatch_bsd_engine(struct drm_i91 /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - prandom_u32_max(num_vcs_engines(dev_priv)); + get_random_u32_below(num_vcs_engines(dev_priv));
return file_priv->bsd_engine; } @@@ -2802,8 -2799,7 +2802,8 @@@ add_timeline_fence_array(struct i915_ex
syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); + drm_dbg(&eb->i915->drm, + "Invalid syncobj handle provided\n"); return -ENOENT; }
@@@ -2811,8 -2807,7 +2811,8 @@@
if (!fence && user_fence.flags && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { - DRM_DEBUG("Syncobj handle has no fence\n"); + drm_dbg(&eb->i915->drm, + "Syncobj handle has no fence\n"); drm_syncobj_put(syncobj); return -EINVAL; } @@@ -2821,9 -2816,7 +2821,9 @@@ err = dma_fence_chain_find_seqno(&fence, point);
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { - DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + drm_dbg(&eb->i915->drm, + "Syncobj handle missing requested point %llu\n", + point); dma_fence_put(fence); drm_syncobj_put(syncobj); return err; @@@ -2849,8 -2842,7 +2849,8 @@@ * 0) would break the timeline. */ if (user_fence.flags & I915_EXEC_FENCE_WAIT) { - DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); + drm_dbg(&eb->i915->drm, + "Trying to wait & signal the same timeline point.\n"); dma_fence_put(fence); drm_syncobj_put(syncobj); return -EINVAL; @@@ -2921,16 -2913,14 +2921,16 @@@ static int add_fence_array(struct i915_
syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); + drm_dbg(&eb->i915->drm, + "Invalid syncobj handle provided\n"); return -ENOENT; }
if (user_fence.flags & I915_EXEC_FENCE_WAIT) { fence = drm_syncobj_fence_get(syncobj); if (!fence) { - DRM_DEBUG("Syncobj handle has no fence\n"); + drm_dbg(&eb->i915->drm, + "Syncobj handle has no fence\n"); drm_syncobj_put(syncobj); return -EINVAL; } @@@ -2964,6 -2954,11 +2964,6 @@@ await_fence_array(struct i915_execbuffe int err;
for (n = 0; n < eb->num_fences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - if (!eb->fences[n].dma_fence) continue;
@@@ -3525,7 -3520,7 +3525,7 @@@ i915_gem_execbuffer2_ioctl(struct drm_d return -EINVAL; }
- err = i915_gem_check_execbuffer(args); + err = i915_gem_check_execbuffer(i915, args); if (err) return err;
diff --combined drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 49a8f10d76c7,45b605e32c87..2daffa7c7dfd --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@@ -110,7 -110,6 +110,7 @@@ #include <linux/string_helpers.h>
#include "i915_drv.h" +#include "i915_reg.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "gen8_engine_cs.h" @@@ -3472,9 -3471,9 +3472,9 @@@ logical_ring_default_vfuncs(struct inte
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; else - engine->emit_bb_start = gen125_emit_bb_start_noarb; + engine->emit_bb_start = xehp_emit_bb_start_noarb; } else { if (intel_engine_has_preemption(engine)) engine->emit_bb_start = gen8_emit_bb_start; @@@ -3690,7 -3689,7 +3690,7 @@@ static void virtual_engine_initial_hint * NB This does not force us to execute on this engine, it will just * typically be the first we inspect for submission. */ - swp = prandom_u32_max(ve->num_siblings); + swp = get_random_u32_below(ve->num_siblings); if (swp) swap(ve->siblings[swp], ve->siblings[0]); } @@@ -3922,7 -3921,6 +3922,7 @@@ static struct intel_context execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, unsigned long flags) { + struct drm_i915_private *i915 = siblings[0]->i915; struct virtual_engine *ve; unsigned int n; int err; @@@ -3931,7 -3929,7 +3931,7 @@@ if (!ve) return ERR_PTR(-ENOMEM);
- ve->base.i915 = siblings[0]->i915; + ve->base.i915 = i915; ve->base.gt = siblings[0]->gt; ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; @@@ -3990,9 -3988,8 +3990,9 @@@
GEM_BUG_ON(!is_power_of_2(sibling->mask)); if (sibling->mask & ve->base.mask) { - DRM_DEBUG("duplicate %s entry in load balancer\n", - sibling->name); + drm_dbg(&i915->drm, + "duplicate %s entry in load balancer\n", + sibling->name); err = -EINVAL; goto err_put; } @@@ -4026,9 -4023,8 +4026,9 @@@ */ if (ve->base.class != OTHER_CLASS) { if (ve->base.class != sibling->class) { - DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", - sibling->class, ve->base.class); + drm_dbg(&i915->drm, + "invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); err = -EINVAL; goto err_put; } diff --combined drivers/infiniband/ulp/rtrs/rtrs-clt.c index be7c8480f947,ab75b690ad08..8e7c51712bd0 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@@ -1064,8 -1064,10 +1064,8 @@@ static int rtrs_map_sg_fr(struct rtrs_c
/* Align the MR to a 4K page size to match the block virt boundary */ nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); - if (nr < 0) - return nr; - if (nr < req->sg_cnt) - return -EINVAL; + if (nr != count) + return nr < 0 ? nr : -EINVAL; ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
return nr; @@@ -1515,7 -1517,7 +1515,7 @@@ static void rtrs_clt_err_recovery_work( rtrs_clt_stop_and_destroy_conns(clt_path); queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, msecs_to_jiffies(delay_ms + - prandom_u32_max(RTRS_RECONNECT_SEED))); + get_random_u32_below(RTRS_RECONNECT_SEED))); }
static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, diff --combined drivers/mmc/core/core.c index d87b8a28f56a,a1efda85c6f2..6853f67952c6 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@@ -56,7 -56,7 +56,7 @@@ static const unsigned freqs[] = { 40000 /* * Enabling software CRCs on the data blocks can be a significant (30%) * performance cost, and for other reasons may not always be desired. - * So we allow it it to be disabled. + * So we allow it to be disabled. */ bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); @@@ -97,8 -97,8 +97,8 @@@ static void mmc_should_fail_request(str !should_fail(&host->fail_mmc_request, data->blksz * data->blocks)) return;
- data->error = data_errors[prandom_u32_max(ARRAY_SIZE(data_errors))]; - data->bytes_xfered = prandom_u32_max(data->bytes_xfered >> 9) << 9; + data->error = data_errors[get_random_u32_below(ARRAY_SIZE(data_errors))]; + data->bytes_xfered = get_random_u32_below(data->bytes_xfered >> 9) << 9; }
#else /* CONFIG_FAIL_MMC_REQUEST */ @@@ -527,7 -527,7 +527,7 @@@ EXPORT_SYMBOL(mmc_cqe_post_req) * mmc_cqe_recovery - Recover from CQE errors. * @host: MMC host to recover * - * Recovery consists of stopping CQE, stopping eMMC, discarding the queue in + * Recovery consists of stopping CQE, stopping eMMC, discarding the queue * in eMMC, and discarding the queue in CQE. CQE must call * mmc_cqe_request_done() on all requests. An error is returned if the eMMC * fails to discard its queue. @@@ -1134,13 -1134,7 +1134,13 @@@ u32 mmc_select_voltage(struct mmc_host mmc_power_cycle(host, ocr); } else { bit = fls(ocr) - 1; - ocr &= 3 << bit; + /* + * The bit variable represents the highest voltage bit set in + * the OCR register. + * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), + * we must shift the mask '3' with (bit - 1). + */ + ocr &= 3 << (bit - 1); if (bit != host->ios.vdd) dev_warn(mmc_dev(host), "exceeding card's volts\n"); } @@@ -1484,11 -1478,6 +1484,11 @@@ void mmc_init_erase(struct mmc_card *ca card->pref_erase = 0; }
+static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@@ -1771,7 -1760,7 +1771,7 @@@ int mmc_erase(struct mmc_card *card, un !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP;
- if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP;
@@@ -1801,7 -1790,7 +1801,7 @@@ * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --combined drivers/mmc/host/dw_mmc.c index d35607128322,6ef410053037..e4d09c439051 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@@ -1363,7 -1363,7 +1363,7 @@@ static void __dw_mci_start_request(stru * is just about to roll over. * * We do this whole thing under spinlock and only if the - * command hasn't already completed (indicating the the irq + * command hasn't already completed (indicating the irq * already ran so we don't want the timeout). */ spin_lock_irqsave(&host->irq_lock, irqflags); @@@ -1858,7 -1858,7 +1858,7 @@@ static void dw_mci_start_fault_timer(st * Try to inject the error at random points during the data transfer. */ hrtimer_start(&host->fault_timer, - ms_to_ktime(prandom_u32_max(25)), + ms_to_ktime(get_random_u32_below(25)), HRTIMER_MODE_REL); }
diff --combined drivers/net/ethernet/broadcom/cnic.c index ad74b488a80a,74bc053a2078..7926aaef8f0c --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@@ -1027,14 -1027,16 +1027,14 @@@ static int __cnic_alloc_uio_rings(struc
udev->l2_ring_size = pages * CNIC_PAGE_SIZE; udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size, - &udev->l2_ring_map, - GFP_KERNEL | __GFP_COMP); + &udev->l2_ring_map, GFP_KERNEL); if (!udev->l2_ring) return -ENOMEM;
udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size); udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size, - &udev->l2_buf_map, - GFP_KERNEL | __GFP_COMP); + &udev->l2_buf_map, GFP_KERNEL); if (!udev->l2_buf) { __cnic_free_uio_rings(udev); return -ENOMEM; @@@ -4103,7 -4105,7 +4103,7 @@@ static int cnic_cm_alloc_mem(struct cni for (i = 0; i < MAX_CM_SK_TBL_SZ; i++) atomic_set(&cp->csk_tbl[i].ref_count, 0);
- port_id = prandom_u32_max(CNIC_LOCAL_PORT_RANGE); + port_id = get_random_u32_below(CNIC_LOCAL_PORT_RANGE); if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE, CNIC_LOCAL_PORT_MIN, port_id)) { cnic_cm_free_mem(dev); diff --combined drivers/net/phy/at803x.c index d49965907561,b07513c61c35..22f4458274aa --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@@ -870,10 -870,8 +870,10 @@@ static int at803x_probe(struct phy_devi .wolopts = 0, };
- if (ccr < 0) + if (ccr < 0) { + ret = ccr; goto err; + } mode_cfg = ccr & AT803X_MODE_CFG_MASK;
switch (mode_cfg) { @@@ -1760,7 -1758,7 +1760,7 @@@ static int qca808x_phy_fast_retrain_con
static int qca808x_phy_ms_random_seed_set(struct phy_device *phydev) { - u16 seed_value = prandom_u32_max(QCA808X_MASTER_SLAVE_SEED_RANGE); + u16 seed_value = get_random_u32_below(QCA808X_MASTER_SLAVE_SEED_RANGE);
return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED, QCA808X_MASTER_SLAVE_SEED_CFG, diff --combined drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index af8843507f3d,c704ca752138..e975d10e6009 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@@ -1128,7 -1128,7 +1128,7 @@@ static void brcmf_p2p_afx_handler(struc if (afx_hdl->is_listen && afx_hdl->my_listen_chan) /* 100ms ~ 300ms */ err = brcmf_p2p_discover_listen(p2p, afx_hdl->my_listen_chan, - 100 * (1 + prandom_u32_max(3))); + 100 * get_random_u32_inclusive(1, 3)); else err = brcmf_p2p_act_frm_search(p2p, afx_hdl->peer_listen_chan);
@@@ -2424,12 -2424,8 +2424,12 @@@ int brcmf_p2p_del_vif(struct wiphy *wip brcmf_remove_interface(vif->ifp, true);
brcmf_cfg80211_arm_vif_event(cfg, NULL); - if (iftype != NL80211_IFTYPE_P2P_DEVICE) - p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { + if (vif == p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif) + p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; + if (vif == p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION2].vif) + p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION2].vif = NULL; + }
return err; } diff --combined drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 83abfe996138,3a7a44bb3c60..aa791dbc3066 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@@ -788,40 -788,14 +788,40 @@@ static u32 iwl_mvm_find_ie_offset(u8 *b return ie - beacon; }
-u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct ieee80211_tx_info *info, - struct ieee80211_vif *vif) +static u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, + struct ieee80211_tx_info *info, + struct ieee80211_vif *vif) { + struct ieee80211_supported_band *sband; + unsigned long basic = vif->bss_conf.basic_rates; + u16 lowest_cck = IWL_RATE_COUNT, lowest_ofdm = IWL_RATE_COUNT; u8 rate; - if (info->band == NL80211_BAND_2GHZ && !vif->p2p) - rate = IWL_FIRST_CCK_RATE; - else - rate = IWL_FIRST_OFDM_RATE; + u32 i; + + sband = mvm->hw->wiphy->bands[info->band]; + for_each_set_bit(i, &basic, BITS_PER_LONG) { + u16 hw = sband->bitrates[i].hw_value; + + if (hw >= IWL_FIRST_OFDM_RATE) { + if (lowest_ofdm > hw) + lowest_ofdm = hw; + } else if (lowest_cck > hw) { + lowest_cck = hw; + } + } + + if (info->band == NL80211_BAND_2GHZ && !vif->p2p) { + if (lowest_cck != IWL_RATE_COUNT) + rate = lowest_cck; + else if (lowest_ofdm != IWL_RATE_COUNT) + rate = lowest_ofdm; + else + rate = IWL_RATE_1M_INDEX; + } else if (lowest_ofdm != IWL_RATE_COUNT) { + rate = lowest_ofdm; + } else { + rate = IWL_RATE_6M_INDEX; + }
return rate; } @@@ -838,24 -812,6 +838,24 @@@ u16 iwl_mvm_mac_ctxt_get_beacon_flags(c return flags; }
+u8 iwl_mvm_mac_ctxt_get_beacon_rate(struct iwl_mvm *mvm, + struct ieee80211_tx_info *info, + struct ieee80211_vif *vif) +{ + struct ieee80211_supported_band *sband = + mvm->hw->wiphy->bands[info->band]; + u32 legacy = vif->bss_conf.beacon_tx_rate.control[info->band].legacy; + + /* if beacon rate was configured try using it */ + if (hweight32(legacy) == 1) { + u32 rate = ffs(legacy) - 1; + + return sband->bitrates[rate].hw_value; + } + + return iwl_mvm_mac_ctxt_get_lowest_rate(mvm, info, vif); +} + static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct sk_buff *beacon, @@@ -886,7 -842,7 +886,7 @@@ cpu_to_le32(BIT(mvm->mgmt_last_antenna_idx) << RATE_MCS_ANT_POS);
- rate = iwl_mvm_mac_ctxt_get_lowest_rate(info, vif); + rate = iwl_mvm_mac_ctxt_get_beacon_rate(mvm, info, vif);
tx->rate_n_flags |= cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate)); @@@ -970,7 -926,7 +970,7 @@@ static int iwl_mvm_mac_ctxt_send_beacon struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(beacon); struct iwl_mac_beacon_cmd beacon_cmd = {}; - u8 rate = iwl_mvm_mac_ctxt_get_lowest_rate(info, vif); + u8 rate = iwl_mvm_mac_ctxt_get_beacon_rate(mvm, info, vif); u16 flags; struct ieee80211_chanctx_conf *ctx; int channel; @@@ -1143,7 -1099,7 +1143,7 @@@ static void iwl_mvm_mac_ctxt_cmd_fill_a iwl_mvm_mac_ap_iterator, &data);
if (data.beacon_device_ts) { - u32 rand = prandom_u32_max(64 - 36) + 36; + u32 rand = get_random_u32_inclusive(36, 63); mvmvif->ap_beacon_time = data.beacon_device_ts + ieee80211_tu_to_usec(data.beacon_int * rand / 100); diff --combined drivers/pci/p2pdma.c index 27539770a613,5565f67d6537..86812d2073ea --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@@ -89,90 -89,6 +89,90 @@@ static ssize_t published_show(struct de } static DEVICE_ATTR_RO(published);
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + size_t len = vma->vm_end - vma->vm_start; + struct pci_p2pdma *p2pdma; + struct percpu_ref *ref; + unsigned long vaddr; + void *kaddr; + int ret; + + /* prevent private mappings from being established */ + if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { + pci_info_ratelimited(pdev, + "%s: fail, attempted private mapping\n", + current->comm); + return -EINVAL; + } + + if (vma->vm_pgoff) { + pci_info_ratelimited(pdev, + "%s: fail, attempted mapping with non-zero offset\n", + current->comm); + return -EINVAL; + } + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (!p2pdma) { + ret = -ENODEV; + goto out; + } + + kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref); + if (!kaddr) { + ret = -ENOMEM; + goto out; + } + + /* + * vm_insert_page() can sleep, so a reference is taken to mapping + * such that rcu_read_unlock() can be done before inserting the + * pages + */ + if (unlikely(!percpu_ref_tryget_live_rcu(ref))) { + ret = -ENODEV; + goto out_free_mem; + } + rcu_read_unlock(); + + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr)); + if (ret) { + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); + return ret; + } + percpu_ref_get(ref); + put_page(virt_to_page(kaddr)); + kaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + + percpu_ref_put(ref); + + return 0; +out_free_mem: + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); +out: + rcu_read_unlock(); + return ret; +} + +static struct bin_attribute p2pmem_alloc_attr = { + .attr = { .name = "allocate", .mode = 0660 }, + .mmap = p2pmem_alloc_mmap, + /* + * Some places where we want to call mmap (ie. python) will check + * that the file size is greater than the mmap size before allowing + * the mmap to continue. To work around this, just set the size + * to be very large. + */ + .size = SZ_1T, +}; + static struct attribute *p2pmem_attrs[] = { &dev_attr_size.attr, &dev_attr_available.attr, @@@ -180,32 -96,11 +180,32 @@@ NULL, };
+static struct bin_attribute *p2pmem_bin_attrs[] = { + &p2pmem_alloc_attr, + NULL, +}; + static const struct attribute_group p2pmem_group = { .attrs = p2pmem_attrs, + .bin_attrs = p2pmem_bin_attrs, .name = "p2pmem", };
+static void p2pdma_page_free(struct page *page) +{ + struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap); + struct percpu_ref *ref; + + gen_pool_free_owner(pgmap->provider->p2pdma->pool, + (uintptr_t)page_to_virt(page), PAGE_SIZE, + (void **)&ref); + percpu_ref_put(ref); +} + +static const struct dev_pagemap_ops p2pdma_pgmap_ops = { + .page_free = p2pdma_page_free, +}; + static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; @@@ -257,19 -152,6 +257,19 @@@ out return error; }
+static void pci_p2pdma_unmap_mappings(void *data) +{ + struct pci_dev *pdev = data; + + /* + * Removing the alloc attribute from sysfs will call + * unmap_mapping_range() on the inode, teardown any existing userspace + * mappings and prevent new ones from being created. + */ + sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr, + p2pmem_group.name); +} + /** * pci_p2pdma_add_resource - add memory for use as p2p memory * @pdev: the device to add the memory to @@@ -316,7 -198,6 +316,7 @@@ int pci_p2pdma_add_resource(struct pci_ pgmap->range.end = pgmap->range.start + size - 1; pgmap->nr_range = 1; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; + pgmap->ops = &p2pdma_pgmap_ops;
p2p_pgmap->provider = pdev; p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - @@@ -328,11 -209,6 +328,11 @@@ goto pgmap_free; }
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, + pdev); + if (error) + goto pages_free; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, @@@ -797,7 -673,7 +797,7 @@@ struct pci_dev *pci_p2pmem_find_many(st }
if (dev_cnt) - pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]); + pdev = pci_dev_get(closest_pdevs[get_random_u32_below(dev_cnt)]);
for (i = 0; i < dev_cnt; i++) pci_dev_put(closest_pdevs[i]); diff --combined drivers/scsi/scsi_debug.c index 57b091f1767f,a86db9761d00..8b6b1ce58a48 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@@ -3785,7 -3785,7 +3785,7 @@@ static int resp_write_scat(struct scsi_ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); return illegal_condition_result; } - lrdp = kzalloc(lbdof_blen, GFP_ATOMIC); + lrdp = kzalloc(lbdof_blen, GFP_ATOMIC | __GFP_NOWARN); if (lrdp == NULL) return SCSI_MLQUEUE_HOST_BUSY; if (sdebug_verbose) @@@ -5702,16 -5702,16 +5702,16 @@@ static int schedule_resp(struct scsi_cm u64 ns = jiffies_to_nsecs(delta_jiff);
if (sdebug_random && ns < U32_MAX) { - ns = prandom_u32_max((u32)ns); + ns = get_random_u32_below((u32)ns); } else if (sdebug_random) { ns >>= 12; /* scale to 4 usec precision */ if (ns < U32_MAX) /* over 4 hours max */ - ns = prandom_u32_max((u32)ns); + ns = get_random_u32_below((u32)ns); ns <<= 12; } kt = ns_to_ktime(ns); } else { /* ndelay has a 4.2 second max */ - kt = sdebug_random ? prandom_u32_max((u32)ndelay) : + kt = sdebug_random ? get_random_u32_below((u32)ndelay) : (u32)ndelay; if (ndelay < INCLUSIVE_TIMING_MAX_NS) { u64 d = ktime_get_boottime_ns() - ns_from_boot; @@@ -7323,12 -7323,8 +7323,12 @@@ static int sdebug_add_host_helper(int p dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
error = device_register(&sdbg_host->dev); - if (error) + if (error) { + spin_lock(&sdebug_host_list_lock); + list_del(&sdbg_host->host_list); + spin_unlock(&sdebug_host_list_lock); goto clean; + }
++sdebug_num_hosts; return 0; diff --combined fs/ceph/inode.c index 7bff2855aab6,fb255988dee8..29a6629f6299 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@@ -126,7 -126,7 +126,7 @@@ const struct inode_operations ceph_file .setattr = ceph_setattr, .getattr = ceph_getattr, .listxattr = ceph_listxattr, - .get_acl = ceph_get_acl, + .get_inode_acl = ceph_get_acl, .set_acl = ceph_set_acl, };
@@@ -362,7 -362,7 +362,7 @@@ static int ceph_fill_fragtree(struct in if (nsplits != ci->i_fragtree_nsplits) { update = true; } else if (nsplits) { - i = prandom_u32_max(nsplits); + i = get_random_u32_below(nsplits); id = le32_to_cpu(fragtree->splits[i].frag); if (!__ceph_find_frag(ci, id)) update = true; @@@ -2255,7 -2255,7 +2255,7 @@@ int ceph_setattr(struct user_namespace err = __ceph_setattr(inode, attr);
if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&init_user_ns, inode, attr->ia_mode); + err = posix_acl_chmod(&init_user_ns, dentry, attr->ia_mode);
return err; } @@@ -2417,10 -2417,10 +2417,10 @@@ static int statx_to_caps(u32 want, umod { int mask = 0;
- if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_AUTH_SHARED;
- if (want & (STATX_NLINK|STATX_CTIME)) { + if (want & (STATX_NLINK|STATX_CTIME|STATX_CHANGE_COOKIE)) { /* * The link count for directories depends on inode->i_subdirs, * and that is only updated when Fs caps are held. @@@ -2431,10 -2431,11 +2431,10 @@@ mask |= CEPH_CAP_LINK_SHARED; }
- if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| - STATX_BLOCKS)) + if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|STATX_BLOCKS|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_FILE_SHARED;
- if (want & (STATX_CTIME)) + if (want & (STATX_CTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_XATTR_SHARED;
return mask; @@@ -2477,11 -2478,6 +2477,11 @@@ int ceph_getattr(struct user_namespace valid_mask |= STATX_BTIME; }
+ if (request_mask & STATX_CHANGE_COOKIE) { + stat->change_cookie = inode_peek_iversion_raw(inode); + valid_mask |= STATX_CHANGE_COOKIE; + } + if (ceph_snap(inode) == CEPH_NOSNAP) stat->dev = sb->s_dev; else @@@ -2496,7 -2492,7 +2496,7 @@@ struct inode *parent;
parent = ceph_lookup_inode(sb, ceph_ino(inode)); - if (!parent) + if (IS_ERR(parent)) return PTR_ERR(parent);
pci = ceph_inode(parent); @@@ -2523,8 -2519,6 +2523,8 @@@ stat->nlink = 1 + 1 + ci->i_subdirs; }
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; stat->result_mask = request_mask & valid_mask; return err; } diff --combined fs/ext4/ialloc.c index 67a257a69758,9fc1af8e19a3..9aa8b18bdac1 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@@ -465,7 -465,7 +465,7 @@@ static int find_group_orlov(struct supe ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo); parent_group = hinfo.hash % ngroups; } else - parent_group = prandom_u32_max(ngroups); + parent_group = get_random_u32_below(ngroups); for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; get_orlov_stats(sb, g, flex_size, &stats); @@@ -870,7 -870,7 +870,7 @@@ static int ext4_xattr_credits_for_new_i struct super_block *sb = dir->i_sb; int nblocks = 0; #ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); + struct posix_acl *p = get_inode_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(p)) return PTR_ERR(p); diff --combined fs/f2fs/gc.c index f1a46519a5fe,536d332d9e2e..0a8cd2ca1090 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@@ -96,6 -96,16 +96,6 @@@ static int gc_thread_func(void *data * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (sbi->gc_mode == GC_URGENT_HIGH) { - spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_remaining--; - if (!sbi->gc_urgent_high_remaining) - sbi->gc_mode = GC_NORMAL; - } - spin_unlock(&sbi->gc_urgent_high_lock); - } - if (sbi->gc_mode == GC_URGENT_HIGH || sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; @@@ -152,15 -162,6 +152,15 @@@ do_gc /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi, true); next: + if (sbi->gc_mode != GC_NORMAL) { + spin_lock(&sbi->gc_remaining_trials_lock); + if (sbi->gc_remaining_trials) { + sbi->gc_remaining_trials--; + if (!sbi->gc_remaining_trials) + sbi->gc_mode = GC_NORMAL; + } + spin_unlock(&sbi->gc_remaining_trials_lock); + } sb_end_write(sbi->sb);
} while (!kthread_should_stop()); @@@ -171,10 -172,13 +171,10 @@@ int f2fs_start_gc_thread(struct f2fs_sb { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; - int err = 0;
gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) { - err = -ENOMEM; - goto out; - } + if (!gc_th) + return -ENOMEM;
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; @@@ -189,14 -193,12 +189,14 @@@ sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { - err = PTR_ERR(gc_th->f2fs_gc_task); + int err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); sbi->gc_thread = NULL; + return err; } -out: - return err; + + return 0; }
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) @@@ -280,7 -282,7 +280,7 @@@ static void select_policy(struct f2fs_s
/* let's select beginning hot/small space first in no_heap mode*/ if (f2fs_need_rand_seg(sbi)) - p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; @@@ -1108,7 -1110,6 +1108,7 @@@ static bool is_alive(struct f2fs_sb_inf if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", ofs_in_node, dni->ino, dni->nid, max_addrs); + f2fs_put_page(node_page, 1); return false; }
@@@ -1562,8 -1563,8 +1562,8 @@@ next_step continue; }
- data_page = f2fs_get_read_data_page(inode, - start_bidx, REQ_RAHEAD, true); + data_page = f2fs_get_read_data_page(inode, start_bidx, + REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); @@@ -2132,6 -2133,8 +2132,6 @@@ out_unlock if (err) return err;
- set_sbi_flag(sbi, SBI_IS_RESIZEFS); - freeze_super(sbi->sb); f2fs_down_write(&sbi->gc_lock); f2fs_down_write(&sbi->cp_global_sem); @@@ -2147,7 -2150,6 +2147,7 @@@ if (err) goto out_err;
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS); err = free_segment_range(sbi, secs, false); if (err) goto recover_out; @@@ -2171,7 -2173,6 +2171,7 @@@ f2fs_commit_super(sbi, false); } recover_out: + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); @@@ -2184,5 -2185,6 +2184,5 @@@ out_err f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); - clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; } diff --combined fs/f2fs/segment.c index 8aa81238c770,b304692c0cf5..52210e7e5b30 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@@ -192,18 -192,14 +192,18 @@@ void f2fs_abort_atomic_write(struct ino if (!f2fs_is_atomic_file(inode)) return;
- if (clean) - truncate_inode_pages_final(inode->i_mapping); clear_inode_flag(fi->cow_inode, FI_COW_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); stat_dec_atomic_inode(inode); + + if (clean) { + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, fi->original_i_size); + } }
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@@ -339,12 -335,10 +339,12 @@@ next }
out: - if (ret) + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; - else + } else { sbi->committed_atomic_block += fi->atomic_write_cnt; + set_inode_flag(inode, FI_ATOMIC_COMMITTED); + }
__complete_revoke_list(inode, &revoke_list, ret ? true : false);
@@@ -626,11 -620,12 +626,11 @@@ int f2fs_create_flush_cmd_control(struc { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; - int err = 0;
if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; if (fcc->f2fs_issue_flush) - return err; + return 0; goto init_thread; }
@@@ -643,20 -638,19 +643,20 @@@ init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; if (!test_opt(sbi, FLUSH_MERGE)) - return err; + return 0;
init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); + int err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; }
- return err; + return 0; }
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) @@@ -862,7 -856,7 +862,7 @@@ block_t f2fs_get_unusable_blocks(struc } mutex_unlock(&dirty_i->seglist_lock);
- unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + unusable = max(holes[DATA], holes[NODE]); if (unusable > ovp_holes) return unusable - ovp_holes; return 0; @@@ -1454,7 -1448,7 +1454,7 @@@ retry if (i + 1 < dpolicy->granularity) break;
- if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy);
pend_list = &dcc->pend_list[i]; @@@ -2031,10 -2025,8 +2031,10 @@@ int f2fs_start_discard_thread(struct f2
dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(dcc->f2fs_issue_discard)) + if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); + dcc->f2fs_issue_discard = NULL; + }
return err; } @@@ -2054,7 -2046,6 +2054,7 @@@ static int create_discard_cmd_control(s return -ENOMEM;
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) dcc->discard_granularity = sbi->blocks_per_seg; else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) @@@ -2543,7 -2534,7 +2543,7 @@@ static unsigned int __get_next_segno(st
sanity_check_seg_type(sbi, seg_type); if (f2fs_need_rand_seg(sbi)) - return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */ if (__is_large_section(sbi)) @@@ -2597,7 -2588,7 +2597,7 @@@ static void new_curseg(struct f2fs_sb_i curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_chunk); }
static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@@ -2634,9 -2625,9 +2634,9 @@@ static void __refresh_next_blkoff(struc /* To allocate block chunks in different sizes, use random number */ if (--seg->fragment_remained_chunk <= 0) { seg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_chunk); seg->next_blkoff += - prandom_u32_max(sbi->max_fragment_hole) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_hole); } } } diff --combined kernel/bpf/core.c index 708e908d8807,38159f39e2af..6cca66b39d01 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@@ -34,7 -34,6 +34,7 @@@ #include <linux/log2.h> #include <linux/bpf_verifier.h> #include <linux/nodemask.h> +#include <linux/bpf_mem_alloc.h>
#include <asm/barrier.h> #include <asm/unaligned.h> @@@ -61,9 -60,6 +61,9 @@@ #define CTX regs[BPF_REG_CTX] #define IMM insn->imm
+struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; + /* No hurry in this branch * * Exported for the bpf jit load helper. @@@ -868,7 -864,8 +868,7 @@@ static struct bpf_prog_pack *alloc_new_ list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr); - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); return pack; }
@@@ -886,7 -883,8 +886,7 @@@ void *bpf_prog_pack_alloc(u32 size, bpf if (ptr) { bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); - set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); } goto out; } @@@ -1034,7 -1032,7 +1034,7 @@@ bpf_jit_binary_alloc(unsigned int progl hdr->size = size; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = prandom_u32_max(hole) & ~(alignment - 1); + start = get_random_u32_below(hole) & ~(alignment - 1);
/* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@@ -1096,7 -1094,7 +1096,7 @@@ bpf_jit_binary_pack_alloc(unsigned int
hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)), BPF_PROG_CHUNK_SIZE - sizeof(*ro_header)); - start = prandom_u32_max(hole) & ~(alignment - 1); + start = get_random_u32_below(hole) & ~(alignment - 1);
*image_ptr = &ro_header->image[start]; *rw_image = &(*rw_header)->image[start]; @@@ -2253,14 -2251,8 +2253,14 @@@ static void __bpf_prog_array_free_sleep { struct bpf_prog_array *progs;
+ /* If RCU Tasks Trace grace period implies RCU grace period, there is + * no need to call kfree_rcu(), just call kfree() directly. + */ progs = container_of(rcu, struct bpf_prog_array, rcu); - kfree_rcu(progs, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(progs); + else + kfree_rcu(progs, rcu); }
void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs) @@@ -2748,18 -2740,6 +2748,18 @@@ int __weak bpf_arch_text_invalidate(voi return -ENOTSUPP; }
+#ifdef CONFIG_BPF_SYSCALL +static int __init bpf_global_ma_init(void) +{ + int ret; + + ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); + bpf_global_ma_set = !ret; + return ret; +} +late_initcall(bpf_global_ma_init); +#endif + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --combined kernel/fork.c index 4439f7ff78f6,ec57cae58ff1..cfff237094aa --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -75,7 -75,6 +75,6 @@@ #include <linux/freezer.h> #include <linux/delayacct.h> #include <linux/taskstats_kern.h> - #include <linux/random.h> #include <linux/tty.h> #include <linux/fs_struct.h> #include <linux/magic.h> @@@ -97,6 -96,7 +96,7 @@@ #include <linux/scs.h> #include <linux/io_uring.h> #include <linux/bpf.h> + #include <linux/stackprotector.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -535,9 -535,6 +535,9 @@@ void put_task_stack(struct task_struct
void free_task(struct task_struct *tsk) { +#ifdef CONFIG_SECCOMP + WARN_ON_ONCE(tsk->seccomp.filter); +#endif release_user_cpus_ptr(tsk); scs_release(tsk);
@@@ -2409,6 -2406,12 +2409,6 @@@ static __latent_entropy struct task_str
spin_lock(¤t->sighand->siglock);
- /* - * Copy seccomp details explicitly here, in case they were changed - * before holding sighand lock. - */ - copy_seccomp(p); - rv_task_fork(p);
rseq_fork(p, clone_flags); @@@ -2425,14 -2428,6 +2425,14 @@@ goto bad_fork_cancel_cgroup; }
+ /* No more failure paths after this point. */ + + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@@ -2597,6 -2592,11 +2597,6 @@@ struct task_struct * __init fork_idle(i return task; }
-struct mm_struct *copy_init_mm(void) -{ - return dup_mm(NULL, &init_mm); -} - /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. @@@ -3015,27 -3015,10 +3015,27 @@@ static void sighand_ctor(void *data init_waitqueue_head(&sighand->signalfd_wqh); }
-void __init proc_caches_init(void) +void __init mm_cache_init(void) { unsigned int mm_size;
+ /* + * The mm_cpumask is located at the end of mm_struct, and is + * dynamically sized based on the maximum CPU number this system + * can have, taking hotplug into account (nr_cpu_ids). + */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + + mm_cachep = kmem_cache_create_usercopy("mm_struct", + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), + NULL); +} + +void __init proc_caches_init(void) +{ sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@@ -3053,6 -3036,19 +3053,6 @@@ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
- /* - * The mm_cpumask is located at the end of mm_struct, and is - * dynamically sized based on the maximum CPU number this system - * can have, taking hotplug into account (nr_cpu_ids). - */ - mm_size = sizeof(struct mm_struct) + cpumask_size(); - - mm_cachep = kmem_cache_create_usercopy("mm_struct", - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, - offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), - NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); diff --combined kernel/time/clocksource.c index fac8c0d90e61,9cf32ccda715..4a2c3bb92e2e --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@@ -257,8 -257,8 +257,8 @@@ static enum wd_read_status cs_watchdog_ goto skip_test; }
- pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n", - smp_processor_id(), watchdog->name, wd_delay, nretries); + pr_warn("timekeeping watchdog on CPU%d: wd-%s-wd read-back delay of %lldns, attempt %d, marking unstable\n", + smp_processor_id(), cs->name, wd_delay, nretries); return WD_READ_UNSTABLE;
skip_test: @@@ -310,7 -310,7 +310,7 @@@ static void clocksource_verify_choose_c * CPUs that are currently online. */ for (i = 1; i < n; i++) { - cpu = prandom_u32_max(nr_cpu_ids); + cpu = get_random_u32_below(nr_cpu_ids); cpu = cpumask_next(cpu - 1, cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_online_mask); diff --combined lib/fault-inject.c index adb2f9355ee6,9f53408c545d..1421818c9ef7 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@@ -41,6 -41,9 +41,6 @@@ EXPORT_SYMBOL_GPL(setup_fault_attr)
static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@@ -100,7 -103,7 +100,7 @@@ static inline bool fail_stacktrace(stru * http://www.nongnu.org/failmalloc/ */
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@@ -136,26 -139,20 +136,26 @@@ return false; }
- if (attr->probability <= prandom_u32_max(100)) + if (attr->probability <= get_random_u32_below(100)) return false;
if (!fail_stacktrace(attr)) return false;
fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times);
return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --combined lib/kobject.c index 6e0bf03f4f36,af1f5f2954d4..985ee1c4f2c6 --- a/lib/kobject.c +++ b/lib/kobject.c @@@ -25,7 -25,7 +25,7 @@@ * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ -const void *kobject_namespace(struct kobject *kobj) +const void *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
@@@ -45,7 -45,7 +45,7 @@@ * representation of given kobject. Normally used to adjust ownership of * objects in a container. */ -void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; @@@ -94,10 -94,10 +94,10 @@@ static int create_dir(struct kobject *k return 0; }
-static int get_kobj_path_length(struct kobject *kobj) +static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; - struct kobject *parent = kobj; + const struct kobject *parent = kobj;
/* walk up the ancestors until we hit the one pointing to the * root. @@@ -112,9 -112,9 +112,9 @@@ return length; }
-static void fill_kobj_path(struct kobject *kobj, char *path, int length) +static void fill_kobj_path(const struct kobject *kobj, char *path, int length) { - struct kobject *parent; + const struct kobject *parent;
--length; for (parent = kobj; parent; parent = parent->parent) { @@@ -136,7 -136,7 +136,7 @@@ * * Return: The newly allocated memory, caller must free with kfree(). */ -char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) +char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; @@@ -694,7 -694,7 +694,7 @@@ static void kobject_release(struct kre { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - unsigned long delay = HZ + HZ * prandom_u32_max(4); + unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); @@@ -834,9 -834,6 +834,9 @@@ EXPORT_SYMBOL_GPL(kobj_sysfs_ops) /** * kset_register() - Initialize and add a kset. * @k: kset. + * + * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name() + * is freed, it can not be used any more. */ int kset_register(struct kset *k) { @@@ -847,12 -844,8 +847,12 @@@
kset_init(k); err = kobject_add_internal(&k->kobj); - if (err) + if (err) { + kfree_const(k->kobj.name); + /* Set it to NULL to avoid accessing bad pointer in callers. */ + k->kobj.name = NULL; return err; + } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } @@@ -907,7 -900,7 +907,7 @@@ static void kset_release(struct kobjec kfree(kset); }
-static void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); @@@ -1039,7 -1032,7 +1039,7 @@@ int kobj_ns_type_registered(enum kobj_n return registered; }
-const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL;
@@@ -1049,7 -1042,7 +1049,7 @@@ return ops; }
-const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj) { return kobj_child_ns_ops(kobj->parent); } diff --combined lib/sbitmap.c index 586deb333237,58de526ff051..1fcede228fa2 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@@ -21,7 -21,7 +21,7 @@@ static int init_alloc_hint(struct sbitm int i;
for_each_possible_cpu(i) - *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32_max(depth); + *per_cpu_ptr(sb->alloc_hint, i) = get_random_u32_below(depth); } return 0; } @@@ -33,7 -33,7 +33,7 @@@ static inline unsigned update_alloc_hin
hint = this_cpu_read(*sb->alloc_hint); if (unlikely(hint >= depth)) { - hint = depth ? prandom_u32_max(depth) : 0; + hint = depth ? get_random_u32_below(depth) : 0; this_cpu_write(*sb->alloc_hint, hint); }
@@@ -434,8 -434,6 +434,8 @@@ int sbitmap_queue_init_node(struct sbit sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); + atomic_set(&sbq->completion_cnt, 0); + atomic_set(&sbq->wakeup_cnt, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { @@@ -443,21 -441,40 +443,21 @@@ return -ENOMEM; }
- for (i = 0; i < SBQ_WAIT_QUEUES; i++) { + for (i = 0; i < SBQ_WAIT_QUEUES; i++) init_waitqueue_head(&sbq->ws[i].wait); - atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); - }
return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int wake_batch) -{ - int i; - - if (sbq->wake_batch != wake_batch) { - WRITE_ONCE(sbq->wake_batch, wake_batch); - /* - * Pairs with the memory barrier in sbitmap_queue_wake_up() - * to ensure that the batch size is updated before the wait - * counts. - */ - smp_mb(); - for (i = 0; i < SBQ_WAIT_QUEUES; i++) - atomic_set(&sbq->ws[i].wait_cnt, 1); - } -} - static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch;
wake_batch = sbq_calc_wake_batch(sbq, depth); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + if (sbq->wake_batch != wake_batch) + WRITE_ONCE(sbq->wake_batch, wake_batch); }
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, @@@ -471,8 -488,7 +471,8 @@@
wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, min_batch, SBQ_WAKE_BATCH); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + + WRITE_ONCE(sbq->wake_batch, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
@@@ -560,56 -576,106 +560,56 @@@ void sbitmap_queue_min_shallow_depth(st } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) +static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { int i, wake_index;
if (!atomic_read(&sbq->ws_active)) - return NULL; + return;
wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index];
- if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) { - if (wake_index != atomic_read(&sbq->wake_index)) - atomic_set(&sbq->wake_index, wake_index); - return ws; - } - + /* + * Advance the index before checking the current queue. + * It improves fairness, by ensuring the queue doesn't + * need to be fully emptied before trying to wake up + * from the next one. + */ wake_index = sbq_index_inc(wake_index); + + /* + * It is sufficient to wake up at least one waiter to + * guarantee forward progress. + */ + if (waitqueue_active(&ws->wait) && + wake_up_nr(&ws->wait, nr)) + break; }
- return NULL; + if (wake_index != atomic_read(&sbq->wake_index)) + atomic_set(&sbq->wake_index, wake_index); }
-static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr) +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { - struct sbq_wait_state *ws; - unsigned int wake_batch; - int wait_cnt, cur, sub; - bool ret; + unsigned int wake_batch = READ_ONCE(sbq->wake_batch); + unsigned int wakeups;
- if (*nr <= 0) - return false; + if (!atomic_read(&sbq->ws_active)) + return;
- ws = sbq_wake_ptr(sbq); - if (!ws) - return false; + atomic_add(nr, &sbq->completion_cnt); + wakeups = atomic_read(&sbq->wakeup_cnt);
- cur = atomic_read(&ws->wait_cnt); do { - /* - * For concurrent callers of this, callers should call this - * function again to wakeup a new batch on a different 'ws'. - */ - if (cur == 0) - return true; - sub = min(*nr, cur); - wait_cnt = cur - sub; - } while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt)); - - /* - * If we decremented queue without waiters, retry to avoid lost - * wakeups. - */ - if (wait_cnt > 0) - return !waitqueue_active(&ws->wait); - - *nr -= sub; - - /* - * When wait_cnt == 0, we have to be particularly careful as we are - * responsible to reset wait_cnt regardless whether we've actually - * woken up anybody. But in case we didn't wakeup anybody, we still - * need to retry. - */ - ret = !waitqueue_active(&ws->wait); - wake_batch = READ_ONCE(sbq->wake_batch); - - /* - * Wake up first in case that concurrent callers decrease wait_cnt - * while waitqueue is empty. - */ - wake_up_nr(&ws->wait, wake_batch); + if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) + return; + } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, + &wakeups, wakeups + wake_batch));
- /* - * Pairs with the memory barrier in sbitmap_queue_resize() to - * ensure that we see the batch size update before the wait - * count is reset. - * - * Also pairs with the implicit barrier between decrementing wait_cnt - * and checking for waitqueue_active() to make sure waitqueue_active() - * sees result of the wakeup if atomic_dec_return() has seen the result - * of atomic_set(). - */ - smp_mb__before_atomic(); - - /* - * Increase wake_index before updating wait_cnt, otherwise concurrent - * callers can see valid wait_cnt in old waitqueue, which can cause - * invalid wakeup on the old waitqueue. - */ - sbq_index_atomic_inc(&sbq->wake_index); - atomic_set(&ws->wait_cnt, wake_batch); - - return ret || *nr; -} - -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) -{ - while (__sbq_wake_up(sbq, &nr)) - ; + __sbitmap_queue_wake_up(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
@@@ -726,7 -792,9 +726,7 @@@ void sbitmap_queue_show(struct sbitmap_ seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[i]; - - seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", - atomic_read(&ws->wait_cnt), + seq_printf(m, "\t{.wait=%s},\n", waitqueue_active(&ws->wait) ? "active" : "inactive"); } seq_puts(m, "}\n"); diff --combined lib/test_printf.c index 1fb12c1a0fa9,f098914a48d5..000e0e00e3dd --- a/lib/test_printf.c +++ b/lib/test_printf.c @@@ -126,7 -126,7 +126,7 @@@ __test(const char *expect, int elen, co * be able to print it as expected. */ failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap); - rand = 1 + prandom_u32_max(elen+1); + rand = get_random_u32_inclusive(1, elen + 1); /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */ failed_tests += do_test(rand, expect, elen, fmt, ap); failed_tests += do_test(0, expect, elen, fmt, ap); @@@ -704,29 -704,31 +704,29 @@@ flags(void
static void __init fwnode_pointer(void) { - const struct software_node softnodes[] = { - { .name = "first", }, - { .name = "second", .parent = &softnodes[0], }, - { .name = "third", .parent = &softnodes[1], }, - { NULL /* Guardian */ } - }; - const char * const full_name = "first/second/third"; + const struct software_node first = { .name = "first" }; + const struct software_node second = { .name = "second", .parent = &first }; + const struct software_node third = { .name = "third", .parent = &second }; + const struct software_node *group[] = { &first, &second, &third, NULL }; const char * const full_name_second = "first/second"; + const char * const full_name_third = "first/second/third"; const char * const second_name = "second"; const char * const third_name = "third"; int rval;
- rval = software_node_register_nodes(softnodes); + rval = software_node_register_node_group(group); if (rval) { pr_warn("cannot register softnodes; rval %d\n", rval); return; }
- test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1])); - test(full_name, "%pfw", software_node_fwnode(&softnodes[2])); - test(full_name, "%pfwf", software_node_fwnode(&softnodes[2])); - test(second_name, "%pfwP", software_node_fwnode(&softnodes[1])); - test(third_name, "%pfwP", software_node_fwnode(&softnodes[2])); + test(full_name_second, "%pfw", software_node_fwnode(&second)); + test(full_name_third, "%pfw", software_node_fwnode(&third)); + test(full_name_third, "%pfwf", software_node_fwnode(&third)); + test(second_name, "%pfwP", software_node_fwnode(&second)); + test(third_name, "%pfwP", software_node_fwnode(&third));
- software_node_unregister_nodes(softnodes); + software_node_unregister_node_group(group); }
static void __init fourcc_pointer(void) diff --combined lib/vsprintf.c index 5b0611c00956,2d11541ee561..be71a03c936a --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@@ -41,6 -41,7 +41,7 @@@ #include <linux/siphash.h> #include <linux/compiler.h> #include <linux/property.h> + #include <linux/notifier.h> #ifdef CONFIG_BLOCK #include <linux/blkdev.h> #endif @@@ -752,26 -753,21 +753,21 @@@ early_param("debug_boot_weak_hash", deb
static bool filled_random_ptr_key __read_mostly; static siphash_key_t ptr_key __read_mostly; - static void fill_ptr_key_workfn(struct work_struct *work); - static DECLARE_DELAYED_WORK(fill_ptr_key_work, fill_ptr_key_workfn);
- static void fill_ptr_key_workfn(struct work_struct *work) + static int fill_ptr_key(struct notifier_block *nb, unsigned long action, void *data) { - if (!rng_is_initialized()) { - queue_delayed_work(system_unbound_wq, &fill_ptr_key_work, HZ * 2); - return; - } - get_random_bytes(&ptr_key, sizeof(ptr_key));
/* Pairs with smp_rmb() before reading ptr_key. */ smp_wmb(); WRITE_ONCE(filled_random_ptr_key, true); + return NOTIFY_DONE; }
static int __init vsprintf_init_hashval(void) { - fill_ptr_key_workfn(NULL); + static struct notifier_block fill_ptr_key_nb = { .notifier_call = fill_ptr_key }; + execute_with_initialized_rng(&fill_ptr_key_nb); return 0; } subsys_initcall(vsprintf_init_hashval) @@@ -866,7 -862,7 +862,7 @@@ char *restricted_pointer(char *buf, cha * kptr_restrict==1 cannot be used in IRQ context * because its test for CAP_SYSLOG would be meaningless. */ - if (in_irq() || in_serving_softirq() || in_nmi()) { + if (in_hardirq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); return error_string(buf, end, "pK-error", spec); diff --combined mm/slub.c index a24b71041b26,7cd2c657030a..f9a5f821b894 --- a/mm/slub.c +++ b/mm/slub.c @@@ -829,17 -829,6 +829,17 @@@ static inline void set_orig_size(struc if (!slub_debug_orig_size(s)) return;
+#ifdef CONFIG_KASAN_GENERIC + /* + * KASAN could save its free meta data in object's data area at + * offset 0, if the size is larger than 'orig_size', it will + * overlap the data redzone in [orig_size+1, object_size], and + * the check should be skipped. + */ + if (kasan_metadata_size(s, true) > orig_size) + orig_size = s->object_size; +#endif + p += get_info_end(s); p += sizeof(struct track) * 2;
@@@ -859,11 -848,6 +859,11 @@@ static inline unsigned int get_orig_siz return *(unsigned int *)p; }
+void skip_orig_size_check(struct kmem_cache *s, const void *object) +{ + set_orig_size(s, (void *)object, s->object_size); +} + static void slab_bug(struct kmem_cache *s, char *fmt, ...) { struct va_format vaf; @@@ -926,7 -910,7 +926,7 @@@ static void print_trailer(struct kmem_c if (slub_debug_orig_size(s)) off += sizeof(unsigned int);
- off += kasan_metadata_size(s); + off += kasan_metadata_size(s, false);
if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ @@@ -982,28 -966,17 +982,28 @@@ static __printf(3, 4) void slab_err(str static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = kasan_reset_tag(object); + unsigned int poison_size = s->object_size;
- if (s->flags & SLAB_RED_ZONE) + if (s->flags & SLAB_RED_ZONE) { memset(p - s->red_left_pad, val, s->red_left_pad);
+ if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { + /* + * Redzone the extra allocated space by kmalloc than + * requested, and the poison size will be limited to + * the original request size accordingly. + */ + poison_size = get_orig_size(s, object); + } + } + if (s->flags & __OBJECT_POISON) { - memset(p, POISON_FREE, s->object_size - 1); - p[s->object_size - 1] = POISON_END; + memset(p, POISON_FREE, poison_size - 1); + p[poison_size - 1] = POISON_END; }
if (s->flags & SLAB_RED_ZONE) - memset(p + s->object_size, val, s->inuse - s->object_size); + memset(p + poison_size, val, s->inuse - poison_size); }
static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@@ -1097,7 -1070,7 +1097,7 @@@ static int check_pad_bytes(struct kmem_ off += sizeof(unsigned int); }
- off += kasan_metadata_size(s); + off += kasan_metadata_size(s, false);
if (size_from_object(s) == off) return 1; @@@ -1147,7 -1120,6 +1147,7 @@@ static int check_object(struct kmem_cac { u8 *p = object; u8 *endobject = object + s->object_size; + unsigned int orig_size;
if (s->flags & SLAB_RED_ZONE) { if (!check_bytes_and_report(s, slab, object, "Left Redzone", @@@ -1157,17 -1129,6 +1157,17 @@@ if (!check_bytes_and_report(s, slab, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; + + if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { + orig_size = get_orig_size(s, object); + + if (s->object_size > orig_size && + !check_bytes_and_report(s, slab, object, + "kmalloc Redzone", p + orig_size, + val, s->object_size - orig_size)) { + return 0; + } + } } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { check_bytes_and_report(s, slab, p, "Alignment padding", @@@ -1839,8 -1800,6 +1839,8 @@@ static inline struct slab *alloc_slab_p
slab = folio_slab(folio); __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); if (page_is_pfmemalloc(folio_page(folio, 0))) slab_set_pfmemalloc(slab);
@@@ -1922,7 -1881,7 +1922,7 @@@ static bool shuffle_freelist(struct kme return false;
freelist_count = oo_objects(s->oo); - pos = prandom_u32_max(freelist_count); + pos = get_random_u32_below(freelist_count);
page_limit = slab->objects * s->size; start = fixup_red_left(s, slab_address(slab)); @@@ -2040,11 -1999,17 +2040,11 @@@ static void __free_slab(struct kmem_cac int order = folio_order(folio); int pages = 1 << order;
- if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { - void *p; - - slab_pad_check(s, slab); - for_each_object(p, s, slab_address(slab), slab->objects) - check_object(s, slab, p, SLUB_RED_INACTIVE); - } - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; unaccount_slab(slab, order, s); @@@ -2060,17 -2025,9 +2060,17 @@@ static void rcu_free_slab(struct rcu_he
static void free_slab(struct kmem_cache *s, struct slab *slab) { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { + void *p; + + slab_pad_check(s, slab); + for_each_object(p, s, slab_address(slab), slab->objects) + check_object(s, slab, p, SLUB_RED_INACTIVE); + } + + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&slab->rcu_head, rcu_free_slab); - } else + else __free_slab(s, slab); }
@@@ -2454,7 -2411,7 +2454,7 @@@ static void init_kmem_cache_cpus(struc static void deactivate_slab(struct kmem_cache *s, struct slab *slab, void *freelist) { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST }; + enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST }; struct kmem_cache_node *n = get_node(s, slab_nid(slab)); int free_delta = 0; enum slab_modes mode = M_NONE; @@@ -2530,6 -2487,14 +2530,6 @@@ redo * acquire_slab() will see a slab that is frozen */ spin_lock_irqsave(&n->list_lock, flags); - } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) { - mode = M_FULL; - /* - * This also ensures that the scanning of full - * slabs from diagnostic functions will not see - * any frozen slabs. - */ - spin_lock_irqsave(&n->list_lock, flags); } else { mode = M_FULL_NOLIST; } @@@ -2539,7 -2504,7 +2539,7 @@@ old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) { - if (mode == M_PARTIAL || mode == M_FULL) + if (mode == M_PARTIAL) spin_unlock_irqrestore(&n->list_lock, flags); goto redo; } @@@ -2553,6 -2518,10 +2553,6 @@@ stat(s, DEACTIVATE_EMPTY); discard_slab(s, slab); stat(s, FREE_SLAB); - } else if (mode == M_FULL) { - add_full(s, n, slab); - spin_unlock_irqrestore(&n->list_lock, flags); - stat(s, DEACTIVATE_FULL); } else if (mode == M_FULL_NOLIST) { stat(s, DEACTIVATE_FULL); } @@@ -3426,11 -3395,7 +3426,11 @@@ redo init = slab_want_init_on_alloc(gfpflags, s);
out: - slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init); + /* + * When init equals 'true', like for kzalloc() family, only + * @orig_size bytes might be zeroed instead of s->object_size + */ + slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
return object; } @@@ -3887,11 -3852,11 +3887,11 @@@ int kmem_cache_alloc_bulk(struct kmem_c * Done outside of the IRQ disabled fastpath loop. */ slab_post_alloc_hook(s, objcg, flags, size, p, - slab_want_init_on_alloc(flags, s)); + slab_want_init_on_alloc(flags, s), s->object_size); return i; error: slub_put_cpu_ptr(s->cpu_slab); - slab_post_alloc_hook(s, objcg, flags, i, p, false); + slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); kmem_cache_free_bulk(s, i, p); return 0; } @@@ -4052,8 -4017,7 +4052,8 @@@ init_kmem_cache_node(struct kmem_cache_ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); + NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH * + sizeof(struct kmem_cache_cpu));
/* * Must align to double word boundary for the double cmpxchg @@@ -4238,8 -4202,7 +4238,8 @@@ static int calculate_sizes(struct kmem_ */ s->inuse = size;
- if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || + if (slub_debug_orig_size(s) || + (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || s->ctor) { /* @@@ -5623,21 -5586,7 +5623,21 @@@ static ssize_t failslab_show(struct kme { return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); } -SLAB_ATTR_RO(failslab); + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + if (s->refcount > 1) + return -EINVAL; + + if (buf[0] == '1') + WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB); + else + WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB); + + return length; +} +SLAB_ATTR(failslab); #endif
static ssize_t shrink_show(struct kmem_cache *s, char *buf) @@@ -5971,6 -5920,11 +5971,6 @@@ static int sysfs_slab_add(struct kmem_c struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s);
- if (!kset) { - kobject_init(&s->kobj, &slab_ktype); - return 0; - } - if (!unmergeable && disable_higher_order_debug && (slub_debug & DEBUG_METADATA_FLAGS)) unmergeable = 1; @@@ -6100,7 -6054,8 +6100,7 @@@ static int __init slab_sysfs_init(void mutex_unlock(&slab_mutex); return 0; } - -__initcall(slab_sysfs_init); +late_initcall(slab_sysfs_init); #endif /* CONFIG_SYSFS */
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) diff --combined mm/swapfile.c index 72e481aacd5d,4ee31056d3f8..3eedf7ae957f --- a/mm/swapfile.c +++ b/mm/swapfile.c @@@ -772,8 -772,7 +772,7 @@@ static void set_cluster_next(struct swa /* No free swap slots available */ if (si->highest_bit <= si->lowest_bit) return; - next = si->lowest_bit + - prandom_u32_max(si->highest_bit - si->lowest_bit + 1); + next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit); next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES); next = max_t(unsigned int, next, si->lowest_bit); } @@@ -973,23 -972,23 +972,23 @@@ done scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; } offset = si->lowest_bit; while (offset < scan_base) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; offset++; } spin_lock(&si->lock); @@@ -3089,7 -3088,7 +3088,7 @@@ SYSCALL_DEFINE2(swapon, const char __us */ for_each_possible_cpu(cpu) { per_cpu(*p->cluster_next_cpu, cpu) = - 1 + prandom_u32_max(p->highest_bit); + get_random_u32_inclusive(1, p->highest_bit); } nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
diff --combined net/802/mrp.c index 6c927d4b35f0,8c6f0381023b..66fcbf23b486 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@@ -592,7 -592,7 +592,7 @@@ static void mrp_join_timer_arm(struct m { unsigned long delay;
- delay = prandom_u32_max(msecs_to_jiffies(mrp_join_time)); + delay = get_random_u32_below(msecs_to_jiffies(mrp_join_time)); mod_timer(&app->join_timer, jiffies + delay); }
@@@ -606,10 -606,7 +606,10 @@@ static void mrp_join_timer(struct timer spin_unlock(&app->lock);
mrp_queue_xmit(app); - mrp_join_timer_arm(app); + spin_lock(&app->lock); + if (likely(app->active)) + mrp_join_timer_arm(app); + spin_unlock(&app->lock); }
static void mrp_periodic_timer_arm(struct mrp_applicant *app) @@@ -623,12 -620,11 +623,12 @@@ static void mrp_periodic_timer(struct t struct mrp_applicant *app = from_timer(app, t, periodic_timer);
spin_lock(&app->lock); - mrp_mad_event(app, MRP_EVENT_PERIODIC); - mrp_pdu_queue(app); + if (likely(app->active)) { + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + mrp_periodic_timer_arm(app); + } spin_unlock(&app->lock); - - mrp_periodic_timer_arm(app); }
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) @@@ -876,7 -872,6 +876,7 @@@ int mrp_init_applicant(struct net_devic app->dev = dev; app->app = appl; app->mad = RB_ROOT; + app->active = true; spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); @@@ -905,9 -900,6 +905,9 @@@ void mrp_uninit_applicant(struct net_de
RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+ spin_lock_bh(&app->lock); + app->active = false; + spin_unlock_bh(&app->lock); /* Delete timer and generate a final TX event to flush out * all pending messages before the applicant is gone. */ diff --combined net/bluetooth/mgmt.c index 0dd30a3beb77,81ce668b0b77..d2ea8e19aa1b --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@@ -7373,9 -7373,8 +7373,8 @@@ static int get_conn_info(struct sock *s /* To avoid client trying to guess when to poll again for information we * calculate conn info age as random value between min/max set in hdev. */ - conn_info_age = hdev->conn_info_min_age + - prandom_u32_max(hdev->conn_info_max_age - - hdev->conn_info_min_age); + conn_info_age = get_random_u32_inclusive(hdev->conn_info_min_age, + hdev->conn_info_max_age - 1);
/* Query controller to refresh cached values if they are too old or were * never read. @@@ -8859,7 -8858,7 +8858,7 @@@ static int add_ext_adv_params(struct so * extra parameters we don't know about will be ignored in this request. */ if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_INVALID_PARAMS);
flags = __le32_to_cpu(cp->flags); diff --combined net/can/j1939/transport.c index f26f4cfa9e63,67d36776aff4..5c722b55fe23 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@@ -987,7 -987,7 +987,7 @@@ static int j1939_session_tx_eoma(struc /* wait for the EOMA packet to come in */ j1939_tp_set_rxtimeout(session, 1250);
- netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session); + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
return 0; } @@@ -1168,7 -1168,7 +1168,7 @@@ static enum hrtimer_restart j1939_tp_tx if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { session->tx_retry++; j1939_tp_schedule_txtimer(session, - 10 + prandom_u32_max(16)); + 10 + get_random_u32_below(16)); } else { netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", __func__, session); diff --combined net/core/neighbour.c index 952a54763358,ba92762de525..f00a79fc301b --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@@ -111,7 -111,7 +111,7 @@@ static void neigh_cleanup_and_release(s
unsigned long neigh_rand_reach_time(unsigned long base) { - return base ? prandom_u32_max(base) + (base >> 1) : 0; + return base ? get_random_u32_below(base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time);
@@@ -307,31 -307,7 +307,31 @@@ static int neigh_del_timer(struct neigh return 0; }
-static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_parms_qlen_dec(struct net_device *dev, int family) +{ + struct neigh_parms *p; + + rcu_read_lock(); + p = neigh_get_dev_parms_rcu(dev, family); + if (p) + p->qlen--; + rcu_read_unlock(); +} + +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, + int family) { struct sk_buff_head tmp; unsigned long flags; @@@ -345,7 -321,13 +345,7 @@@ struct net_device *dev = skb->dev;
if (net == NULL || net_eq(dev_net(dev), net)) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } @@@ -427,8 -409,7 +427,8 @@@ static int __neigh_ifdown(struct neigh_ write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, + tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; @@@ -1640,8 -1621,13 +1640,8 @@@ static void neigh_proxy_process(struct
if (tdif <= 0) { struct net_device *dev = skb->dev; - struct in_device *in_dev;
- rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue);
if (tbl->proxy_redo && netif_running(dev)) { @@@ -1666,7 -1652,7 +1666,7 @@@ void pneigh_enqueue(struct neigh_table struct sk_buff *skb) { unsigned long sched_next = jiffies + - prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); + get_random_u32_below(NEIGH_VAR(p, PROXY_DELAY));
if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); @@@ -1835,7 -1821,7 +1835,7 @@@ int neigh_table_clear(int index, struc cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue, NULL); + pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@@ -3553,6 -3539,18 +3553,6 @@@ static int proc_unres_qlen(struct ctl_t return ret; }
-static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, - int family) -{ - switch (family) { - case AF_INET: - return __in_dev_arp_parms_get_rcu(dev); - case AF_INET6: - return __in6_dev_nd_parms_get_rcu(dev); - } - return NULL; -} - static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { diff --combined net/ipv4/inet_connection_sock.c index 4a34bc7cb15e,f22051219b50..b366ab9148f2 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@@ -314,7 -314,7 +314,7 @@@ other_half_scan if (likely(remaining > 1)) remaining &= ~1U;
- offset = prandom_u32_max(remaining); + offset = get_random_u32_below(remaining); /* __inet_hash_connect() favors ports having @low parity * We do the opposite to not pollute connect() users. */ @@@ -471,11 -471,11 +471,11 @@@ int inet_csk_get_port(struct sock *sk, bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; bool found_port = false, check_bind_conflict = true; bool bhash_created = false, bhash2_created = false; + int ret = -EADDRINUSE, port = snum, l3mdev; struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2 = NULL; struct inet_bind_bucket *tb = NULL; bool head2_lock_acquired = false; - int ret = 1, port = snum, l3mdev; struct net *net = sock_net(sk);
l3mdev = inet_sk_bound_l3mdev(sk); @@@ -1186,7 -1186,7 +1186,7 @@@ int inet_csk_listen_start(struct sock * { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - int err = -EADDRINUSE; + int err;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@@ -1202,8 -1202,7 +1202,8 @@@ * after validation is complete. */ inet_sk_state_store(sk, TCP_LISTEN); - if (!sk->sk_prot->get_port(sk, inet->inet_num)) { + err = sk->sk_prot->get_port(sk, inet->inet_num); + if (!err) { inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk); diff --combined net/ipv4/inet_hashtables.c index 3cec471a2cd2,a879ec1a267d..d039b4e732a3 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@@ -858,80 -858,34 +858,80 @@@ inet_bhash2_addr_any_hashbucket(const s return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; }
-int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +static void inet_update_saddr(struct sock *sk, void *saddr, int family) +{ + if (family == AF_INET) { + inet_sk(sk)->inet_saddr = *(__be32 *)saddr; + sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else { + sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; + } +#endif +} + +static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); - struct inet_bind_hashbucket *head2; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + int bhash; + + if (!inet_csk(sk)->icsk_bind2_hash) { + /* Not bind()ed before. */ + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + return 0; + }
/* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); - if (!new_tb2) + if (!new_tb2) { + if (reset) { + /* The (INADDR_ANY, port) bucket might have already + * been freed, then we cannot fixup icsk_bind2_hash, + * so we give up and unlink sk from bhash/bhash2 not + * to leave inconsistency in bhash2. + */ + inet_put_port(sk); + inet_reset_saddr(sk); + } + return -ENOMEM; + }
+ bhash = inet_bhashfn(net, port, hinfo->bhash_size); + head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
- if (prev_saddr) { - spin_lock_bh(&prev_saddr->lock); - __sk_del_bind2_node(sk); - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); - } + /* If we change saddr locklessly, another thread + * iterating over bhash might see corrupted address. + */ + spin_lock_bh(&head->lock);
- spin_lock_bh(&head2->lock); + spin_lock(&head2->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); + spin_unlock(&head2->lock); + + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; @@@ -939,40 -893,26 +939,40 @@@ } sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; - spin_unlock_bh(&head2->lock); + spin_unlock(&head2->lock); + + spin_unlock_bh(&head->lock);
if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
return 0; } + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +{ + return __inet_bhash2_update_saddr(sk, saddr, family, false); +} EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
+void inet_bhash2_reset_saddr(struct sock *sk) +{ + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + __inet_bhash2_update_saddr(sk, NULL, 0, true); +} +EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. + * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though - * attacks were since demonstrated, thus we use 65536 instead to really - * give more isolation and privacy, at the expense of 256kB of kernel - * memory. + * attacks were since demonstrated, thus we use 65536 by default instead + * to really give more isolation and privacy, at the expense of 256kB + * of kernel memory. */ -#define INET_TABLE_PERTURB_SHIFT 16 -#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) static u32 *table_perturb;
int __inet_hash_connect(struct inet_timewait_death_row *death_row, @@@ -1097,7 -1037,7 +1097,7 @@@ ok * on low contention the randomness is maximal and on high contention * it may be inexistent. */ - i = max_t(int, i, prandom_u32_max(8) * 2); + i = max_t(int, i, get_random_u32_below(8) * 2); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */ diff --combined net/ipv4/tcp_input.c index 1efacbe948da,23cf418efe4f..cc072d2cfcd8 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -3646,7 -3646,8 +3646,8 @@@ static void tcp_send_challenge_ack(stru u32 half = (ack_limit + 1) >> 1;
WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now); - WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit)); + WRITE_ONCE(net->ipv4.tcp_challenge_count, + get_random_u32_inclusive(half, ack_limit + half - 1)); } count = READ_ONCE(net->ipv4.tcp_challenge_count); if (count > 0) { @@@ -4764,8 -4765,8 +4765,8 @@@ static void tcp_ofo_queue(struct sock * } }
-static bool tcp_prune_ofo_queue(struct sock *sk); -static int tcp_prune_queue(struct sock *sk); +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) @@@ -4773,11 -4774,11 +4774,11 @@@ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, size)) {
- if (tcp_prune_queue(sk) < 0) + if (tcp_prune_queue(sk, skb) < 0) return -1;
while (!sk_rmem_schedule(sk, skb, size)) { - if (!tcp_prune_ofo_queue(sk)) + if (!tcp_prune_ofo_queue(sk, skb)) return -1; } } @@@ -5329,8 -5330,6 +5330,8 @@@ new_range * Clean the out-of-order queue to make room. * We drop high sequences packets to : * 1) Let a chance for holes to be filled. + * This means we do not drop packets from ooo queue if their sequence + * is before incoming packet sequence. * 2) not add too big latencies if thousands of packets sit there. * (But if application shrinks SO_RCVBUF, we could still end up * freeing whole queue here) @@@ -5338,31 -5337,24 +5339,31 @@@ * * Return true if queue has shrunk. */ -static bool tcp_prune_ofo_queue(struct sock *sk) +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + bool pruned = false; int goal;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; + do { + struct sk_buff *skb = rb_to_skb(node); + + /* If incoming skb would land last in ofo queue, stop pruning. */ + if (after(TCP_SKB_CB(in_skb)->seq, TCP_SKB_CB(skb)->seq)) + break; + pruned = true; prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); - goal -= rb_to_skb(node)->truesize; - tcp_drop_reason(sk, rb_to_skb(node), - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + goal -= skb->truesize; + tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + tp->ooo_last_skb = rb_to_skb(prev); if (!prev || goal <= 0) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) @@@ -5371,18 -5363,16 +5372,18 @@@ } node = prev; } while (node); - tp->ooo_last_skb = rb_to_skb(prev);
- /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); - return true; + if (pruned) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + } + return pruned; }
/* Reduce allocated memory if we can, trying to get @@@ -5392,7 -5382,7 +5393,7 @@@ * until the socket owning process reads some of the data * to stabilize the situation. */ -static int tcp_prune_queue(struct sock *sk) +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk);
@@@ -5419,7 -5409,7 +5420,7 @@@ /* Collapsing did not help, destructive actions follow. * This must not ever occur. */
- tcp_prune_ofo_queue(sk); + tcp_prune_ofo_queue(sk, in_skb);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@@ -6841,18 -6831,10 +6842,18 @@@ static bool tcp_syn_flood_action(const #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned && syncookies != 2 && - xchg(&queue->synflood_warned, 1) == 0) - net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, sk->sk_num, msg); + if (!READ_ONCE(queue->synflood_warned) && syncookies != 2 && + xchg(&queue->synflood_warned, 1) == 0) { + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) { + net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n", + proto, inet6_rcv_saddr(sk), + sk->sk_num, msg); + } else { + net_info_ratelimited("%s: Possible SYN flooding on port %pI4:%u. %s.\n", + proto, &sk->sk_rcv_saddr, + sk->sk_num, msg); + } + }
return want_cookie; } diff --combined net/netfilter/nf_conntrack_core.c index b96338b4bf36,8703812405eb..dfc2b39bbcdf --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@@ -211,24 -211,28 +211,24 @@@ static u32 hash_conntrack_raw(const str unsigned int zoneid, const struct net *net) { - struct { - struct nf_conntrack_man src; - union nf_inet_addr dst_addr; - unsigned int zone; - u32 net_mix; - u16 dport; - u16 proto; - } __aligned(SIPHASH_ALIGNMENT) combined; + u64 a, b, c, d;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- memset(&combined, 0, sizeof(combined)); + /* The direction must be ignored, handle usable tuplehash members manually */ + a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3]; + b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
- /* The direction must be ignored, so handle usable members manually. */ - combined.src = tuple->src; - combined.dst_addr = tuple->dst.u3; - combined.zone = zoneid; - combined.net_mix = net_hash_mix(net); - combined.dport = (__force __u16)tuple->dst.u.all; - combined.proto = tuple->dst.protonum; + c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16; + c |= tuple->dst.protonum;
- return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd); + d = (u64)zoneid << 32 | net_hash_mix(net); + + /* IPv4: u3.all[1,2,3] == 0 */ + c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2]; + d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2]; + + return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd); }
static u32 scale_hash(u32 hash) @@@ -902,7 -906,7 +902,7 @@@ nf_conntrack_hash_check_insert(struct n nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
- max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN); + max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse */ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { @@@ -1223,7 -1227,7 +1223,7 @@@ __nf_conntrack_confirm(struct sk_buff * goto dying; }
- max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN); + max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ @@@ -1777,7 -1781,7 +1777,7 @@@ init_conntrack(struct net *net, struct }
#ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --combined net/netlink/af_netlink.c index d73091f6bb0f,7a401d94463a..bca2a470ccad --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@@ -812,17 -812,6 +812,17 @@@ static int netlink_release(struct socke }
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + + /* Because struct net might disappear soon, do not keep a pointer. */ + if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + /* Because of deferred_put_nlk_sk and use of work queue, + * it is possible netns will be freed before this socket. + */ + sock_net_set(sk, &init_net); + __netns_tracker_alloc(&init_net, &sk->ns_tracker, + false, GFP_KERNEL); + } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@@ -846,7 -835,7 +846,7 @@@ retry /* Bind collision, search negative portid values. */ if (rover == -4096) /* rover will be in range [S32_MIN, -4097] */ - rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN); + rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN); else if (rover >= -4096) rover = -4097; portid = rover--; @@@ -2499,24 -2488,19 +2499,24 @@@ void netlink_ack(struct sk_buff *in_skb flags |= NLM_F_ACK_TLVS;
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); - if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; - sk_error_report(NETLINK_CB(in_skb).sk); - return; - } + if (!skb) + goto err_skb;
rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, flags); + NLMSG_ERROR, sizeof(*errmsg), flags); + if (!rep) + goto err_bad_put; errmsg = nlmsg_data(rep); errmsg->error = err; - unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) - ? nlh->nlmsg_len : sizeof(*nlh), - /* Bounds checked by the skb layer. */); + errmsg->msg = *nlh; + + if (!(flags & NLM_F_CAPPED)) { + if (!nlmsg_append(skb, nlmsg_len(nlh))) + goto err_bad_put; + + memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), + nlmsg_len(nlh)); + }
if (tlvlen) netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); @@@ -2524,14 -2508,6 +2524,14 @@@ nlmsg_end(skb, rep);
nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); + + return; + +err_bad_put: + nlmsg_free(skb); +err_skb: + NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack);
diff --combined net/packet/af_packet.c index 44f20cf8a0c0,51a47ade92e8..4155ce680b41 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -1350,7 -1350,7 +1350,7 @@@ static bool fanout_flow_is_huge(struct if (READ_ONCE(history[i]) == rxhash) count++;
- victim = prandom_u32_max(ROLLOVER_HLEN); + victim = get_random_u32_below(ROLLOVER_HLEN);
/* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) @@@ -1386,7 -1386,7 +1386,7 @@@ static unsigned int fanout_demux_rnd(st struct sk_buff *skb, unsigned int num) { - return prandom_u32_max(num); + return get_random_u32_below(num); }
static unsigned int fanout_demux_rollover(struct packet_fanout *f, @@@ -1777,7 -1777,6 +1777,7 @@@ static int fanout_add(struct sock *sk, match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; + match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; @@@ -3278,7 -3277,7 +3278,7 @@@ static int packet_bind_spkt(struct sock int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1];
/* * Check legality @@@ -3289,8 -3288,8 +3289,8 @@@ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0;
return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } @@@ -3562,11 -3561,11 +3562,11 @@@ static int packet_getname_spkt(struct s return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock();
return sizeof(*uaddr); diff --combined net/sctp/socket.c index 5acbdf0d38f3,cfe72085fdc4..84021a6c4f9d --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -5098,17 -5098,13 +5098,17 @@@ static void sctp_destroy_sock(struct so }
/* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk);
/* Free up the HMAC transform. */ crypto_free_shash(sp->hmac); +}
+static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); }
@@@ -5315,14 -5311,14 +5315,14 @@@ EXPORT_SYMBOL_GPL(sctp_for_each_endpoin
int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p) + const union sctp_addr *paddr, void *p, int dif) { struct sctp_transport *transport; struct sctp_endpoint *ep; int err = -ENOENT;
rcu_read_lock(); - transport = sctp_addrs_lookup_transport(net, laddr, paddr); + transport = sctp_addrs_lookup_transport(net, laddr, paddr, dif, dif); if (!transport) { rcu_read_unlock(); return err; @@@ -8323,7 -8319,7 +8323,7 @@@ static int sctp_get_port_local(struct s
inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rover = prandom_u32_max(remaining) + low; + rover = get_random_u32_below(remaining) + low;
do { rover++; @@@ -8398,7 -8394,6 +8398,7 @@@ pp_found * in an endpoint. */ sk_for_each_bound(sk2, &pp->owner) { + int bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); struct sctp_sock *sp2 = sctp_sk(sk2); struct sctp_endpoint *ep2 = sp2->ep;
@@@ -8409,9 -8404,7 +8409,9 @@@ uid_eq(uid, sock_i_uid(sk2)))) continue;
- if (sctp_bind_addr_conflict(&ep2->base.bind_addr, + if ((!sk->sk_bound_dev_if || !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2) && + sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { ret = 1; goto fail_unlock; @@@ -9434,7 -9427,7 +9434,7 @@@ void sctp_copy_sock(struct sock *newsk sctp_sk(newsk)->reuse = sp->reuse;
newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@@ -9669,20 -9662,11 +9669,20 @@@ struct proto sctp_prot =
#if IS_ENABLED(CONFIG_IPV6)
-#include <net/transp_v6.h> -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; }
struct proto sctpv6_prot = { @@@ -9692,8 -9676,8 +9692,8 @@@ .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt,