The following commit has been merged in the master branch: commit 061b6a9834b11463ff37e7e4080e95fb3cf3ee0d Merge: 82c177bfcb9b6e6c2a6c5bb779377fb4e089ddab 21c8b57c7a506a0233099ec4af4b503939c8010d Author: Stephen Rothwell sfr@canb.auug.org.au Date: Fri Nov 11 14:42:33 2022 +1100
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
diff --combined Documentation/admin-guide/kernel-parameters.txt index 1c45b8272858,78493797460f..7dea58f4a69c --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@@ -703,17 -703,6 +703,17 @@@ condev= [HW,S390] console device conmode=
+ con3215_drop= [S390] 3215 console drop mode. + Format: y|n|Y|N|1|0 + When set to true, drop data on the 3215 console when + the console buffer is full. In this case the + operator using a 3270 terminal emulator (for example + x3270) does not have to enter the clear key for the + console output to advance and the kernel to continue. + This leads to a much faster boot time when a 3270 + terminal emulator is active. If no 3270 terminal + emulator is used, this parameter has no effect. + console= [KNL] Output console device and options.
tty<n> Use the virtual console device <n>. @@@ -4577,17 -4566,15 +4577,15 @@@
ramdisk_start= [RAM] RAM disk image start address
- random.trust_cpu={on,off} - [KNL] Enable or disable trusting the use of the - CPU's random number generator (if available) to - fully seed the kernel's CRNG. Default is controlled - by CONFIG_RANDOM_TRUST_CPU. - - random.trust_bootloader={on,off} - [KNL] Enable or disable trusting the use of a - seed passed by the bootloader (if available) to - fully seed the kernel's CRNG. Default is controlled - by CONFIG_RANDOM_TRUST_BOOTLOADER. + random.trust_cpu=off + [KNL] Disable trusting the use of the CPU's + random number generator (if available) to + initialize the kernel's RNG. + + random.trust_bootloader=off + [KNL] Disable trusting the use of the a seed + passed by the bootloader (if available) to + initialize the kernel's RNG.
randomize_kstack_offset= [KNL] Enable or disable kernel stack offset diff --combined arch/s390/configs/debug_defconfig index 63807bd0b536,2bb3469092f0..a7b4e1d82758 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@@ -573,8 -573,6 +573,6 @@@ CONFIG_VIRTIO_CONSOLE= CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y - # CONFIG_RANDOM_TRUST_CPU is not set - # CONFIG_RANDOM_TRUST_BOOTLOADER is not set CONFIG_PPS=m # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set @@@ -723,42 -721,52 +721,42 @@@ CONFIG_CRYPTO_ECDSA= CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@@ -769,16 -777,6 +767,16 @@@ CONFIG_CRYPTO_USER_API_SKCIPHER= CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@@ -797,6 -795,7 +795,6 @@@ CONFIG_CMA_SIZE_MBYTES= CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --combined arch/s390/configs/defconfig index 4f9a98247442,366bf3fd0208..2bc2d0fe5774 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@@ -563,8 -563,6 +563,6 @@@ CONFIG_VIRTIO_CONSOLE= CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y - # CONFIG_RANDOM_TRUST_CPU is not set - # CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y @@@ -707,43 -705,53 +705,43 @@@ CONFIG_CRYPTO_ECDSA= CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@@ -754,16 -762,6 +752,16 @@@ CONFIG_CRYPTO_USER_API_SKCIPHER= CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@@ -781,6 -779,7 +779,6 @@@ CONFIG_CMA_SIZE_MBYTES= CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --combined arch/s390/configs/zfcpdump_defconfig index 5fe9948be644,9a6269c2374c..ae14ab0b864d --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@@ -58,7 -58,6 +58,6 @@@ CONFIG_ZFCP= # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set - # CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set @@@ -74,6 -73,7 +73,6 @@@ CONFIG_PRINTK_TIME= # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --combined arch/x86/kernel/cpu/common.c index 73cc546e024d,3f66dd03c091..9cfca3d7d0e2 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@@ -22,9 -22,9 +22,9 @@@ #include <linux/io.h> #include <linux/syscore_ops.h> #include <linux/pgtable.h> + #include <linux/stackprotector.h>
#include <asm/cmdline.h> - #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> #include <asm/doublefault.h> @@@ -52,7 -52,6 +52,7 @@@ #include <asm/cpu.h> #include <asm/mce.h> #include <asm/msr.h> +#include <asm/cacheinfo.h> #include <asm/memtype.h> #include <asm/microcode.h> #include <asm/microcode_intel.h> @@@ -610,7 -609,6 +610,7 @@@ static __always_inline void setup_cet(s
if (!ibt_selftest()) { pr_err("IBT selftest: Failed!\n"); + wrmsrl(MSR_IA32_S_CET, 0); setup_clear_cpu_cap(X86_FEATURE_IBT); return; } @@@ -703,6 -701,16 +703,6 @@@ static const char *table_lookup_model(s __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
-void load_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#endif -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); @@@ -730,45 -738,16 +730,45 @@@ void load_fixmap_gdt(int cpu } EXPORT_SYMBOL_GPL(load_fixmap_gdt);
-/* - * Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. +/** + * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base + * @cpu: The CPU number for which this is invoked + * + * Invoked during early boot to switch from early GDT and early per CPU to + * the direct GDT and the runtime per CPU area. On 32-bit the percpu base + * switch is implicit by loading the direct GDT. On 64bit this requires + * to update GSBASE. */ -void switch_to_new_gdt(int cpu) +void __init switch_gdt_and_percpu_base(int cpu) { - /* Load the original GDT */ load_direct_gdt(cpu); - /* Reload the per-cpu base */ - load_percpu_segment(cpu); + +#ifdef CONFIG_X86_64 + /* + * No need to load %gs. It is already correct. + * + * Writing %gs on 64bit would zero GSBASE which would make any per + * CPU operation up to the point of the wrmsrl() fault. + * + * Set GSBASE to the new offset. Until the wrmsrl() happens the + * early mapping is still valid. That means the GSBASE update will + * lose any prior per CPU data which was not copied over in + * setup_per_cpu_areas(). + * + * This works even with stackprotector enabled because the + * per CPU stack canary is 0 in both per CPU areas. + */ + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); +#else + /* + * %fs is already set to __KERNEL_PERCPU, but after switching GDT + * it is required to load FS again so that the 'hidden' part is + * updated from the new GDT. Up to this point the early per CPU + * translation is active. Any content of the early per CPU data + * which was not copied over in setup_per_cpu_areas() is lost. + */ + loadsegment(fs, __KERNEL_PERCPU); +#endif }
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; @@@ -1969,6 -1948,7 +1969,6 @@@ void identify_secondary_cpu(struct cpui #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - mtrr_ap_init(); validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); @@@ -2013,18 -1993,27 +2013,18 @@@ static __init int setup_clearcpuid(cha } __setup("clearcpuid=", setup_clearcpuid);
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { + .current_task = &init_task, + .preempt_count = INIT_PREEMPT_COUNT, + .top_of_stack = TOP_OF_INIT_STACK, +}; +EXPORT_PER_CPU_SYMBOL(pcpu_hot); + #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-/* - * The following percpu variables are hot. Align current_task to - * cacheline size such that they fall in the same cacheline. - */ -DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = - &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(void *, hardirq_stack_ptr); -DEFINE_PER_CPU(bool, hardirq_stack_inuse); - -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; - static void wrmsrl_cstar(unsigned long val) { /* @@@ -2075,6 -2064,20 +2075,6 @@@ void syscall_init(void
#else /* CONFIG_X86_64 */
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -/* - * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find - * the top of the kernel stack. Use an extra percpu variable to track the - * top of the kernel stack directly. - */ -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); - #ifdef CONFIG_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, __stack_chk_guard); EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); @@@ -2245,6 -2248,12 +2245,6 @@@ void cpu_init(void boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - switch_to_new_gdt(cpu); - if (IS_ENABLED(CONFIG_X86_64)) { loadsegment(fs, 0); memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --combined arch/x86/kernel/module.c index 0142982e94c5,9f74b7d71a50..a3f8cdb80feb --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@@ -53,7 -53,7 +53,7 @@@ static unsigned long int get_module_loa */ if (module_load_offset == 0) module_load_offset = - (prandom_u32_max(1024) + 1) * PAGE_SIZE; + get_random_u32_between(1, 1024 + 1) * PAGE_SIZE; mutex_unlock(&module_kaslr_mutex); } return module_load_offset; @@@ -74,11 -74,10 +74,11 @@@ void *module_alloc(unsigned long size return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, - PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, - __builtin_return_address(0)); + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; @@@ -254,8 -253,7 +254,8 @@@ int module_finalize(const Elf_Ehdr *hdr { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, + *calls = NULL, *cfi = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@@ -275,10 -273,6 +275,10 @@@ retpolines = s; if (!strcmp(".return_sites", secstrings + s->sh_name)) returns = s; + if (!strcmp(".call_sites", secstrings + s->sh_name)) + calls = s; + if (!strcmp(".cfi_sites", secstrings + s->sh_name)) + cfi = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@@ -291,22 -285,6 +291,22 @@@ void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + if (retpolines || cfi) { + void *rseg = NULL, *cseg = NULL; + unsigned int rsize = 0, csize = 0; + + if (retpolines) { + rseg = (void *)retpolines->sh_addr; + rsize = retpolines->sh_size; + } + + if (cfi) { + cseg = (void *)cfi->sh_addr; + csize = cfi->sh_size; + } + + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@@ -320,21 -298,6 +320,21 @@@ void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (calls || para) { + struct callthunk_sites cs = {}; + + if (calls) { + cs.call_start = (void *)calls->sh_addr; + cs.call_end = (void *)calls->sh_addr + calls->sh_size; + } + + if (para) { + cs.pv_start = (void *)para->sh_addr; + cs.pv_end = (void *)para->sh_addr + para->sh_size; + } + + callthunks_patch_module_calls(&cs, me); + } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); diff --combined arch/x86/kernel/setup_percpu.c index c2fc4c41c164,b26123c90b4f..c242dc47e9cb --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@@ -11,6 -11,7 +11,7 @@@ #include <linux/smp.h> #include <linux/topology.h> #include <linux/pfn.h> + #include <linux/stackprotector.h> #include <asm/sections.h> #include <asm/processor.h> #include <asm/desc.h> @@@ -21,8 -22,10 +22,7 @@@ #include <asm/proto.h> #include <asm/cpumask.h> #include <asm/cpu.h> - #include <asm/stackprotector.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - #ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) #else @@@ -169,7 -172,7 +169,7 @@@ void __init setup_per_cpu_areas(void for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(cpu_number, cpu) = cpu; + per_cpu(pcpu_hot.cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the @@@ -208,7 -211,7 +208,7 @@@ * area. Reload any changed state for the boot CPU. */ if (!cpu) - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); }
/* indicate the early static arrays will soon be gone */ diff --combined arch/x86/kernel/smpboot.c index 26937f28000b,5a742b6ec46d..55cad72715d9 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@@ -56,9 -56,9 +56,10 @@@ #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> + #include <linux/stackprotector.h>
#include <asm/acpi.h> +#include <asm/cacheinfo.h> #include <asm/desc.h> #include <asm/nmi.h> #include <asm/irq.h> @@@ -1047,7 -1047,7 +1048,7 @@@ int common_cpu_up(unsigned int cpu, str /* Just in case we booted with a single CPU. */ alternatives_enable_smp();
- per_cpu(current_task, cpu) = idle; + per_cpu(pcpu_hot.current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle);
/* Initialize the interrupt stack(s) */ @@@ -1057,7 -1057,7 +1058,7 @@@
#ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif @@@ -1429,6 -1429,8 +1430,6 @@@ void __init native_smp_prepare_cpus(uns
uv_system_init();
- set_mtrr_aps_delayed_init(); - smp_quirk_init_udelay();
speculative_store_bypass_ht_init(); @@@ -1438,12 -1440,12 +1439,12 @@@
void arch_thaw_secondary_cpus_begin(void) { - set_mtrr_aps_delayed_init(); + set_cache_aps_delayed_init(true); }
void arch_thaw_secondary_cpus_end(void) { - mtrr_aps_init(); + cache_aps_init(); }
/* @@@ -1452,11 -1454,7 +1453,11 @@@ void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(me); + + /* SMP handles this from setup_per_cpu_areas() */ + if (!IS_ENABLED(CONFIG_SMP)) + switch_gdt_and_percpu_base(me); + /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); @@@ -1490,7 -1488,7 +1491,7 @@@ void __init native_smp_cpus_done(unsign
nmi_selftest(); impress_friends(); - mtrr_aps_init(); + cache_aps_init(); }
static int __initdata setup_possible_cpus = -1; diff --combined arch/x86/xen/enlighten_pv.c index 9b892079581b,745420853a7c..05170fc19083 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@@ -32,6 -32,7 +32,7 @@@ #include <linux/edd.h> #include <linux/reboot.h> #include <linux/virtio_anchor.h> + #include <linux/stackprotector.h>
#include <xen/xen.h> #include <xen/events.h> @@@ -64,7 -65,6 +65,6 @@@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/reboot.h> - #include <asm/stackprotector.h> #include <asm/hypervisor.h> #include <asm/mach_traps.h> #include <asm/mwait.h> @@@ -1209,7 -1209,7 +1209,7 @@@ static void __init xen_setup_gdt(int cp pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; pv_ops.cpu.load_gdt = xen_load_gdt_boot;
- switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu);
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; pv_ops.cpu.load_gdt = xen_load_gdt; diff --combined crypto/testmgr.c index e2806ef044fd,b09b9408ec44..19639fb08c25 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@@ -855,9 -855,9 +855,9 @@@ static int prepare_keybuf(const u8 *key /* Generate a random length in range [0, max_len], but prefer smaller values */ static unsigned int generate_random_length(unsigned int max_len) { - unsigned int len = prandom_u32_max(max_len + 1); + unsigned int len = get_random_u32_below(max_len + 1);
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: return len % 64; case 1: @@@ -874,14 -874,14 +874,14 @@@ static void flip_random_bit(u8 *buf, si { size_t bitpos;
- bitpos = prandom_u32_max(size * 8); + bitpos = get_random_u32_below(size * 8); buf[bitpos / 8] ^= 1 << (bitpos % 8); }
/* Flip a random byte in the given nonempty data buffer */ static void flip_random_byte(u8 *buf, size_t size) { - buf[prandom_u32_max(size)] ^= 0xff; + buf[get_random_u32_below(size)] ^= 0xff; }
/* Sometimes make some random changes to the given nonempty data buffer */ @@@ -891,15 -891,15 +891,15 @@@ static void mutate_buffer(u8 *buf, size size_t i;
/* Sometimes flip some bits */ - if (prandom_u32_max(4) == 0) { - num_flips = min_t(size_t, 1 << prandom_u32_max(8), size * 8); + if (get_random_u32_below(4) == 0) { + num_flips = min_t(size_t, 1 << get_random_u32_below(8), size * 8); for (i = 0; i < num_flips; i++) flip_random_bit(buf, size); }
/* Sometimes flip some bytes */ - if (prandom_u32_max(4) == 0) { - num_flips = min_t(size_t, 1 << prandom_u32_max(8), size); + if (get_random_u32_below(4) == 0) { + num_flips = min_t(size_t, 1 << get_random_u32_below(8), size); for (i = 0; i < num_flips; i++) flip_random_byte(buf, size); } @@@ -915,11 -915,11 +915,11 @@@ static void generate_random_bytes(u8 *b if (count == 0) return;
- switch (prandom_u32_max(8)) { /* Choose a generation strategy */ + switch (get_random_u32_below(8)) { /* Choose a generation strategy */ case 0: case 1: /* All the same byte, plus optional mutations */ - switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: b = 0x00; break; @@@ -959,24 -959,24 +959,24 @@@ static char *generate_random_sgl_divisi unsigned int this_len; const char *flushtype_str;
- if (div == &divs[max_divs - 1] || prandom_u32_max(2) == 0) + if (div == &divs[max_divs - 1] || get_random_u32_below(2) == 0) this_len = remaining; else - this_len = 1 + prandom_u32_max(remaining); + this_len = get_random_u32_between(1, remaining + 1); div->proportion_of_total = this_len;
- if (prandom_u32_max(4) == 0) - div->offset = (PAGE_SIZE - 128) + prandom_u32_max(128); - else if (prandom_u32_max(2) == 0) - div->offset = prandom_u32_max(32); + if (get_random_u32_below(4) == 0) + div->offset = get_random_u32_between(PAGE_SIZE - 128, PAGE_SIZE); + else if (get_random_u32_below(2) == 0) + div->offset = get_random_u32_below(32); else - div->offset = prandom_u32_max(PAGE_SIZE); - if (prandom_u32_max(8) == 0) + div->offset = get_random_u32_below(PAGE_SIZE); + if (get_random_u32_below(8) == 0) div->offset_relative_to_alignmask = true;
div->flush_type = FLUSH_TYPE_NONE; if (gen_flushes) { - switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: div->flush_type = FLUSH_TYPE_REIMPORT; break; @@@ -988,7 -988,7 +988,7 @@@
if (div->flush_type != FLUSH_TYPE_NONE && !(req_flags & CRYPTO_TFM_REQ_MAY_SLEEP) && - prandom_u32_max(2) == 0) + get_random_u32_below(2) == 0) div->nosimd = true;
switch (div->flush_type) { @@@ -1035,7 -1035,7 +1035,7 @@@ static void generate_random_testvec_con
p += scnprintf(p, end - p, "random:");
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: case 1: cfg->inplace_mode = OUT_OF_PLACE; @@@ -1050,12 -1050,12 +1050,12 @@@ break; }
- if (prandom_u32_max(2) == 0) { + if (get_random_u32_below(2) == 0) { cfg->req_flags |= CRYPTO_TFM_REQ_MAY_SLEEP; p += scnprintf(p, end - p, " may_sleep"); }
- switch (prandom_u32_max(4)) { + switch (get_random_u32_below(4)) { case 0: cfg->finalization_type = FINALIZATION_TYPE_FINAL; p += scnprintf(p, end - p, " use_final"); @@@ -1071,7 -1071,7 +1071,7 @@@ }
if (!(cfg->req_flags & CRYPTO_TFM_REQ_MAY_SLEEP) && - prandom_u32_max(2) == 0) { + get_random_u32_below(2) == 0) { cfg->nosimd = true; p += scnprintf(p, end - p, " nosimd"); } @@@ -1084,7 -1084,7 +1084,7 @@@ cfg->req_flags); p += scnprintf(p, end - p, "]");
- if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32_max(2) == 0) { + if (cfg->inplace_mode == OUT_OF_PLACE && get_random_u32_below(2) == 0) { p += scnprintf(p, end - p, " dst_divs=["); p = generate_random_sgl_divisions(cfg->dst_divs, ARRAY_SIZE(cfg->dst_divs), @@@ -1093,13 -1093,13 +1093,13 @@@ p += scnprintf(p, end - p, "]"); }
- if (prandom_u32_max(2) == 0) { - cfg->iv_offset = 1 + prandom_u32_max(MAX_ALGAPI_ALIGNMASK); + if (get_random_u32_below(2) == 0) { + cfg->iv_offset = get_random_u32_between(1, MAX_ALGAPI_ALIGNMASK + 1); p += scnprintf(p, end - p, " iv_offset=%u", cfg->iv_offset); }
- if (prandom_u32_max(2) == 0) { - cfg->key_offset = 1 + prandom_u32_max(MAX_ALGAPI_ALIGNMASK); + if (get_random_u32_below(2) == 0) { + cfg->key_offset = get_random_u32_between(1, MAX_ALGAPI_ALIGNMASK + 1); p += scnprintf(p, end - p, " key_offset=%u", cfg->key_offset); }
@@@ -1652,8 -1652,8 +1652,8 @@@ static void generate_random_hash_testve vec->ksize = 0; if (maxkeysize) { vec->ksize = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->ksize = 1 + prandom_u32_max(maxkeysize); + if (get_random_u32_below(4) == 0) + vec->ksize = get_random_u32_between(1, maxkeysize + 1); generate_random_bytes((u8 *)vec->key, vec->ksize);
vec->setkey_error = crypto_shash_setkey(desc->tfm, vec->key, @@@ -2218,13 -2218,13 +2218,13 @@@ static void mutate_aead_message(struct const unsigned int aad_tail_size = aad_iv ? ivsize : 0; const unsigned int authsize = vec->clen - vec->plen;
- if (prandom_u32_max(2) == 0 && vec->alen > aad_tail_size) { + if (get_random_u32_below(2) == 0 && vec->alen > aad_tail_size) { /* Mutate the AAD */ flip_random_bit((u8 *)vec->assoc, vec->alen - aad_tail_size); - if (prandom_u32_max(2) == 0) + if (get_random_u32_below(2) == 0) return; } - if (prandom_u32_max(2) == 0) { + if (get_random_u32_below(2) == 0) { /* Mutate auth tag (assuming it's at the end of ciphertext) */ flip_random_bit((u8 *)vec->ctext + vec->plen, authsize); } else { @@@ -2249,7 -2249,7 +2249,7 @@@ static void generate_aead_message(struc const unsigned int ivsize = crypto_aead_ivsize(tfm); const unsigned int authsize = vec->clen - vec->plen; const bool inauthentic = (authsize >= MIN_COLLISION_FREE_AUTHSIZE) && - (prefer_inauthentic || prandom_u32_max(4) == 0); + (prefer_inauthentic || get_random_u32_below(4) == 0);
/* Generate the AAD. */ generate_random_bytes((u8 *)vec->assoc, vec->alen); @@@ -2257,7 -2257,7 +2257,7 @@@ /* Avoid implementation-defined behavior. */ memcpy((u8 *)vec->assoc + vec->alen - ivsize, vec->iv, ivsize);
- if (inauthentic && prandom_u32_max(2) == 0) { + if (inauthentic && get_random_u32_below(2) == 0) { /* Generate a random ciphertext. */ generate_random_bytes((u8 *)vec->ctext, vec->clen); } else { @@@ -2321,8 -2321,8 +2321,8 @@@ static void generate_random_aead_testve
/* Key: length in [0, maxkeysize], but usually choose maxkeysize */ vec->klen = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->klen = prandom_u32_max(maxkeysize + 1); + if (get_random_u32_below(4) == 0) + vec->klen = get_random_u32_below(maxkeysize + 1); generate_random_bytes((u8 *)vec->key, vec->klen); vec->setkey_error = crypto_aead_setkey(tfm, vec->key, vec->klen);
@@@ -2331,8 -2331,8 +2331,8 @@@
/* Tag length: in [0, maxauthsize], but usually choose maxauthsize */ authsize = maxauthsize; - if (prandom_u32_max(4) == 0) - authsize = prandom_u32_max(maxauthsize + 1); + if (get_random_u32_below(4) == 0) + authsize = get_random_u32_below(maxauthsize + 1); if (prefer_inauthentic && authsize < MIN_COLLISION_FREE_AUTHSIZE) authsize = MIN_COLLISION_FREE_AUTHSIZE; if (WARN_ON(authsize > maxdatasize)) @@@ -2342,7 -2342,7 +2342,7 @@@
/* AAD, plaintext, and ciphertext lengths */ total_len = generate_random_length(maxdatasize); - if (prandom_u32_max(4) == 0) + if (get_random_u32_below(4) == 0) vec->alen = 0; else vec->alen = generate_random_length(total_len); @@@ -2958,8 -2958,8 +2958,8 @@@ static void generate_random_cipher_test
/* Key: length in [0, maxkeysize], but usually choose maxkeysize */ vec->klen = maxkeysize; - if (prandom_u32_max(4) == 0) - vec->klen = prandom_u32_max(maxkeysize + 1); + if (get_random_u32_below(4) == 0) + vec->klen = get_random_u32_below(maxkeysize + 1); generate_random_bytes((u8 *)vec->key, vec->klen); vec->setkey_error = crypto_skcipher_setkey(tfm, vec->key, vec->klen);
@@@ -4712,12 -4712,6 +4712,12 @@@ static const struct alg_test_desc alg_t .alg = "cts(cbc(paes))", .test = alg_test_null, .fips_allowed = 1, + }, { + .alg = "cts(cbc(sm4))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_cts_tv_template) + } }, { .alg = "curve25519", .test = alg_test_kpp, @@@ -5592,12 -5586,6 +5592,12 @@@ .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xcbc(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_xcbc128_tv_template) + } }, { .alg = "xchacha12", .test = alg_test_skcipher, @@@ -5652,13 -5640,6 +5652,13 @@@ .suite = { .cipher = __VECS(serpent_xts_tv_template) } + }, { + .alg = "xts(sm4)", + .generic_driver = "xts(ecb(sm4-generic))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_xts_tv_template) + } }, { .alg = "xts(twofish)", .generic_driver = "xts(ecb(twofish-generic))", diff --combined drivers/block/drbd/drbd_receiver.c index 93d6df4dc5a4,3eccc6cd5004..72327278ada1 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@@ -781,7 -781,7 +781,7 @@@ static struct socket *drbd_wait_for_con
timeo = connect_int * HZ; /* 28.5% random jitter */ - timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7; + timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); if (err <= 0) @@@ -1004,7 -1004,7 +1004,7 @@@ retry drbd_warn(connection, "Error receiving initial packet\n"); sock_release(s); randomize: - if (prandom_u32_max(2)) + if (get_random_u32_below(2)) goto retry; } } @@@ -1603,19 -1603,9 +1603,19 @@@ static void drbd_issue_peer_discard_or_ drbd_endio_write_sec_final(peer_req); }
+static int peer_request_fault_type(struct drbd_peer_request *peer_req) +{ + if (peer_req_op(peer_req) == REQ_OP_READ) { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD; + } else { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR; + } +} + /** * drbd_submit_peer_request() - * @device: DRBD device. * @peer_req: peer request * * May spread the pages to multiple bios, @@@ -1629,9 -1619,10 +1629,9 @@@ * on certain Xen deployments. */ /* TODO allocate from our own bio_set. */ -int drbd_submit_peer_request(struct drbd_device *device, - struct drbd_peer_request *peer_req, - const blk_opf_t opf, const int fault_type) +int drbd_submit_peer_request(struct drbd_peer_request *peer_req) { + struct drbd_device *device = peer_req->peer_device->device; struct bio *bios = NULL; struct bio *bio; struct page *page = peer_req->pages; @@@ -1676,18 -1667,7 +1676,18 @@@ * generated bio, but a bio allocated on behalf of the peer. */ next_bio: - bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO); + /* _DISCARD, _WRITE_ZEROES handled above. + * REQ_OP_FLUSH (empty flush) not expected, + * should have been mapped to a "drbd protocol barrier". + * REQ_OP_SECURE_ERASE: I don't see how we could ever support that. + */ + if (!(peer_req_op(peer_req) == REQ_OP_WRITE || + peer_req_op(peer_req) == REQ_OP_READ)) { + drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf); + return -EINVAL; + } + + bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO); /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio->bi_private = peer_req; @@@ -1717,7 -1697,7 +1717,7 @@@ bios = bios->bi_next; bio->bi_next = NULL;
- drbd_submit_bio_noacct(device, fault_type, bio); + drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio); } while (bios); return 0; } @@@ -2071,7 -2051,6 +2071,7 @@@ static int recv_resync_read(struct drbd * respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_WRITE; peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock); @@@ -2079,7 -2058,8 +2079,7 @@@ spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, - DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0;
/* don't care for the reason here */ @@@ -2395,6 -2375,16 +2395,6 @@@ static int wait_for_and_update_peer_seq return ret; }
-/* see also bio_flags_to_wire() - * DRBD_REQ_*, because we need to semantically map the flags to data packet - * flags and back. We may replicate to other kernel versions. */ -static blk_opf_t wire_flags_to_bio_flags(u32 dpf) -{ - return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | - (dpf & DP_FUA ? REQ_FUA : 0) | - (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); -} - static enum req_op wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_ZEROES) @@@ -2405,15 -2395,6 +2405,15 @@@ return REQ_OP_WRITE; }
+/* see also bio_flags_to_wire() */ +static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) +{ + return wire_flags_to_bio_op(dpf) | + (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | + (dpf & DP_FUA ? REQ_FUA : 0) | + (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); +} + static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { @@@ -2557,6 -2538,8 +2557,6 @@@ static int receive_Data(struct drbd_con struct drbd_peer_request *peer_req; struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); - enum req_op op; - blk_opf_t op_flags; u32 dp_flags; int err, tp;
@@@ -2595,10 -2578,11 +2595,10 @@@ peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags); - op = wire_flags_to_bio_op(dp_flags); - op_flags = wire_flags_to_bio_flags(dp_flags); + peer_req->opf = wire_flags_to_bio(connection, dp_flags); if (pi->cmd == P_TRIM) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_DISCARD); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); /* need to play safe: an older DRBD sender * may mean zero-out while sending P_TRIM. */ @@@ -2606,7 -2590,7 +2606,7 @@@ peer_req->flags |= EE_ZEROOUT; } else if (pi->cmd == P_ZEROES) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES); D_ASSERT(peer_device, peer_req->pages == NULL); /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */ if (dp_flags & DP_DISCARD) @@@ -2693,7 -2677,8 +2693,7 @@@ peer_req->flags |= EE_CALL_AL_COMPLETE_IO; }
- err = drbd_submit_peer_request(device, peer_req, op | op_flags, - DRBD_FAULT_DT_WR); + err = drbd_submit_peer_request(peer_req); if (!err) return 0;
@@@ -2804,6 -2789,7 +2804,6 @@@ static int receive_DataRequest(struct d struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; - unsigned int fault_type; struct p_block_req *p = pi->data;
peer_device = conn_peer_device(connection, pi->vnr); @@@ -2863,11 -2849,11 +2863,11 @@@ put_ldev(device); return -ENOMEM; } + peer_req->opf = REQ_OP_READ;
switch (pi->cmd) { case P_DATA_REQUEST: peer_req->w.cb = w_e_end_data_req; - fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ peer_req->flags |= EE_APPLICATION; goto submit; @@@ -2881,12 -2867,14 +2881,12 @@@ fallthrough; case P_RS_DATA_REQUEST: peer_req->w.cb = w_e_end_rsdata_req; - fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); break;
case P_OV_REPLY: case P_CSUM_RS_REQUEST: - fault_type = DRBD_FAULT_RS_RD; di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); if (!di) goto out_free_e; @@@ -2935,6 -2923,7 +2935,6 @@@ (unsigned long long)sector); } peer_req->w.cb = w_e_end_ov_req; - fault_type = DRBD_FAULT_RS_RD; break;
default: @@@ -2986,7 -2975,8 +2986,7 @@@ submit_for_resync submit: update_receiver_timing_details(connection, drbd_submit_peer_request); inc_unacked(device); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, - fault_type) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0;
/* don't care for the reason here */ @@@ -4957,6 -4947,7 +4957,6 @@@ static int receive_rs_deallocated(struc
if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const enum req_op op = REQ_OP_WRITE_ZEROES;
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); @@@ -4966,7 -4957,6 +4966,7 @@@ }
peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_DISCARD; peer_req->submit_jif = jiffies; peer_req->flags |= EE_TRIM;
@@@ -4975,7 -4965,8 +4975,7 @@@ spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev); - err = drbd_submit_peer_request(device, peer_req, op, - DRBD_FAULT_RS_WR); + err = drbd_submit_peer_request(peer_req);
if (err) { spin_lock_irq(&device->resource->req_lock); diff --combined drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1160723c9d2d,29d2459bcc90..be14fce52a5b --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@@ -2424,7 -2424,7 +2424,7 @@@ gen8_dispatch_bsd_engine(struct drm_i91 /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - prandom_u32_max(num_vcs_engines(dev_priv)); + get_random_u32_below(num_vcs_engines(dev_priv));
return file_priv->bsd_engine; } @@@ -2954,6 -2954,11 +2954,6 @@@ await_fence_array(struct i915_execbuffe int err;
for (n = 0; n < eb->num_fences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - if (!eb->fences[n].dma_fence) continue;
diff --combined drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 0187bc72310d,45b605e32c87..85bfa6498f6c --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@@ -3471,9 -3471,9 +3471,9 @@@ logical_ring_default_vfuncs(struct inte
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; else - engine->emit_bb_start = gen125_emit_bb_start_noarb; + engine->emit_bb_start = xehp_emit_bb_start_noarb; } else { if (intel_engine_has_preemption(engine)) engine->emit_bb_start = gen8_emit_bb_start; @@@ -3689,7 -3689,7 +3689,7 @@@ static void virtual_engine_initial_hint * NB This does not force us to execute on this engine, it will just * typically be the first we inspect for submission. */ - swp = prandom_u32_max(ve->num_siblings); + swp = get_random_u32_below(ve->num_siblings); if (swp) swap(ve->siblings[swp], ve->siblings[0]); } diff --combined drivers/mmc/core/core.c index cdbaeb389fce,a1efda85c6f2..1bb70d438d6c --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@@ -56,7 -56,7 +56,7 @@@ static const unsigned freqs[] = { 40000 /* * Enabling software CRCs on the data blocks can be a significant (30%) * performance cost, and for other reasons may not always be desired. - * So we allow it it to be disabled. + * So we allow it to be disabled. */ bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); @@@ -97,8 -97,8 +97,8 @@@ static void mmc_should_fail_request(str !should_fail(&host->fail_mmc_request, data->blksz * data->blocks)) return;
- data->error = data_errors[prandom_u32_max(ARRAY_SIZE(data_errors))]; - data->bytes_xfered = prandom_u32_max(data->bytes_xfered >> 9) << 9; + data->error = data_errors[get_random_u32_below(ARRAY_SIZE(data_errors))]; + data->bytes_xfered = get_random_u32_below(data->bytes_xfered >> 9) << 9; }
#else /* CONFIG_FAIL_MMC_REQUEST */ @@@ -527,7 -527,7 +527,7 @@@ EXPORT_SYMBOL(mmc_cqe_post_req) * mmc_cqe_recovery - Recover from CQE errors. * @host: MMC host to recover * - * Recovery consists of stopping CQE, stopping eMMC, discarding the queue in + * Recovery consists of stopping CQE, stopping eMMC, discarding the queue * in eMMC, and discarding the queue in CQE. CQE must call * mmc_cqe_request_done() on all requests. An error is returned if the eMMC * fails to discard its queue. @@@ -1134,13 -1134,7 +1134,13 @@@ u32 mmc_select_voltage(struct mmc_host mmc_power_cycle(host, ocr); } else { bit = fls(ocr) - 1; - ocr &= 3 << bit; + /* + * The bit variable represents the highest voltage bit set in + * the OCR register. + * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), + * we must shift the mask '3' with (bit - 1). + */ + ocr &= 3 << (bit - 1); if (bit != host->ios.vdd) dev_warn(mmc_dev(host), "exceeding card's volts\n"); } diff --combined drivers/mmc/host/dw_mmc.c index d35607128322,6ef410053037..e4d09c439051 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@@ -1363,7 -1363,7 +1363,7 @@@ static void __dw_mci_start_request(stru * is just about to roll over. * * We do this whole thing under spinlock and only if the - * command hasn't already completed (indicating the the irq + * command hasn't already completed (indicating the irq * already ran so we don't want the timeout). */ spin_lock_irqsave(&host->irq_lock, irqflags); @@@ -1858,7 -1858,7 +1858,7 @@@ static void dw_mci_start_fault_timer(st * Try to inject the error at random points during the data transfer. */ hrtimer_start(&host->fault_timer, - ms_to_ktime(prandom_u32_max(25)), + ms_to_ktime(get_random_u32_below(25)), HRTIMER_MODE_REL); }
diff --combined drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index af8843507f3d,0e6bf5e5c8a9..8ca6faf3fef5 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@@ -1128,7 -1128,7 +1128,7 @@@ static void brcmf_p2p_afx_handler(struc if (afx_hdl->is_listen && afx_hdl->my_listen_chan) /* 100ms ~ 300ms */ err = brcmf_p2p_discover_listen(p2p, afx_hdl->my_listen_chan, - 100 * (1 + prandom_u32_max(3))); + 100 * get_random_u32_between(1, 4)); else err = brcmf_p2p_act_frm_search(p2p, afx_hdl->peer_listen_chan);
@@@ -2424,12 -2424,8 +2424,12 @@@ int brcmf_p2p_del_vif(struct wiphy *wip brcmf_remove_interface(vif->ifp, true);
brcmf_cfg80211_arm_vif_event(cfg, NULL); - if (iftype != NL80211_IFTYPE_P2P_DEVICE) - p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { + if (vif == p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif) + p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; + if (vif == p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION2].vif) + p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION2].vif = NULL; + }
return err; } diff --combined drivers/pci/p2pdma.c index 27539770a613,5565f67d6537..86812d2073ea --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@@ -89,90 -89,6 +89,90 @@@ static ssize_t published_show(struct de } static DEVICE_ATTR_RO(published);
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + size_t len = vma->vm_end - vma->vm_start; + struct pci_p2pdma *p2pdma; + struct percpu_ref *ref; + unsigned long vaddr; + void *kaddr; + int ret; + + /* prevent private mappings from being established */ + if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { + pci_info_ratelimited(pdev, + "%s: fail, attempted private mapping\n", + current->comm); + return -EINVAL; + } + + if (vma->vm_pgoff) { + pci_info_ratelimited(pdev, + "%s: fail, attempted mapping with non-zero offset\n", + current->comm); + return -EINVAL; + } + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (!p2pdma) { + ret = -ENODEV; + goto out; + } + + kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref); + if (!kaddr) { + ret = -ENOMEM; + goto out; + } + + /* + * vm_insert_page() can sleep, so a reference is taken to mapping + * such that rcu_read_unlock() can be done before inserting the + * pages + */ + if (unlikely(!percpu_ref_tryget_live_rcu(ref))) { + ret = -ENODEV; + goto out_free_mem; + } + rcu_read_unlock(); + + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr)); + if (ret) { + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); + return ret; + } + percpu_ref_get(ref); + put_page(virt_to_page(kaddr)); + kaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + + percpu_ref_put(ref); + + return 0; +out_free_mem: + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); +out: + rcu_read_unlock(); + return ret; +} + +static struct bin_attribute p2pmem_alloc_attr = { + .attr = { .name = "allocate", .mode = 0660 }, + .mmap = p2pmem_alloc_mmap, + /* + * Some places where we want to call mmap (ie. python) will check + * that the file size is greater than the mmap size before allowing + * the mmap to continue. To work around this, just set the size + * to be very large. + */ + .size = SZ_1T, +}; + static struct attribute *p2pmem_attrs[] = { &dev_attr_size.attr, &dev_attr_available.attr, @@@ -180,32 -96,11 +180,32 @@@ NULL, };
+static struct bin_attribute *p2pmem_bin_attrs[] = { + &p2pmem_alloc_attr, + NULL, +}; + static const struct attribute_group p2pmem_group = { .attrs = p2pmem_attrs, + .bin_attrs = p2pmem_bin_attrs, .name = "p2pmem", };
+static void p2pdma_page_free(struct page *page) +{ + struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap); + struct percpu_ref *ref; + + gen_pool_free_owner(pgmap->provider->p2pdma->pool, + (uintptr_t)page_to_virt(page), PAGE_SIZE, + (void **)&ref); + percpu_ref_put(ref); +} + +static const struct dev_pagemap_ops p2pdma_pgmap_ops = { + .page_free = p2pdma_page_free, +}; + static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; @@@ -257,19 -152,6 +257,19 @@@ out return error; }
+static void pci_p2pdma_unmap_mappings(void *data) +{ + struct pci_dev *pdev = data; + + /* + * Removing the alloc attribute from sysfs will call + * unmap_mapping_range() on the inode, teardown any existing userspace + * mappings and prevent new ones from being created. + */ + sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr, + p2pmem_group.name); +} + /** * pci_p2pdma_add_resource - add memory for use as p2p memory * @pdev: the device to add the memory to @@@ -316,7 -198,6 +316,7 @@@ int pci_p2pdma_add_resource(struct pci_ pgmap->range.end = pgmap->range.start + size - 1; pgmap->nr_range = 1; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; + pgmap->ops = &p2pdma_pgmap_ops;
p2p_pgmap->provider = pdev; p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - @@@ -328,11 -209,6 +328,11 @@@ goto pgmap_free; }
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, + pdev); + if (error) + goto pages_free; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, @@@ -797,7 -673,7 +797,7 @@@ struct pci_dev *pci_p2pmem_find_many(st }
if (dev_cnt) - pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]); + pdev = pci_dev_get(closest_pdevs[get_random_u32_below(dev_cnt)]);
for (i = 0; i < dev_cnt; i++) pci_dev_put(closest_pdevs[i]); diff --combined drivers/scsi/scsi_debug.c index 629853662b82,4e476f1bc3c3..a86db9761d00 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@@ -1899,13 -1899,6 +1899,13 @@@ static int resp_readcap16(struct scsi_c arr[14] |= 0x40; }
+ /* + * Since the scsi_debug READ CAPACITY implementation always reports the + * total disk capacity, set RC BASIS = 1 for host-managed ZBC devices. + */ + if (devip->zmodel == BLK_ZONED_HM) + arr[12] |= 1 << 4; + arr[15] = sdebug_lowest_aligned & 0xff;
if (have_dif_prot) { @@@ -5702,16 -5695,16 +5702,16 @@@ static int schedule_resp(struct scsi_cm u64 ns = jiffies_to_nsecs(delta_jiff);
if (sdebug_random && ns < U32_MAX) { - ns = prandom_u32_max((u32)ns); + ns = get_random_u32_below((u32)ns); } else if (sdebug_random) { ns >>= 12; /* scale to 4 usec precision */ if (ns < U32_MAX) /* over 4 hours max */ - ns = prandom_u32_max((u32)ns); + ns = get_random_u32_below((u32)ns); ns <<= 12; } kt = ns_to_ktime(ns); } else { /* ndelay has a 4.2 second max */ - kt = sdebug_random ? prandom_u32_max((u32)ndelay) : + kt = sdebug_random ? get_random_u32_below((u32)ndelay) : (u32)ndelay; if (ndelay < INCLUSIVE_TIMING_MAX_NS) { u64 d = ktime_get_boottime_ns() - ns_from_boot; diff --combined fs/ceph/inode.c index 7bff2855aab6,fb255988dee8..29a6629f6299 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@@ -126,7 -126,7 +126,7 @@@ const struct inode_operations ceph_file .setattr = ceph_setattr, .getattr = ceph_getattr, .listxattr = ceph_listxattr, - .get_acl = ceph_get_acl, + .get_inode_acl = ceph_get_acl, .set_acl = ceph_set_acl, };
@@@ -362,7 -362,7 +362,7 @@@ static int ceph_fill_fragtree(struct in if (nsplits != ci->i_fragtree_nsplits) { update = true; } else if (nsplits) { - i = prandom_u32_max(nsplits); + i = get_random_u32_below(nsplits); id = le32_to_cpu(fragtree->splits[i].frag); if (!__ceph_find_frag(ci, id)) update = true; @@@ -2255,7 -2255,7 +2255,7 @@@ int ceph_setattr(struct user_namespace err = __ceph_setattr(inode, attr);
if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&init_user_ns, inode, attr->ia_mode); + err = posix_acl_chmod(&init_user_ns, dentry, attr->ia_mode);
return err; } @@@ -2417,10 -2417,10 +2417,10 @@@ static int statx_to_caps(u32 want, umod { int mask = 0;
- if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_AUTH_SHARED;
- if (want & (STATX_NLINK|STATX_CTIME)) { + if (want & (STATX_NLINK|STATX_CTIME|STATX_CHANGE_COOKIE)) { /* * The link count for directories depends on inode->i_subdirs, * and that is only updated when Fs caps are held. @@@ -2431,10 -2431,11 +2431,10 @@@ mask |= CEPH_CAP_LINK_SHARED; }
- if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| - STATX_BLOCKS)) + if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|STATX_BLOCKS|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_FILE_SHARED;
- if (want & (STATX_CTIME)) + if (want & (STATX_CTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_XATTR_SHARED;
return mask; @@@ -2477,11 -2478,6 +2477,11 @@@ int ceph_getattr(struct user_namespace valid_mask |= STATX_BTIME; }
+ if (request_mask & STATX_CHANGE_COOKIE) { + stat->change_cookie = inode_peek_iversion_raw(inode); + valid_mask |= STATX_CHANGE_COOKIE; + } + if (ceph_snap(inode) == CEPH_NOSNAP) stat->dev = sb->s_dev; else @@@ -2496,7 -2492,7 +2496,7 @@@ struct inode *parent;
parent = ceph_lookup_inode(sb, ceph_ino(inode)); - if (!parent) + if (IS_ERR(parent)) return PTR_ERR(parent);
pci = ceph_inode(parent); @@@ -2523,8 -2519,6 +2523,8 @@@ stat->nlink = 1 + 1 + ci->i_subdirs; }
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; stat->result_mask = request_mask & valid_mask; return err; } diff --combined fs/ext4/ialloc.c index 67a257a69758,9fc1af8e19a3..9aa8b18bdac1 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@@ -465,7 -465,7 +465,7 @@@ static int find_group_orlov(struct supe ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo); parent_group = hinfo.hash % ngroups; } else - parent_group = prandom_u32_max(ngroups); + parent_group = get_random_u32_below(ngroups); for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; get_orlov_stats(sb, g, flex_size, &stats); @@@ -870,7 -870,7 +870,7 @@@ static int ext4_xattr_credits_for_new_i struct super_block *sb = dir->i_sb; int nblocks = 0; #ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); + struct posix_acl *p = get_inode_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(p)) return PTR_ERR(p); diff --combined fs/f2fs/gc.c index f1a46519a5fe,536d332d9e2e..0a8cd2ca1090 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@@ -96,6 -96,16 +96,6 @@@ static int gc_thread_func(void *data * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (sbi->gc_mode == GC_URGENT_HIGH) { - spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_remaining--; - if (!sbi->gc_urgent_high_remaining) - sbi->gc_mode = GC_NORMAL; - } - spin_unlock(&sbi->gc_urgent_high_lock); - } - if (sbi->gc_mode == GC_URGENT_HIGH || sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; @@@ -152,15 -162,6 +152,15 @@@ do_gc /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi, true); next: + if (sbi->gc_mode != GC_NORMAL) { + spin_lock(&sbi->gc_remaining_trials_lock); + if (sbi->gc_remaining_trials) { + sbi->gc_remaining_trials--; + if (!sbi->gc_remaining_trials) + sbi->gc_mode = GC_NORMAL; + } + spin_unlock(&sbi->gc_remaining_trials_lock); + } sb_end_write(sbi->sb);
} while (!kthread_should_stop()); @@@ -171,10 -172,13 +171,10 @@@ int f2fs_start_gc_thread(struct f2fs_sb { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; - int err = 0;
gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) { - err = -ENOMEM; - goto out; - } + if (!gc_th) + return -ENOMEM;
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; @@@ -189,14 -193,12 +189,14 @@@ sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { - err = PTR_ERR(gc_th->f2fs_gc_task); + int err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); sbi->gc_thread = NULL; + return err; } -out: - return err; + + return 0; }
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) @@@ -280,7 -282,7 +280,7 @@@ static void select_policy(struct f2fs_s
/* let's select beginning hot/small space first in no_heap mode*/ if (f2fs_need_rand_seg(sbi)) - p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; @@@ -1108,7 -1110,6 +1108,7 @@@ static bool is_alive(struct f2fs_sb_inf if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", ofs_in_node, dni->ino, dni->nid, max_addrs); + f2fs_put_page(node_page, 1); return false; }
@@@ -1562,8 -1563,8 +1562,8 @@@ next_step continue; }
- data_page = f2fs_get_read_data_page(inode, - start_bidx, REQ_RAHEAD, true); + data_page = f2fs_get_read_data_page(inode, start_bidx, + REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); @@@ -2132,6 -2133,8 +2132,6 @@@ out_unlock if (err) return err;
- set_sbi_flag(sbi, SBI_IS_RESIZEFS); - freeze_super(sbi->sb); f2fs_down_write(&sbi->gc_lock); f2fs_down_write(&sbi->cp_global_sem); @@@ -2147,7 -2150,6 +2147,7 @@@ if (err) goto out_err;
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS); err = free_segment_range(sbi, secs, false); if (err) goto recover_out; @@@ -2171,7 -2173,6 +2171,7 @@@ f2fs_commit_super(sbi, false); } recover_out: + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); @@@ -2184,5 -2185,6 +2184,5 @@@ out_err f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); - clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; } diff --combined fs/f2fs/segment.c index 3654c30e0517,8319fdc59fb4..0d9a49d8ada0 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@@ -192,18 -192,14 +192,18 @@@ void f2fs_abort_atomic_write(struct ino if (!f2fs_is_atomic_file(inode)) return;
- if (clean) - truncate_inode_pages_final(inode->i_mapping); clear_inode_flag(fi->cow_inode, FI_COW_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); stat_dec_atomic_inode(inode); + + if (clean) { + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, fi->original_i_size); + } }
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@@ -261,26 -257,14 +261,26 @@@ static void __complete_revoke_list(stru bool revoke) { struct revoke_entry *cur, *tmp; + pgoff_t start_index = 0; + bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
list_for_each_entry_safe(cur, tmp, head, list) { - if (revoke) + if (revoke) { __replace_atomic_write_block(inode, cur->index, cur->old_addr, NULL, true); + } else if (truncate) { + f2fs_truncate_hole(inode, start_index, cur->index); + start_index = cur->index + 1; + } + list_del(&cur->list); kmem_cache_free(revoke_entry_slab, cur); } + + if (!revoke && truncate) { + f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false); + clear_inode_flag(inode, FI_ATOMIC_REPLACE); + } }
static int __f2fs_commit_atomic_write(struct inode *inode) @@@ -351,12 -335,10 +351,12 @@@ next }
out: - if (ret) + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; - else + } else { sbi->committed_atomic_block += fi->atomic_write_cnt; + set_inode_flag(inode, FI_ATOMIC_COMMITTED); + }
__complete_revoke_list(inode, &revoke_list, ret ? true : false);
@@@ -638,11 -620,12 +638,11 @@@ int f2fs_create_flush_cmd_control(struc { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; - int err = 0;
if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; if (fcc->f2fs_issue_flush) - return err; + return 0; goto init_thread; }
@@@ -655,20 -638,19 +655,20 @@@ init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; if (!test_opt(sbi, FLUSH_MERGE)) - return err; + return 0;
init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); + int err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; }
- return err; + return 0; }
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) @@@ -874,7 -856,7 +874,7 @@@ block_t f2fs_get_unusable_blocks(struc } mutex_unlock(&dirty_i->seglist_lock);
- unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + unusable = max(holes[DATA], holes[NODE]); if (unusable > ovp_holes) return unusable - ovp_holes; return 0; @@@ -1466,7 -1448,7 +1466,7 @@@ retry if (i + 1 < dpolicy->granularity) break;
- if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy);
pend_list = &dcc->pend_list[i]; @@@ -2043,10 -2025,8 +2043,10 @@@ int f2fs_start_discard_thread(struct f2
dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(dcc->f2fs_issue_discard)) + if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); + dcc->f2fs_issue_discard = NULL; + }
return err; } @@@ -2066,7 -2046,6 +2066,7 @@@ static int create_discard_cmd_control(s return -ENOMEM;
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) dcc->discard_granularity = sbi->blocks_per_seg; else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) @@@ -2555,7 -2534,7 +2555,7 @@@ static unsigned int __get_next_segno(st
sanity_check_seg_type(sbi, seg_type); if (f2fs_need_rand_seg(sbi)) - return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */ if (__is_large_section(sbi)) @@@ -2609,7 -2588,7 +2609,7 @@@ static void new_curseg(struct f2fs_sb_i curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_between(1, sbi->max_fragment_chunk + 1); }
static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@@ -2646,9 -2625,9 +2646,9 @@@ static void __refresh_next_blkoff(struc /* To allocate block chunks in different sizes, use random number */ if (--seg->fragment_remained_chunk <= 0) { seg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_between(1, sbi->max_fragment_chunk + 1); seg->next_blkoff += - prandom_u32_max(sbi->max_fragment_hole) + 1; + get_random_u32_between(1, sbi->max_fragment_hole + 1); } } } diff --combined kernel/bpf/core.c index e772d9cc0a38,38159f39e2af..d7e1b30c33d6 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@@ -864,7 -864,8 +864,7 @@@ static struct bpf_prog_pack *alloc_new_ list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr); - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); return pack; }
@@@ -882,7 -883,8 +882,7 @@@ void *bpf_prog_pack_alloc(u32 size, bpf if (ptr) { bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); - set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); } goto out; } @@@ -1030,7 -1032,7 +1030,7 @@@ bpf_jit_binary_alloc(unsigned int progl hdr->size = size; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = prandom_u32_max(hole) & ~(alignment - 1); + start = get_random_u32_below(hole) & ~(alignment - 1);
/* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@@ -1092,7 -1094,7 +1092,7 @@@ bpf_jit_binary_pack_alloc(unsigned int
hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)), BPF_PROG_CHUNK_SIZE - sizeof(*ro_header)); - start = prandom_u32_max(hole) & ~(alignment - 1); + start = get_random_u32_below(hole) & ~(alignment - 1);
*image_ptr = &ro_header->image[start]; *rw_image = &(*rw_header)->image[start]; @@@ -2249,14 -2251,8 +2249,14 @@@ static void __bpf_prog_array_free_sleep { struct bpf_prog_array *progs;
+ /* If RCU Tasks Trace grace period implies RCU grace period, there is + * no need to call kfree_rcu(), just call kfree() directly. + */ progs = container_of(rcu, struct bpf_prog_array, rcu); - kfree_rcu(progs, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(progs); + else + kfree_rcu(progs, rcu); }
void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs) diff --combined kernel/fork.c index 4439f7ff78f6,ec57cae58ff1..cfff237094aa --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -75,7 -75,6 +75,6 @@@ #include <linux/freezer.h> #include <linux/delayacct.h> #include <linux/taskstats_kern.h> - #include <linux/random.h> #include <linux/tty.h> #include <linux/fs_struct.h> #include <linux/magic.h> @@@ -97,6 -96,7 +96,7 @@@ #include <linux/scs.h> #include <linux/io_uring.h> #include <linux/bpf.h> + #include <linux/stackprotector.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -535,9 -535,6 +535,9 @@@ void put_task_stack(struct task_struct
void free_task(struct task_struct *tsk) { +#ifdef CONFIG_SECCOMP + WARN_ON_ONCE(tsk->seccomp.filter); +#endif release_user_cpus_ptr(tsk); scs_release(tsk);
@@@ -2409,6 -2406,12 +2409,6 @@@ static __latent_entropy struct task_str
spin_lock(¤t->sighand->siglock);
- /* - * Copy seccomp details explicitly here, in case they were changed - * before holding sighand lock. - */ - copy_seccomp(p); - rv_task_fork(p);
rseq_fork(p, clone_flags); @@@ -2425,14 -2428,6 +2425,14 @@@ goto bad_fork_cancel_cgroup; }
+ /* No more failure paths after this point. */ + + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@@ -2597,6 -2592,11 +2597,6 @@@ struct task_struct * __init fork_idle(i return task; }
-struct mm_struct *copy_init_mm(void) -{ - return dup_mm(NULL, &init_mm); -} - /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. @@@ -3015,27 -3015,10 +3015,27 @@@ static void sighand_ctor(void *data init_waitqueue_head(&sighand->signalfd_wqh); }
-void __init proc_caches_init(void) +void __init mm_cache_init(void) { unsigned int mm_size;
+ /* + * The mm_cpumask is located at the end of mm_struct, and is + * dynamically sized based on the maximum CPU number this system + * can have, taking hotplug into account (nr_cpu_ids). + */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + + mm_cachep = kmem_cache_create_usercopy("mm_struct", + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), + NULL); +} + +void __init proc_caches_init(void) +{ sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@@ -3053,6 -3036,19 +3053,6 @@@ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
- /* - * The mm_cpumask is located at the end of mm_struct, and is - * dynamically sized based on the maximum CPU number this system - * can have, taking hotplug into account (nr_cpu_ids). - */ - mm_size = sizeof(struct mm_struct) + cpumask_size(); - - mm_cachep = kmem_cache_create_usercopy("mm_struct", - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, - offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), - NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); diff --combined kernel/time/clocksource.c index 4015ec6503a5,9cf32ccda715..5f9e0cb051c3 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@@ -310,7 -310,7 +310,7 @@@ static void clocksource_verify_choose_c * CPUs that are currently online. */ for (i = 1; i < n; i++) { - cpu = prandom_u32_max(nr_cpu_ids); + cpu = get_random_u32_below(nr_cpu_ids); cpu = cpumask_next(cpu - 1, cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_online_mask); @@@ -386,7 -386,7 +386,7 @@@ EXPORT_SYMBOL_GPL(clocksource_verify_pe
static void clocksource_watchdog(struct timer_list *unused) { - u64 csnow, wdnow, cslast, wdlast, delta; + u64 csnow, wdnow, cslast, wdlast, delta, wdi; int next_cpu, reset_pending; int64_t wd_nsec, cs_nsec; struct clocksource *cs; @@@ -440,38 -440,6 +440,38 @@@ if (atomic_read(&watchdog_reset_pending)) continue;
+ /* Check for bogus measurements. */ + wdi = jiffies_to_nsecs(WATCHDOG_INTERVAL); + if (wd_nsec > (wdi << 2) || cs_nsec > (wdi << 2)) { + bool needwarn = false; + u64 wd_lb; + + cs->wd_bogus_count++; + if (!cs->wd_bogus_shift) { + needwarn = true; + } else { + delta = clocksource_delta(wdnow, cs->wd_last_bogus, watchdog->mask); + wd_lb = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift); + if ((1 << cs->wd_bogus_shift) * wdi <= wd_lb) + needwarn = true; + } + if (needwarn) { + /* This can happen on busy systems, which can delay the watchdog. */ + pr_warn("timekeeping watchdog on CPU%d: Watchdog clocksource '%s' advanced an excessive %lld ns during %d-jiffy time interval (%lu since last message), probable CPU overutilization, skipping watchdog check.\n", smp_processor_id(), watchdog->name, wd_nsec, WATCHDOG_INTERVAL, cs->wd_bogus_count); + cs->wd_last_bogus = wdnow; + if (cs->wd_bogus_shift < 10) + cs->wd_bogus_shift++; + cs->wd_bogus_count = 0; + } + continue; + } + /* Check too-short measurements second to handle wrap. */ + if (wd_nsec < (wdi >> 2) || cs_nsec < (wdi >> 2)) { + /* This usually indicates broken timer code or hardware. */ + pr_warn("timekeeping watchdog on CPU%d: Watchdog clocksource '%s' advanced only %lld ns during %d-jiffy time interval, skipping watchdog check.\n", smp_processor_id(), watchdog->name, wd_nsec, WATCHDOG_INTERVAL); + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { diff --combined lib/kobject.c index ba1017cd67d1,af1f5f2954d4..62b2b0643cbb --- a/lib/kobject.c +++ b/lib/kobject.c @@@ -94,10 -94,10 +94,10 @@@ static int create_dir(struct kobject *k return 0; }
-static int get_kobj_path_length(struct kobject *kobj) +static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; - struct kobject *parent = kobj; + const struct kobject *parent = kobj;
/* walk up the ancestors until we hit the one pointing to the * root. @@@ -112,9 -112,9 +112,9 @@@ return length; }
-static void fill_kobj_path(struct kobject *kobj, char *path, int length) +static void fill_kobj_path(const struct kobject *kobj, char *path, int length) { - struct kobject *parent; + const struct kobject *parent;
--length; for (parent = kobj; parent; parent = parent->parent) { @@@ -136,7 -136,7 +136,7 @@@ * * Return: The newly allocated memory, caller must free with kfree(). */ -char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) +char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; @@@ -694,7 -694,7 +694,7 @@@ static void kobject_release(struct kre { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - unsigned long delay = HZ + HZ * prandom_u32_max(4); + unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); @@@ -834,9 -834,6 +834,9 @@@ EXPORT_SYMBOL_GPL(kobj_sysfs_ops) /** * kset_register() - Initialize and add a kset. * @k: kset. + * + * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name() + * is freed, it can not be used any more. */ int kset_register(struct kset *k) { @@@ -847,12 -844,8 +847,12 @@@
kset_init(k); err = kobject_add_internal(&k->kobj); - if (err) + if (err) { + kfree_const(k->kobj.name); + /* Set it to NULL to avoid accessing bad pointer in callers. */ + k->kobj.name = NULL; return err; + } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } diff --combined lib/test_printf.c index 1fb12c1a0fa9,da5efc8b8543..1c1c9f2b5a71 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@@ -126,7 -126,7 +126,7 @@@ __test(const char *expect, int elen, co * be able to print it as expected. */ failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap); - rand = 1 + prandom_u32_max(elen+1); + rand = get_random_u32_between(1, elen + 2); /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */ failed_tests += do_test(rand, expect, elen, fmt, ap); failed_tests += do_test(0, expect, elen, fmt, ap); @@@ -704,29 -704,31 +704,29 @@@ flags(void
static void __init fwnode_pointer(void) { - const struct software_node softnodes[] = { - { .name = "first", }, - { .name = "second", .parent = &softnodes[0], }, - { .name = "third", .parent = &softnodes[1], }, - { NULL /* Guardian */ } - }; - const char * const full_name = "first/second/third"; + const struct software_node first = { .name = "first" }; + const struct software_node second = { .name = "second", .parent = &first }; + const struct software_node third = { .name = "third", .parent = &second }; + const struct software_node *group[] = { &first, &second, &third, NULL }; const char * const full_name_second = "first/second"; + const char * const full_name_third = "first/second/third"; const char * const second_name = "second"; const char * const third_name = "third"; int rval;
- rval = software_node_register_nodes(softnodes); + rval = software_node_register_node_group(group); if (rval) { pr_warn("cannot register softnodes; rval %d\n", rval); return; }
- test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1])); - test(full_name, "%pfw", software_node_fwnode(&softnodes[2])); - test(full_name, "%pfwf", software_node_fwnode(&softnodes[2])); - test(second_name, "%pfwP", software_node_fwnode(&softnodes[1])); - test(third_name, "%pfwP", software_node_fwnode(&softnodes[2])); + test(full_name_second, "%pfw", software_node_fwnode(&second)); + test(full_name_third, "%pfw", software_node_fwnode(&third)); + test(full_name_third, "%pfwf", software_node_fwnode(&third)); + test(second_name, "%pfwP", software_node_fwnode(&second)); + test(third_name, "%pfwP", software_node_fwnode(&third));
- software_node_unregister_nodes(softnodes); + software_node_unregister_node_group(group); }
static void __init fourcc_pointer(void) diff --combined mm/slub.c index 0a14e7bc278c,7cd2c657030a..1fd8e1f15cd0 --- a/mm/slub.c +++ b/mm/slub.c @@@ -1800,8 -1800,6 +1800,8 @@@ static inline struct slab *alloc_slab_p
slab = folio_slab(folio); __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); if (page_is_pfmemalloc(folio_page(folio, 0))) slab_set_pfmemalloc(slab);
@@@ -1883,7 -1881,7 +1883,7 @@@ static bool shuffle_freelist(struct kme return false;
freelist_count = oo_objects(s->oo); - pos = prandom_u32_max(freelist_count); + pos = get_random_u32_below(freelist_count);
page_limit = slab->objects * s->size; start = fixup_red_left(s, slab_address(slab)); @@@ -2001,11 -1999,17 +2001,11 @@@ static void __free_slab(struct kmem_cac int order = folio_order(folio); int pages = 1 << order;
- if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { - void *p; - - slab_pad_check(s, slab); - for_each_object(p, s, slab_address(slab), slab->objects) - check_object(s, slab, p, SLUB_RED_INACTIVE); - } - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; unaccount_slab(slab, order, s); @@@ -2021,17 -2025,9 +2021,17 @@@ static void rcu_free_slab(struct rcu_he
static void free_slab(struct kmem_cache *s, struct slab *slab) { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { + void *p; + + slab_pad_check(s, slab); + for_each_object(p, s, slab_address(slab), slab->objects) + check_object(s, slab, p, SLUB_RED_INACTIVE); + } + + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&slab->rcu_head, rcu_free_slab); - } else + else __free_slab(s, slab); }
@@@ -2415,7 -2411,7 +2415,7 @@@ static void init_kmem_cache_cpus(struc static void deactivate_slab(struct kmem_cache *s, struct slab *slab, void *freelist) { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST }; + enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST }; struct kmem_cache_node *n = get_node(s, slab_nid(slab)); int free_delta = 0; enum slab_modes mode = M_NONE; @@@ -2491,6 -2487,14 +2491,6 @@@ redo * acquire_slab() will see a slab that is frozen */ spin_lock_irqsave(&n->list_lock, flags); - } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) { - mode = M_FULL; - /* - * This also ensures that the scanning of full - * slabs from diagnostic functions will not see - * any frozen slabs. - */ - spin_lock_irqsave(&n->list_lock, flags); } else { mode = M_FULL_NOLIST; } @@@ -2500,7 -2504,7 +2500,7 @@@ old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) { - if (mode == M_PARTIAL || mode == M_FULL) + if (mode == M_PARTIAL) spin_unlock_irqrestore(&n->list_lock, flags); goto redo; } @@@ -2514,6 -2518,10 +2514,6 @@@ stat(s, DEACTIVATE_EMPTY); discard_slab(s, slab); stat(s, FREE_SLAB); - } else if (mode == M_FULL) { - add_full(s, n, slab); - spin_unlock_irqrestore(&n->list_lock, flags); - stat(s, DEACTIVATE_FULL); } else if (mode == M_FULL_NOLIST) { stat(s, DEACTIVATE_FULL); } @@@ -4009,8 -4017,7 +4009,8 @@@ init_kmem_cache_node(struct kmem_cache_ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); + NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH * + sizeof(struct kmem_cache_cpu));
/* * Must align to double word boundary for the double cmpxchg @@@ -5579,21 -5586,7 +5579,21 @@@ static ssize_t failslab_show(struct kme { return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); } -SLAB_ATTR_RO(failslab); + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + if (s->refcount > 1) + return -EINVAL; + + if (buf[0] == '1') + WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB); + else + WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB); + + return length; +} +SLAB_ATTR(failslab); #endif
static ssize_t shrink_show(struct kmem_cache *s, char *buf) @@@ -5927,6 -5920,11 +5927,6 @@@ static int sysfs_slab_add(struct kmem_c struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s);
- if (!kset) { - kobject_init(&s->kobj, &slab_ktype); - return 0; - } - if (!unmergeable && disable_higher_order_debug && (slub_debug & DEBUG_METADATA_FLAGS)) unmergeable = 1; @@@ -6056,7 -6054,8 +6056,7 @@@ static int __init slab_sysfs_init(void mutex_unlock(&slab_mutex); return 0; } - -__initcall(slab_sysfs_init); +late_initcall(slab_sysfs_init); #endif /* CONFIG_SYSFS */
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) diff --combined net/bluetooth/mgmt.c index 0dd30a3beb77,bab37a529db5..38828621dd3d --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@@ -7373,9 -7373,8 +7373,8 @@@ static int get_conn_info(struct sock *s /* To avoid client trying to guess when to poll again for information we * calculate conn info age as random value between min/max set in hdev. */ - conn_info_age = hdev->conn_info_min_age + - prandom_u32_max(hdev->conn_info_max_age - - hdev->conn_info_min_age); + conn_info_age = get_random_u32_between(hdev->conn_info_min_age, + hdev->conn_info_max_age);
/* Query controller to refresh cached values if they are too old or were * never read. @@@ -8859,7 -8858,7 +8858,7 @@@ static int add_ext_adv_params(struct so * extra parameters we don't know about will be ignored in this request. */ if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_INVALID_PARAMS);
flags = __le32_to_cpu(cp->flags); diff --combined net/can/j1939/transport.c index f26f4cfa9e63,67d36776aff4..5c722b55fe23 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@@ -987,7 -987,7 +987,7 @@@ static int j1939_session_tx_eoma(struc /* wait for the EOMA packet to come in */ j1939_tp_set_rxtimeout(session, 1250);
- netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session); + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
return 0; } @@@ -1168,7 -1168,7 +1168,7 @@@ static enum hrtimer_restart j1939_tp_tx if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { session->tx_retry++; j1939_tp_schedule_txtimer(session, - 10 + prandom_u32_max(16)); + 10 + get_random_u32_below(16)); } else { netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", __func__, session); diff --combined net/ipv4/tcp_input.c index d764b5854dfc,1a348298a99e..c9e7c9f57ee1 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -3646,7 -3646,8 +3646,8 @@@ static void tcp_send_challenge_ack(stru u32 half = (ack_limit + 1) >> 1;
WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now); - WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit)); + WRITE_ONCE(net->ipv4.tcp_challenge_count, + get_random_u32_between(half, ack_limit + half)); } count = READ_ONCE(net->ipv4.tcp_challenge_count); if (count > 0) { @@@ -4764,8 -4765,8 +4765,8 @@@ static void tcp_ofo_queue(struct sock * } }
-static bool tcp_prune_ofo_queue(struct sock *sk); -static int tcp_prune_queue(struct sock *sk); +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) @@@ -4773,11 -4774,11 +4774,11 @@@ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, size)) {
- if (tcp_prune_queue(sk) < 0) + if (tcp_prune_queue(sk, skb) < 0) return -1;
while (!sk_rmem_schedule(sk, skb, size)) { - if (!tcp_prune_ofo_queue(sk)) + if (!tcp_prune_ofo_queue(sk, skb)) return -1; } } @@@ -5329,8 -5330,6 +5330,8 @@@ new_range * Clean the out-of-order queue to make room. * We drop high sequences packets to : * 1) Let a chance for holes to be filled. + * This means we do not drop packets from ooo queue if their sequence + * is before incoming packet sequence. * 2) not add too big latencies if thousands of packets sit there. * (But if application shrinks SO_RCVBUF, we could still end up * freeing whole queue here) @@@ -5338,31 -5337,24 +5339,31 @@@ * * Return true if queue has shrunk. */ -static bool tcp_prune_ofo_queue(struct sock *sk) +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + bool pruned = false; int goal;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; + do { + struct sk_buff *skb = rb_to_skb(node); + + /* If incoming skb would land last in ofo queue, stop pruning. */ + if (after(TCP_SKB_CB(in_skb)->seq, TCP_SKB_CB(skb)->seq)) + break; + pruned = true; prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); - goal -= rb_to_skb(node)->truesize; - tcp_drop_reason(sk, rb_to_skb(node), - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + goal -= skb->truesize; + tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + tp->ooo_last_skb = rb_to_skb(prev); if (!prev || goal <= 0) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) @@@ -5371,18 -5363,16 +5372,18 @@@ } node = prev; } while (node); - tp->ooo_last_skb = rb_to_skb(prev);
- /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); - return true; + if (pruned) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + } + return pruned; }
/* Reduce allocated memory if we can, trying to get @@@ -5392,7 -5382,7 +5393,7 @@@ * until the socket owning process reads some of the data * to stabilize the situation. */ -static int tcp_prune_queue(struct sock *sk) +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk);
@@@ -5419,7 -5409,7 +5420,7 @@@ /* Collapsing did not help, destructive actions follow. * This must not ever occur. */
- tcp_prune_ofo_queue(sk); + tcp_prune_ofo_queue(sk, in_skb);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; diff --combined net/netlink/af_netlink.c index 9ebdf3262015,7a401d94463a..b8afec32cff6 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@@ -812,17 -812,6 +812,17 @@@ static int netlink_release(struct socke }
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + + /* Because struct net might disappear soon, do not keep a pointer. */ + if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + /* Because of deferred_put_nlk_sk and use of work queue, + * it is possible netns will be freed before this socket. + */ + sock_net_set(sk, &init_net); + __netns_tracker_alloc(&init_net, &sk->ns_tracker, + false, GFP_KERNEL); + } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@@ -846,7 -835,7 +846,7 @@@ retry /* Bind collision, search negative portid values. */ if (rover == -4096) /* rover will be in range [S32_MIN, -4097] */ - rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN); + rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN); else if (rover >= -4096) rover = -4097; portid = rover--; @@@ -2499,24 -2488,19 +2499,24 @@@ void netlink_ack(struct sk_buff *in_skb flags |= NLM_F_ACK_TLVS;
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); - if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; - sk_error_report(NETLINK_CB(in_skb).sk); - return; - } + if (!skb) + goto err_skb;
rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, flags); + NLMSG_ERROR, sizeof(*errmsg), flags); + if (!rep) + goto err_bad_put; errmsg = nlmsg_data(rep); errmsg->error = err; - unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) - ? nlh->nlmsg_len : sizeof(*nlh), - /* Bounds checked by the skb layer. */); + errmsg->msg = *nlh; + + if (!(flags & NLM_F_CAPPED)) { + if (!nlmsg_append(skb, nlmsg_len(nlh))) + goto err_bad_put; + + memcpy(errmsg->msg.nlmsg_data, nlh->nlmsg_data, + nlmsg_len(nlh)); + }
if (tlvlen) netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); @@@ -2524,14 -2508,6 +2524,14 @@@ nlmsg_end(skb, rep);
nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); + + return; + +err_bad_put: + nlmsg_free(skb); +err_skb: + NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack);
diff --combined net/packet/af_packet.c index 44f20cf8a0c0,51a47ade92e8..4155ce680b41 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -1350,7 -1350,7 +1350,7 @@@ static bool fanout_flow_is_huge(struct if (READ_ONCE(history[i]) == rxhash) count++;
- victim = prandom_u32_max(ROLLOVER_HLEN); + victim = get_random_u32_below(ROLLOVER_HLEN);
/* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) @@@ -1386,7 -1386,7 +1386,7 @@@ static unsigned int fanout_demux_rnd(st struct sk_buff *skb, unsigned int num) { - return prandom_u32_max(num); + return get_random_u32_below(num); }
static unsigned int fanout_demux_rollover(struct packet_fanout *f, @@@ -1777,7 -1777,6 +1777,7 @@@ static int fanout_add(struct sock *sk, match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; + match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; @@@ -3278,7 -3277,7 +3278,7 @@@ static int packet_bind_spkt(struct sock int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1];
/* * Check legality @@@ -3289,8 -3288,8 +3289,8 @@@ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0;
return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } @@@ -3562,11 -3561,11 +3562,11 @@@ static int packet_getname_spkt(struct s return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock();
return sizeof(*uaddr); diff --combined net/sctp/socket.c index 3e83963d1b8a,cfe72085fdc4..b198de62f376 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -5098,17 -5098,13 +5098,17 @@@ static void sctp_destroy_sock(struct so }
/* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk);
/* Free up the HMAC transform. */ crypto_free_shash(sp->hmac); +}
+static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); }
@@@ -8323,7 -8319,7 +8323,7 @@@ static int sctp_get_port_local(struct s
inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rover = prandom_u32_max(remaining) + low; + rover = get_random_u32_below(remaining) + low;
do { rover++; @@@ -9431,7 -9427,7 +9431,7 @@@ void sctp_copy_sock(struct sock *newsk sctp_sk(newsk)->reuse = sp->reuse;
newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@@ -9666,20 -9662,11 +9666,20 @@@ struct proto sctp_prot =
#if IS_ENABLED(CONFIG_IPV6)
-#include <net/transp_v6.h> -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; }
struct proto sctpv6_prot = { @@@ -9689,8 -9676,8 +9689,8 @@@ .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt,
linux-merge@lists.open-mesh.org