The following commit has been merged in the master branch:

commit 778ce723e93ee803ef5883619fe2391e00dbc209
Merge: 1440f576022887004f719883acb094e7e0dd4944 7880672bdc975daa586e8256714d9906d30c615e
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed Oct 12 14:39:38 2022 -0700
Merge tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- Some minor typo fixes
- A fix of the Xen pcifront driver for supporting the device model to run in a Linux stub domain
- A cleanup of the pcifront driver
- A series to enable grant-based virtio with Xen on x86
- A cleanup of Xen PV guests to distinguish between safe and faulting MSR accesses
- Two fixes of the Xen gntdev driver
- Two fixes of the new xen grant DMA driver
* tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: Kconfig: Fix spelling mistake "Maxmium" -> "Maximum"
  xen/pv: support selecting safe/unsafe msr accesses
  xen/pv: refactor msr access functions to support safe and unsafe accesses
  xen/pv: fix vendor checks for pmu emulation
  xen/pv: add fault recovery control to pmu msr accesses
  xen/virtio: enable grant based virtio on x86
  xen/virtio: use dom0 as default backend for CONFIG_XEN_VIRTIO_FORCE_GRANT
  xen/virtio: restructure xen grant dma setup
  xen/pcifront: move xenstore config scanning into sub-function
  xen/gntdev: Accommodate VMA splitting
  xen/gntdev: Prevent leaking grants
  xen/virtio: Fix potential deadlock when accessing xen_grant_dma_devices
  xen/virtio: Fix n_pages calculation in xen_grant_dma_map(unmap)_page()
  xen/xenbus: Fix spelling mistake "hardward" -> "hardware"
  xen-pcifront: Handle missed Connected state
diff --combined Documentation/admin-guide/kernel-parameters.txt index 69b1533c1f02,1bda9cf18fae..a465d5242774 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@@ -321,8 -321,6 +321,8 @@@ force_enable - Force enable the IOMMU on platforms known to be buggy with IOMMU enabled. Use this option with care. + pgtbl_v1 - Use v1 page table for DMA-API (Default). + pgtbl_v2 - Use v2 page table for DMA-API.
amd_iommu_dump= [HW,X86-64] Enable AMD IOMMU driver option to dump the ACPI table @@@ -968,6 -966,10 +968,6 @@@
debugpat [X86] Enable PAT debugging
- decnet.addr= [HW,NET] - Format: <area>[,<node>] - See also Documentation/networking/decnet.rst. - default_hugepagesz= [HW] The size of the default HugeTLB page. This is the size represented by the legacy /proc/ hugepages @@@ -1469,14 -1471,6 +1469,14 @@@ Permit 'security.evm' to be updated regardless of current integrity status.
+ early_page_ext [KNL] Enforces page_ext initialization to earlier + stages so cover more early boot allocations. + Please note that as side effect some optimizations + might be disabled to achieve that (e.g. parallelized + memory initialization is disabled) so the boot process + might take longer, especially on systems with a lot of + memory. Available with CONFIG_PAGE_EXTENSION=y. + failslab= fail_usercopy= fail_page_alloc= @@@ -2442,12 -2436,6 +2442,12 @@@ 0: force disabled 1: force enabled
+ kunit.enable= [KUNIT] Enable executing KUnit tests. Requires + CONFIG_KUNIT to be set to be fully enabled. The + default value can be overridden via + KUNIT_DEFAULT_ENABLED. + Default is 1 (enabled) + kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP)
@@@ -3219,7 -3207,6 +3219,7 @@@ spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] ssbd=force-off [ARM64] + nospectre_bhb [ARM64] l1tf=off [X86] mds=off [X86] tsx_async_abort=off [X86] @@@ -3626,7 -3613,7 +3626,7 @@@
nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
- nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. + nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. @@@ -3639,15 -3626,11 +3639,15 @@@ (bounds check bypass). With this option data leaks are possible in the system.
- nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for + nospectre_v2 [X86,PPC_E500,ARM64] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may allow data leaks with this option.
+ nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch + history injection) vulnerability. System may allow data leaks + with this option. + nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
@@@ -3758,9 -3741,9 +3758,9 @@@ [X86,PV_OPS] Disable paravirtualized VMware scheduler clock and use the default one.
- no-steal-acc [X86,PV_OPS,ARM64] Disable paravirtualized steal time - accounting. steal time is computed, but won't - influence scheduler behaviour + no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized + steal time accounting. steal time is computed, but + won't influence scheduler behaviour
nolapic [X86-32,APIC] Do not enable or use the local APIC.
@@@ -3822,10 -3805,6 +3822,10 @@@
nox2apic [X86-64,APIC] Do not enable x2APIC mode.
+ NOTE: this parameter will be ignored on systems with the + LEGACY_XAPIC_DISABLED bit set in the + IA32_XAPIC_DISABLE_STATUS MSR. + nps_mtm_hs_ctr= [KNL,ARC] This parameter sets the maximum duration, in cycles, each HW thread of the CTOP can run @@@ -6049,6 -6028,12 +6049,6 @@@ This parameter controls use of the Protected Execution Facility on pSeries.
- swapaccount= [KNL] - Format: [0|1] - Enable accounting of swap in memory resource - controller if no parameter or 1 is given or disable - it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst) - swiotlb= [ARM,IA-64,PPC,MIPS,X86] Format: { <int> [,<int>] | force | noforce } <int> -- Number of I/O TLB slabs @@@ -6851,6 -6836,12 +6851,12 @@@ Crash from Xen panic notifier, without executing late panic() code such as dumping handler.
+ xen_msr_safe= [X86,XEN] + Format: <bool> + Select whether to always use non-faulting (safe) MSR + access functions when running as Xen PV guest. The + default value is controlled by CONFIG_XEN_PV_MSR_SAFE. + xen_nopvspin [X86,XEN] Disables the qspinlock slowpath using Xen PV optimizations. This parameter is obsoleted by "nopvspin" parameter, which diff --combined arch/x86/xen/enlighten_pv.c index 9b1a58dda935,0ad3d4bf52b3..f82857e48815 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@@ -108,11 -108,21 +108,21 @@@ struct tls_descs */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); + + static int __init parse_xen_msr_safe(char *str) + { + if (str) + return strtobool(str, &xen_msr_safe); + return -EINVAL; + } + early_param("xen_msr_safe", parse_xen_msr_safe); + static void __init xen_pv_init_platform(void) { /* PV guests can't operate virtio devices without grants. */ if (IS_ENABLED(CONFIG_XEN_VIRTIO)) - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
@@@ -765,7 -775,6 +775,7 @@@ static void xen_load_idt(const struct d { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + static const struct trap_info zero = { }; unsigned out;
trace_xen_cpu_load_idt(desc); @@@ -775,7 -784,7 +785,7 @@@ memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
out = xen_convert_trap_info(desc, traps, false); - memset(&traps[out], 0, sizeof(traps[0])); + traps[out] = zero;
xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) @@@ -917,14 -926,18 +927,18 @@@ static void xen_write_cr4(unsigned lon native_write_cr4(cr4); }
- static u64 xen_read_msr_safe(unsigned int msr, int *err) + static u64 xen_do_read_msr(unsigned int msr, int *err) { - u64 val; + u64 val = 0; /* Avoid uninitialized value for safe variant. */
if (pmu_msr_read(msr, &val, err)) return val;
- val = native_read_msr_safe(msr, err); + if (err) + val = native_read_msr_safe(msr, err); + else + val = native_read_msr(msr); + switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; @@@ -933,23 -946,39 +947,39 @@@ return val; }
- static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) + static void set_seg(unsigned int which, unsigned int low, unsigned int high, + int *err) { - int ret; - unsigned int which; - u64 base; + u64 base = ((u64)high << 32) | low;
- ret = 0; + if (HYPERVISOR_set_segment_base(which, base) == 0) + return;
+ if (err) + *err = -EIO; + else + WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); + } + + /* + * Support write_msr_safe() and write_msr() semantics. + * With err == NULL write_msr() semantics are selected. + * Supplying an err pointer requires err to be pre-initialized with 0. + */ + static void xen_do_write_msr(unsigned int msr, unsigned int low, + unsigned int high, int *err) + { switch (msr) { - case MSR_FS_BASE: which = SEGBASE_FS; goto set; - case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; - case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; - - set: - base = ((u64)high << 32) | low; - if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EIO; + case MSR_FS_BASE: + set_seg(SEGBASE_FS, low, high, err); + break; + + case MSR_KERNEL_GS_BASE: + set_seg(SEGBASE_GS_USER, low, high, err); + break; + + case MSR_GS_BASE: + set_seg(SEGBASE_GS_KERNEL, low, high, err); break;
case MSR_STAR: @@@ -965,31 -994,42 +995,42 @@@ break;
default: - if (!pmu_msr_write(msr, low, high, &ret)) - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, err)) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); + } } + } + + static u64 xen_read_msr_safe(unsigned int msr, int *err) + { + return xen_do_read_msr(msr, err); + } + + static int xen_write_msr_safe(unsigned int msr, unsigned int low, + unsigned int high) + { + int err = 0; + + xen_do_write_msr(msr, low, high, &err);
- return ret; + return err; }
static u64 xen_read_msr(unsigned int msr) { - /* - * This will silently swallow a #GP from RDMSR. It may be worth - * changing that. - */ int err;
- return xen_read_msr_safe(msr, &err); + return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); }
static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) { - /* - * This will silently swallow a #GP from WRMSR. It may be worth - * changing that. - */ - xen_write_msr_safe(msr, low, high); + int err; + + xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); }
/* This is called once we have the cpu_possible_mask */