The following commit has been merged in the master branch: commit 3e021389bef93231b68446ef5bbdfa58b9d06d8b Merge: 6cfdd67febd1a9670eaeed3744cff411ded5e186 f813614f531114db796ad66ced75c5dc8db7aa3a Author: Stephen Rothwell sfr@canb.auug.org.au Date: Tue Jan 30 11:07:14 2018 +1100
Merge remote-tracking branch 'net-next/master'
diff --combined Documentation/fault-injection/fault-injection.txt index 1c0bfbbf7869,f4a32463ca48..de1dc35fe500 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@@ -1,7 -1,7 +1,7 @@@ Fault injection capabilities infrastructure ===========================================
-See also drivers/md/faulty.c and "every_nth" module option for scsi_debug. +See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug.
Available fault injection capabilities @@@ -30,6 -30,12 +30,12 @@@ o fail_mmc_reques injects MMC data errors on devices permitted by setting debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+ o fail_function + + injects error return on specific functions, which are marked by + ALLOW_ERROR_INJECTION() macro, by setting debugfs entries + under /sys/kernel/debug/fail_function. No boot option supported. + Configure fault-injection capabilities behavior -----------------------------------------------
@@@ -123,6 -129,29 +129,29 @@@ configuration of fault-injection capabi default is 'N', setting it to 'Y' will disable failure injections when dealing with private (address space) futexes.
+ - /sys/kernel/debug/fail_function/inject: + + Format: { 'function-name' | '!function-name' | '' } + specifies the target function of error injection by name. + If the function name leads '!' prefix, given function is + removed from injection list. If nothing specified ('') + injection list is cleared. + + - /sys/kernel/debug/fail_function/injectable: + + (read only) shows error injectable functions and what type of + error values can be specified. The error type will be one of + below; + - NULL: retval must be 0. + - ERRNO: retval must be -1 to -MAX_ERRNO (-4096). + - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096). + + - /sys/kernel/debug/fail_function/<functiuon-name>/retval: + + specifies the "error" return value to inject to the given + function for given function. This will be created when + user specifies new injection entry. + o Boot option
In order to inject faults while debugfs is not available (early boot time), @@@ -268,6 -297,45 +297,45 @@@ trap "echo 0 > /sys/kernel/debug/$FAILT echo "Injecting errors into the module $module... (interrupt to stop)" sleep 1000000
+ ------------------------------------------------------------------------------ + + o Inject open_ctree error while btrfs mount + + #!/bin/bash + + rm -f testfile.img + dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 + DEVICE=$(losetup --show -f testfile.img) + mkfs.btrfs -f $DEVICE + mkdir -p tmpmnt + + FAILTYPE=fail_function + FAILFUNC=open_ctree + echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject + echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval + echo N > /sys/kernel/debug/$FAILTYPE/task-filter + echo 100 > /sys/kernel/debug/$FAILTYPE/probability + echo 0 > /sys/kernel/debug/$FAILTYPE/interval + echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 1 > /sys/kernel/debug/$FAILTYPE/verbose + + mount -t btrfs $DEVICE tmpmnt + if [ $? -ne 0 ] + then + echo "SUCCESS!" + else + echo "FAILED!" + umount tmpmnt + fi + + echo > /sys/kernel/debug/$FAILTYPE/inject + + rmdir tmpmnt + losetup -d $DEVICE + rm testfile.img + + Tool to run command with failslab or fail_page_alloc ---------------------------------------------------- In order to make it easier to accomplish the tasks mentioned above, we can use diff --combined MAINTAINERS index 09fccc4420d6,884ee9601707..88f2986eb766 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -329,7 -329,7 +329,7 @@@ F: drivers/acpi/apei
ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore robert.moore@intel.com -M: Lv Zheng lv.zheng@intel.com +M: Erik Schmauss erik.schmauss@intel.com M: "Rafael J. Wysocki" rafael.j.wysocki@intel.com L: linux-acpi@vger.kernel.org L: devel@acpica.org @@@ -1263,12 -1263,6 +1263,12 @@@ L: linux-arm-kernel@lists.infradead.or S: Supported F: drivers/net/ethernet/cavium/thunder/
+ARM/CIRRUS LOGIC BK3 MACHINE SUPPORT +M: Lukasz Majewski lukma@denx.de +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-ep93xx/ts72xx.c + ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE M: Alexander Shiyan shc_work@mail.ru L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1341,8 -1335,10 +1341,10 @@@ T: git git://github.com/ulli-kroll/linu S: Maintained F: Documentation/devicetree/bindings/arm/gemini.txt F: Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt + F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt F: Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt F: arch/arm/mach-gemini/ + F: drivers/net/ethernet/cortina/gemini/* F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c
@@@ -1597,7 -1593,6 +1599,7 @@@ F: arch/arm/boot/dts/kirkwood F: arch/arm/configs/mvebu_*_defconfig F: arch/arm/mach-mvebu/ F: arch/arm64/boot/dts/marvell/armada* +F: drivers/cpufreq/armada-37xx-cpufreq.c F: drivers/cpufreq/mvebu-cpufreq.c F: drivers/irqchip/irq-armada-370-xp.c F: drivers/irqchip/irq-mvebu-* @@@ -1650,38 -1645,14 +1652,38 @@@ ARM/NEC MOBILEPRO 900/c MACHINE SUPPOR M: Michael Petchkovsky mkpetch@internode.on.net S: Maintained
-ARM/NOMADIK ARCHITECTURE -M: Alessandro Rubini rubini@unipv.it +ARM/NOMADIK/U300/Ux500 ARCHITECTURES M: Linus Walleij linus.walleij@linaro.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-nomadik/ -F: drivers/pinctrl/nomadik/ +F: arch/arm/mach-u300/ +F: arch/arm/mach-ux500/ +F: arch/arm/boot/dts/ste-* +F: drivers/clk/clk-nomadik.c +F: drivers/clk/clk-u300.c +F: drivers/clocksource/clksrc-dbx500-prcmu.c +F: drivers/clocksource/timer-u300.c +F: drivers/dma/coh901318* +F: drivers/dma/ste_dma40* +F: drivers/hwspinlock/u8500_hsem.c F: drivers/i2c/busses/i2c-nomadik.c +F: drivers/i2c/busses/i2c-stu300.c +F: drivers/mfd/ab3100* +F: drivers/mfd/ab8500* +F: drivers/mfd/abx500* +F: drivers/mfd/dbx500* +F: drivers/mfd/db8500* +F: drivers/pinctrl/nomadik/ +F: drivers/pinctrl/pinctrl-coh901* +F: drivers/pinctrl/pinctrl-u300.c +F: drivers/rtc/rtc-ab3100.c +F: drivers/rtc/rtc-ab8500.c +F: drivers/rtc/rtc-coh901331.c +F: drivers/rtc/rtc-pl031.c +F: drivers/watchdog/coh901327_wdt.c +F: Documentation/devicetree/bindings/arm/ste-* +F: Documentation/devicetree/bindings/arm/ux500/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
ARM/NUVOTON W90X900 ARM ARCHITECTURE @@@ -1995,10 -1966,9 +1997,10 @@@ N: stm3 F: drivers/clocksource/armv7m_systick.c
ARM/TANGO ARCHITECTURE -M: Marc Gonzalez marc_gonzalez@sigmadesigns.com +M: Marc Gonzalez marc.w.gonzalez@free.fr +M: Mans Rullgard mans@mansr.com L: linux-arm-kernel@lists.infradead.org -S: Maintained +S: Odd Fixes N: tango
ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT @@@ -2062,6 -2032,21 +2064,6 @@@ M: Dmitry Eremin-Solenikov <dbaryshkov@ M: Dirk Opfer dirk@opfer-online.de S: Maintained
-ARM/U300 MACHINE SUPPORT -M: Linus Walleij linus.walleij@linaro.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Supported -F: arch/arm/mach-u300/ -F: drivers/clocksource/timer-u300.c -F: drivers/i2c/busses/i2c-stu300.c -F: drivers/rtc/rtc-coh901331.c -F: drivers/watchdog/coh901327_wdt.c -F: drivers/dma/coh901318* -F: drivers/mfd/ab3100* -F: drivers/rtc/rtc-ab3100.c -F: drivers/rtc/rtc-coh901331.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git - ARM/UNIPHIER ARCHITECTURE M: Masahiro Yamada yamada.masahiro@socionext.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -2083,6 -2068,24 +2085,6 @@@ F: drivers/reset/reset-uniphier. F: drivers/tty/serial/8250/8250_uniphier.c N: uniphier
-ARM/Ux500 ARM ARCHITECTURE -M: Linus Walleij linus.walleij@linaro.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -F: arch/arm/mach-ux500/ -F: drivers/clocksource/clksrc-dbx500-prcmu.c -F: drivers/dma/ste_dma40* -F: drivers/hwspinlock/u8500_hsem.c -F: drivers/mfd/abx500* -F: drivers/mfd/ab8500* -F: drivers/mfd/dbx500* -F: drivers/mfd/db8500* -F: drivers/pinctrl/nomadik/pinctrl-ab* -F: drivers/pinctrl/nomadik/pinctrl-nomadik* -F: drivers/rtc/rtc-ab8500.c -F: drivers/rtc/rtc-pl031.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git - ARM/Ux500 CLOCK FRAMEWORK SUPPORT M: Ulf Hansson ulf.hansson@linaro.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -2209,6 -2212,14 +2211,6 @@@ L: linux-leds@vger.kernel.or S: Maintained F: drivers/leds/leds-as3645a.c
-AS3645A LED FLASH CONTROLLER DRIVER -M: Laurent Pinchart laurent.pinchart@ideasonboard.com -L: linux-media@vger.kernel.org -T: git git://linuxtv.org/media_tree.git -S: Maintained -F: drivers/media/i2c/as3645a.c -F: include/media/i2c/as3645a.h - ASAHI KASEI AK8974 DRIVER M: Linus Walleij linus.walleij@linaro.org L: linux-iio@vger.kernel.org @@@ -2261,9 -2272,7 +2263,9 @@@ F: include/linux/async_tx. AT24 EEPROM DRIVER M: Bartosz Golaszewski brgl@bgdev.pl L: linux-i2c@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git S: Maintained +F: Documentation/devicetree/bindings/eeprom/at24.txt F: drivers/misc/eeprom/at24.c F: include/linux/platform_data/at24.h
@@@ -2384,6 -2393,13 +2386,6 @@@ F: Documentation/devicetree/bindings/in F: drivers/input/touchscreen/atmel_mxt_ts.c F: include/linux/platform_data/atmel_mxt_ts.h
-ATMEL NAND DRIVER -M: Wenyou Yang wenyou.yang@atmel.com -M: Josh Wu rainyfeeling@outlook.com -L: linux-mtd@lists.infradead.org -S: Supported -F: drivers/mtd/nand/atmel/* - ATMEL SAMA5D2 ADC DRIVER M: Ludovic Desroches ludovic.desroches@microchip.com L: linux-iio@vger.kernel.org @@@ -2495,8 -2511,6 +2497,8 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained F: Documentation/devicetree/bindings/arm/axentia.txt F: arch/arm/boot/dts/at91-linea.dtsi +F: arch/arm/boot/dts/at91-natte.dtsi +F: arch/arm/boot/dts/at91-nattis-2-natte-2.dts F: arch/arm/boot/dts/at91-tse850-3.dts
AXENTIA ASOC DRIVERS @@@ -2560,6 -2574,7 +2562,7 @@@ S: Maintaine F: Documentation/ABI/testing/sysfs-class-net-batman-adv F: Documentation/ABI/testing/sysfs-class-net-mesh F: Documentation/networking/batman-adv.rst + F: include/uapi/linux/batadv_packet.h F: include/uapi/linux/batman_adv.h F: net/batman-adv/
@@@ -2683,7 -2698,6 +2686,6 @@@ F: drivers/mtd/devices/block2mtd.
BLUETOOTH DRIVERS M: Marcel Holtmann marcel@holtmann.org - M: Gustavo Padovan gustavo@padovan.org M: Johan Hedberg johan.hedberg@gmail.com L: linux-bluetooth@vger.kernel.org W: http://www.bluez.org/ @@@ -2694,7 -2708,6 +2696,6 @@@ F: drivers/bluetooth
BLUETOOTH SUBSYSTEM M: Marcel Holtmann marcel@holtmann.org - M: Gustavo Padovan gustavo@padovan.org M: Johan Hedberg johan.hedberg@gmail.com L: linux-bluetooth@vger.kernel.org W: http://www.bluez.org/ @@@ -2719,12 -2732,16 +2720,16 @@@ M: Alexei Starovoitov <ast@kernel.org M: Daniel Borkmann daniel@iogearbox.net L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org + T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git + T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git S: Supported F: arch/x86/net/bpf_jit* F: Documentation/networking/filter.txt F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h + F: include/trace/events/bpf.h + F: include/trace/events/xdp.h F: include/uapi/linux/bpf* F: include/uapi/linux/filter.h F: kernel/bpf/ @@@ -2828,8 -2845,6 +2833,8 @@@ S: Maintaine F: arch/arm/mach-bcm/*brcmstb* F: arch/arm/boot/dts/bcm7*.dts* F: drivers/bus/brcmstb_gisb.c +F: arch/arm/mm/cache-b15-rac.c +F: arch/arm/include/asm/hardware/cache-b15-rac.h N: brcmstb
BROADCOM BMIPS CPUFREQ DRIVER @@@ -3191,7 -3206,7 +3196,7 @@@ W: https://github.com/linux-ca T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git S: Maintained - F: Documentation/networking/can.txt + F: Documentation/networking/can.rst F: net/can/ F: include/linux/can/core.h F: include/uapi/linux/can.h @@@ -4332,12 -4347,10 +4337,12 @@@ T: git git://git.infradead.org/users/hc W: http://git.infradead.org/users/hch/dma-mapping.git S: Supported F: lib/dma-debug.c -F: lib/dma-noop.c +F: lib/dma-direct.c F: lib/dma-virt.c F: drivers/base/dma-mapping.c F: drivers/base/dma-coherent.c +F: include/asm-generic/dma-mapping.h +F: include/linux/dma-direct.h F: include/linux/dma-mapping.h
DME1737 HARDWARE MONITOR DRIVER @@@ -4939,6 -4952,11 +4944,11 @@@ S: Maintaine F: lib/dynamic_debug.c F: include/linux/dynamic_debug.h
+ DYNAMIC INTERRUPT MODERATION + M: Tal Gilboa talgi@mellanox.com + S: Maintained + F: include/linux/net_dim.h + DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" macro@linux-mips.org S: Maintained @@@ -5139,12 -5157,6 +5149,12 @@@ L: linux-edac@vger.kernel.or S: Maintained F: drivers/edac/skx_edac.c
+EDAC-TI +M: Tero Kristo t-kristo@ti.com +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/ti_edac.c + EDIROL UA-101/UA-1000 DRIVER M: Clemens Ladisch clemens@ladisch.de L: alsa-devel@alsa-project.org (moderated for non-subscribers) @@@ -6615,11 -6627,15 +6625,11 @@@ L: linux-i2c@vger.kernel.or S: Maintained F: drivers/i2c/i2c-stub.c
-i386 BOOT CODE -M: "H. Peter Anvin" hpa@zytor.com -S: Maintained -F: arch/x86/boot/ - -i386 SETUP CODE / CPU ERRATA WORKAROUNDS -M: "H. Peter Anvin" hpa@zytor.com -T: git git://git.kernel.org/pub/scm/linux/kernel/git/hpa/linux-2.6-x86setup.git +I2C MV64XXX MARVELL AND ALLWINNER DRIVER +M: Gregory CLEMENT gregory.clement@free-electrons.com +L: linux-i2c@vger.kernel.org S: Maintained +F: drivers/i2c/busses/i2c-mv64xxx.c
IA64 (Itanium) PLATFORM M: Tony Luck tony.luck@intel.com @@@ -7075,14 -7091,6 +7085,14 @@@ R: Dan Williams <dan.j.williams@intel.c S: Odd fixes F: drivers/dma/iop-adma.c
+INTEL IPU3 CSI-2 CIO2 DRIVER +M: Yong Zhi yong.zhi@intel.com +M: Sakari Ailus sakari.ailus@linux.intel.com +L: linux-media@vger.kernel.org +S: Maintained +F: drivers/media/pci/intel/ipu3/ +F: Documentation/media/uapi/v4l/pixfmt-srggb10-ipu3.rst + INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT M: Krzysztof Halasa khalasa@piap.pl S: Maintained @@@ -8417,13 -8425,6 +8427,13 @@@ L: linux-wireless@vger.kernel.or S: Odd Fixes F: drivers/net/wireless/marvell/mwl8k.c
+MARVELL NAND CONTROLLER DRIVER +M: Miquel Raynal miquel.raynal@free-electrons.com +L: linux-mtd@lists.infradead.org +S: Maintained +F: drivers/mtd/nand/marvell_nand.c +F: Documentation/devicetree/bindings/mtd/marvell-nand.txt + MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER M: Nicolas Pitre nico@fluxnic.net S: Odd Fixes @@@ -8695,15 -8696,6 +8705,15 @@@ T: git git://linuxtv.org/media_tree.gi S: Maintained F: drivers/media/dvb-frontends/stv6111*
+MEDIA DRIVERS FOR NVIDIA TEGRA - VDE +M: Dmitry Osipenko digetx@gmail.com +L: linux-media@vger.kernel.org +L: linux-tegra@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt +F: drivers/staging/media/tegra-vde/ + MEDIA INPUT INFRASTRUCTURE (V4L/DVB) M: Mauro Carvalho Chehab mchehab@s-opensource.com M: Mauro Carvalho Chehab mchehab@kernel.org @@@ -8747,6 -8739,13 +8757,13 @@@ L: netdev@vger.kernel.or S: Maintained F: drivers/net/ethernet/mediatek/
+ MEDIATEK SWITCH DRIVER + M: Sean Wang sean.wang@mediatek.com + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/dsa/mt7530.* + F: net/dsa/tag_mtk.c + MEDIATEK JPEG DRIVER M: Rick Chang rick.chang@mediatek.com M: Bin Liu bin.liu@mediatek.com @@@ -8980,7 -8979,7 +8997,7 @@@ L: linux-mtd@lists.infradead.or W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git master -T: git git://git.infradead.org/l2-mtd.git master +T: git git://git.infradead.org/linux-mtd.git mtd/next S: Maintained F: Documentation/devicetree/bindings/mtd/ F: drivers/mtd/ @@@ -9069,14 -9068,6 +9086,14 @@@ F: drivers/media/platform/atmel/atmel-i F: drivers/media/platform/atmel/atmel-isc-regs.h F: devicetree/bindings/media/atmel-isc.txt
+MICROCHIP / ATMEL NAND DRIVER +M: Wenyou Yang wenyou.yang@microchip.com +M: Josh Wu rainyfeeling@outlook.com +L: linux-mtd@lists.infradead.org +S: Supported +F: drivers/mtd/nand/atmel/* +F: Documentation/devicetree/bindings/mtd/atmel-nand.txt + MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER M: Woojung Huh Woojung.Huh@microchip.com M: Microchip Linux Driver Support UNGLinuxDriver@microchip.com @@@ -9128,7 -9119,6 +9145,7 @@@ S: Supporte F: Documentation/devicetree/bindings/mips/ F: Documentation/mips/ F: arch/mips/ +F: drivers/platform/mips/
MIPS BOSTON DEVELOPMENT BOARD M: Paul Burton paul.burton@mips.com @@@ -9156,25 -9146,6 +9173,25 @@@ F: arch/mips/include/asm/mach-loongson3 F: drivers/*/*loongson1* F: drivers/*/*/*loongson1*
+MIPS/LOONGSON2 ARCHITECTURE +M: Jiaxun Yang jiaxun.yang@flygoat.com +L: linux-mips@linux-mips.org +S: Maintained +F: arch/mips/loongson64/*{2e/2f}* +F: arch/mips/include/asm/mach-loongson64/ +F: drivers/*/*loongson2* +F: drivers/*/*/*loongson2* + +MIPS/LOONGSON3 ARCHITECTURE +M: Huacai Chen chenhc@lemote.com +L: linux-mips@linux-mips.org +S: Maintained +F: arch/mips/loongson64/ +F: arch/mips/include/asm/mach-loongson64/ +F: drivers/platform/mips/cpu_hwmon.c +F: drivers/*/*loongson3* +F: drivers/*/*/*loongson3* + MIPS RINT INSTRUCTION EMULATION M: Aleksandar Markovic aleksandar.markovic@mips.com L: linux-mips@linux-mips.org @@@ -9397,7 -9368,7 +9414,7 @@@ L: linux-mtd@lists.infradead.or W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git nand/fixes -T: git git://git.infradead.org/l2-mtd.git nand/next +T: git git://git.infradead.org/linux-mtd.git nand/next S: Maintained F: drivers/mtd/nand/ F: include/linux/mtd/*nand*.h @@@ -9652,6 -9623,11 +9669,11 @@@ NETWORKING [WIRELESS L: linux-wireless@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-wireless/list/
+ NETDEVSIM + M: Jakub Kicinski jakub.kicinski@netronome.com + S: Maintained + F: drivers/net/netdevsim/* + NETXEN (1/10) GbE SUPPORT M: Manish Chopra manish.chopra@cavium.com M: Rahul Verma rahul.verma@cavium.com @@@ -9798,15 -9774,6 +9820,15 @@@ S: Supporte F: Documentation/filesystems/ntfs.txt F: fs/ntfs/
+NUBUS SUBSYSTEM +M: Finn Thain fthain@telegraphics.com.au +L: linux-m68k@lists.linux-m68k.org +S: Maintained +F: arch/*/include/asm/nubus.h +F: drivers/nubus/ +F: include/linux/nubus.h +F: include/uapi/linux/nubus.h + NVIDIA (rivafb and nvidiafb) FRAMEBUFFER DRIVER M: Antonino Daplas adaplas@gmail.com L: linux-fbdev@vger.kernel.org @@@ -9867,7 -9834,6 +9889,7 @@@ NXP TFA9879 DRIVE M: Peter Rosin peda@axentia.se L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained +F: Documentation/devicetree/bindings/sound/tfa9879.txt F: sound/soc/codecs/tfa9879*
NXP-NCI NFC DRIVER @@@ -9882,18 -9848,6 +9904,18 @@@ M: Josh Poimboeuf <jpoimboe@redhat.com S: Supported F: tools/objtool/
+OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER +M: Frederic Barrat fbarrat@linux.vnet.ibm.com +M: Andrew Donnellan andrew.donnellan@au1.ibm.com +L: linuxppc-dev@lists.ozlabs.org +S: Supported +F: arch/powerpc/platforms/powernv/ocxl.c +F: arch/powerpc/include/asm/pnv-ocxl.h +F: drivers/misc/ocxl/ +F: include/misc/ocxl* +F: include/uapi/misc/ocxl.h +F: Documentation/accelerators/ocxl.txt + OMAP AUDIO SUPPORT M: Peter Ujfalusi peter.ujfalusi@ti.com M: Jarkko Nikula jarkko.nikula@bitmer.com @@@ -10128,14 -10082,6 +10150,14 @@@ S: Maintaine F: drivers/media/i2c/ov7670.c F: Documentation/devicetree/bindings/media/i2c/ov7670.txt
+OMNIVISION OV7740 SENSOR DRIVER +M: Wenyou Yang wenyou.yang@microchip.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov7740.c +F: Documentation/devicetree/bindings/media/i2c/ov7740.txt + ONENAND FLASH DRIVER M: Kyungmin Park kyungmin.park@samsung.com L: linux-mtd@lists.infradead.org @@@ -10634,12 -10580,8 +10656,12 @@@ T: git git://git.kernel.org/pub/scm/lin S: Supported F: Documentation/devicetree/bindings/pci/ F: Documentation/PCI/ +F: drivers/acpi/pci* F: drivers/pci/ +F: include/asm-generic/pci* F: include/linux/pci* +F: include/uapi/linux/pci* +F: lib/pci* F: arch/x86/pci/ F: arch/x86/kernel/quirks.c
@@@ -10978,7 -10920,6 +11000,7 @@@ F: include/linux/pm. F: include/linux/pm_* F: include/linux/powercap.h F: drivers/powercap/ +F: kernel/configs/nopm.config
POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland mark.rutland@arm.com @@@ -11740,8 -11681,8 +11762,8 @@@ F: drivers/mtd/nand/r852. RISC-V ARCHITECTURE M: Palmer Dabbelt palmer@sifive.com M: Albert Ou albert@sifive.com -L: patches@groups.riscv.org -T: git https://github.com/riscv/riscv-linux +L: linux-riscv@lists.infradead.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/palmer/riscv-linux.git S: Supported F: arch/riscv/ K: riscv @@@ -11848,15 -11789,13 +11870,13 @@@ T: git git://git.kernel.org/pub/scm/lin S: Maintained F: drivers/net/wireless/realtek/rtl818x/rtl8187/
- RTL8192CE WIRELESS DRIVER - M: Larry Finger Larry.Finger@lwfinger.net - M: Chaoming Li chaoming_li@realsil.com.cn + REALTEK WIRELESS DRIVER (rtlwifi family) + M: Ping-Ke Shih pkshih@realtek.com L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git S: Maintained F: drivers/net/wireless/realtek/rtlwifi/ - F: drivers/net/wireless/realtek/rtlwifi/rtl8192ce/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu) M: Jes Sorensen Jes.Sorensen@gmail.com @@@ -12020,13 -11959,6 +12040,13 @@@ S: Maintaine F: drivers/crypto/exynos-rng.c F: Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
+SAMSUNG EXYNOS TRUE RANDOM NUMBER GENERATOR (TRNG) DRIVER +M: Łukasz Stelmach l.stelmach@samsung.com +L: linux-samsung-soc@vger.kernel.org +S: Maintained +F: drivers/char/hw_random/exynos-trng.c +F: Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt + SAMSUNG FRAMEBUFFER DRIVER M: Jingoo Han jingoohan1@gmail.com L: linux-fbdev@vger.kernel.org @@@ -12089,7 -12021,6 +12109,7 @@@ F: drivers/media/i2c/s5k5baf. SAMSUNG S5P Security SubSystem (SSS) DRIVER M: Krzysztof Kozlowski krzk@kernel.org M: Vladimir Zapolskiy vz@mleia.com +M: Kamil Konieczny k.konieczny@partner.samsung.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -12434,14 -12365,6 +12454,14 @@@ T: git git://linuxtv.org/anttip/media_t S: Maintained F: drivers/media/tuners/si2157*
+SI2165 MEDIA DRIVER +M: Matthias Schwarzott zzam@gentoo.org +L: linux-media@vger.kernel.org +W: https://linuxtv.org +Q: http://patchwork.linuxtv.org/project/linux-media/list/ +S: Maintained +F: drivers/media/dvb-frontends/si2165* + SI2168 MEDIA DRIVER M: Antti Palosaari crope@iki.fi L: linux-media@vger.kernel.org @@@ -12697,12 -12620,6 +12717,12 @@@ F: include/media/soc F: drivers/media/i2c/soc_camera/ F: drivers/media/platform/soc_camera/
+SOCIONEXT UNIPHIER SOUND DRIVER +M: Katsuhiro Suzuki suzuki.katsuhiro@socionext.com +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +S: Maintained +F: sound/soc/uniphier/ + SOEKRIS NET48XX LED SUPPORT M: Chris Boot bootc@bootc.net S: Maintained @@@ -12727,15 -12644,6 +12747,15 @@@ L: linux-media@vger.kernel.or S: Supported F: drivers/media/pci/solo6x10/
+SOFTWARE DELEGATED EXCEPTION INTERFACE (SDEI) +M: James Morse james.morse@arm.com +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: Documentation/devicetree/bindings/arm/firmware/sdei.txt +F: drivers/firmware/arm_sdei.c +F: include/linux/sdei.h +F: include/uapi/linux/sdei.h + SOFTWARE RAID (Multiple Disks) SUPPORT M: Shaohua Li shli@kernel.org L: linux-raid@vger.kernel.org @@@ -12748,6 -12656,13 +12768,13 @@@ F: drivers/md/raid F: include/linux/raid/ F: include/uapi/linux/raid/
+ SOCIONEXT (SNI) NETSEC NETWORK DRIVER + M: Jassi Brar jaswinder.singh@linaro.org + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/ethernet/socionext/netsec.c + F: Documentation/devicetree/bindings/net/socionext-netsec.txt + SONIC NETWORK DRIVER M: Thomas Bogendoerfer tsbogend@alpha.franken.de L: netdev@vger.kernel.org @@@ -12900,7 -12815,7 +12927,7 @@@ L: linux-mtd@lists.infradead.or W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git spi-nor/fixes -T: git git://git.infradead.org/l2-mtd.git spi-nor/next +T: git git://git.infradead.org/linux-mtd.git spi-nor/next S: Maintained F: drivers/mtd/spi-nor/ F: include/linux/mtd/spi-nor.h @@@ -12987,6 -12902,12 +13014,6 @@@ S: Odd Fixe F: Documentation/devicetree/bindings/staging/iio/ F: drivers/staging/iio/
-STAGING - LIRC (LINUX INFRARED REMOTE CONTROL) DRIVERS -M: Jarod Wilson jarod@wilsonet.com -W: http://www.lirc.org/ -S: Odd Fixes -F: drivers/staging/media/lirc/ - STAGING - LUSTRE PARALLEL FILESYSTEM M: Oleg Drokin oleg.drokin@intel.com M: Andreas Dilger andreas.dilger@intel.com @@@ -13147,7 -13068,7 +13174,7 @@@ F: arch/x86/boot/video
SWIOTLB SUBSYSTEM M: Konrad Rzeszutek Wilk konrad.wilk@oracle.com -L: linux-kernel@vger.kernel.org +L: iommu@lists.linux-foundation.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git S: Supported F: lib/swiotlb.c @@@ -13368,15 -13289,6 +13395,15 @@@ T: git git://linuxtv.org/anttip/media_t S: Maintained F: drivers/media/tuners/tda18218*
+TDA18250 MEDIA DRIVER +M: Olli Salonen olli.salonen@iki.fi +L: linux-media@vger.kernel.org +W: https://linuxtv.org +Q: http://patchwork.linuxtv.org/project/linux-media/list/ +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/tuners/tda18250* + TDA18271 MEDIA DRIVER M: Michael Krufky mkrufky@linuxtv.org L: linux-media@vger.kernel.org @@@ -13973,13 -13885,6 +14000,13 @@@ T: git git://git.kernel.org/pub/scm/lin S: Maintained K: ^Subject:.*(?i)trivial
+TEMPO SEMICONDUCTOR DRIVERS +M: Steven Eckhoff steven.eckhoff.opensource@gmail.com +S: Maintained +F: sound/soc/codecs/tscs*.c +F: sound/soc/codecs/tscs*.h +F: Documentation/devicetree/bindings/sound/tscs*.txt + TTY LAYER M: Greg Kroah-Hartman gregkh@linuxfoundation.org M: Jiri Slaby jslaby@suse.com @@@ -14778,7 -14683,6 +14805,7 @@@ W: http://www.slimlogic.co.uk/?p=4 T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git S: Supported F: Documentation/devicetree/bindings/regulator/ +F: Documentation/power/regulator/ F: drivers/regulator/ F: include/dt-bindings/regulator/ F: include/linux/regulator/ @@@ -14876,9 -14780,9 +14903,9 @@@ S: Maintaine F: drivers/hid/hid-wiimote*
WILOCITY WIL6210 WIRELESS DRIVER - M: Maya Erez qca_merez@qca.qualcomm.com + M: Maya Erez merez@codeaurora.org L: linux-wireless@vger.kernel.org - L: wil6210@qca.qualcomm.com + L: wil6210@qti.qualcomm.com S: Supported W: http://wireless.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ @@@ -14989,7 -14893,7 +15016,7 @@@ F: net/x25 X86 ARCHITECTURE (32-BIT AND 64-BIT) M: Thomas Gleixner tglx@linutronix.de M: Ingo Molnar mingo@redhat.com -M: "H. Peter Anvin" hpa@zytor.com +R: "H. Peter Anvin" hpa@zytor.com M: x86@kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core @@@ -15099,7 -15003,6 +15126,7 @@@ F: include/xen/interface/io/vscsiif. XEN SWIOTLB SUBSYSTEM M: Konrad Rzeszutek Wilk konrad.wilk@oracle.com L: xen-devel@lists.xenproject.org (moderated for non-subscribers) +L: iommu@lists.linux-foundation.org S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* diff --combined arch/Kconfig index 1b560e7f06dc,97376accfb14..d007b2a15b22 --- a/arch/Kconfig +++ b/arch/Kconfig @@@ -196,6 -196,9 +196,9 @@@ config HAVE_OPTPROBE config HAVE_KPROBES_ON_FTRACE bool
+ config HAVE_FUNCTION_ERROR_INJECTION + bool + config HAVE_NMI bool
@@@ -234,8 -237,8 +237,8 @@@ config ARCH_HAS_FORTIFY_SOURC config ARCH_HAS_SET_MEMORY bool
-# Select if arch init_task initializer is different to init/init_task.c -config ARCH_INIT_TASK +# Select if arch init_task must go in the __init_task_data section +config ARCH_TASK_STRUCT_ON_STACK bool
# Select if arch has its private alloc_task_struct() function @@@ -938,10 -941,6 +941,10 @@@ config STRICT_MODULE_RW and non-text memory will be made non-executable. This provides protection against certain security exploits (e.g. writing to text)
+# select if the architecture provides an asm/dma-direct.h header +config ARCH_HAS_PHYS_TO_DMA + bool + config ARCH_HAS_REFCOUNT bool help diff --combined arch/arm/boot/dts/imx25.dtsi index c43cf704b768,fcaff1c66bcb..9445f8e1473c --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@@ -122,7 -122,7 +122,7 @@@ };
can1: can@43f88000 { - compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx25-flexcan"; reg = <0x43f88000 0x4000>; interrupts = <43>; clocks = <&clks 75>, <&clks 75>; @@@ -131,7 -131,7 +131,7 @@@ };
can2: can@43f8c000 { - compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx25-flexcan"; reg = <0x43f8c000 0x4000>; interrupts = <44>; clocks = <&clks 76>, <&clks 76>; @@@ -628,13 -628,11 +628,13 @@@ usbphy0: usb-phy@0 { reg = <0>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; };
usbphy1: usb-phy@1 { reg = <1>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; }; }; diff --combined arch/arm/boot/dts/imx35.dtsi index f049c692c6b0,1f0e2203b576..e08c0c193767 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@@ -303,7 -303,7 +303,7 @@@ };
can1: can@53fe4000 { - compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx35-flexcan"; reg = <0x53fe4000 0x1000>; clocks = <&clks 33>, <&clks 33>; clock-names = "ipg", "per"; @@@ -312,7 -312,7 +312,7 @@@ };
can2: can@53fe8000 { - compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx35-flexcan"; reg = <0x53fe8000 0x1000>; clocks = <&clks 34>, <&clks 34>; clock-names = "ipg", "per"; @@@ -402,13 -402,11 +402,13 @@@ usbphy0: usb-phy@0 { reg = <0>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; };
usbphy1: usb-phy@1 { reg = <1>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; }; }; diff --combined arch/arm/boot/dts/imx53.dtsi index 38b31a37339b,85071ff8c639..1040251f2951 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@@ -116,28 -116,6 +116,28 @@@ }; };
+ pmu { + compatible = "arm,cortex-a8-pmu"; + interrupt-parent = <&tzic>; + interrupts = <77>; + }; + + usbphy0: usbphy-0 { + compatible = "usb-nop-xceiv"; + clocks = <&clks IMX5_CLK_USB_PHY1_GATE>; + clock-names = "main_clk"; + #phy-cells = <0>; + status = "okay"; + }; + + usbphy1: usbphy-1 { + compatible = "usb-nop-xceiv"; + clocks = <&clks IMX5_CLK_USB_PHY2_GATE>; + clock-names = "main_clk"; + #phy-cells = <0>; + status = "okay"; + }; + soc { #address-cells = <1>; #size-cells = <1>; @@@ -321,6 -299,20 +321,6 @@@ reg = <0x53f00000 0x60>; };
- usbphy0: usbphy-0 { - compatible = "usb-nop-xceiv"; - clocks = <&clks IMX5_CLK_USB_PHY1_GATE>; - clock-names = "main_clk"; - status = "okay"; - }; - - usbphy1: usbphy-1 { - compatible = "usb-nop-xceiv"; - clocks = <&clks IMX5_CLK_USB_PHY2_GATE>; - clock-names = "main_clk"; - status = "okay"; - }; - usbotg: usb@53f80000 { compatible = "fsl,imx53-usb", "fsl,imx27-usb"; reg = <0x53f80000 0x0200>; @@@ -441,13 -433,6 +441,13 @@@ clock-names = "ipg", "per"; };
+ srtc: rtc@53fa4000 { + compatible = "fsl,imx53-rtc"; + reg = <0x53fa4000 0x4000>; + interrupts = <24>; + clocks = <&clks IMX5_CLK_SRTC_GATE>; + }; + iomuxc: iomuxc@53fa8000 { compatible = "fsl,imx53-iomuxc"; reg = <0x53fa8000 0x4000>; @@@ -551,7 -536,7 +551,7 @@@ };
can1: can@53fc8000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, @@@ -561,7 -546,7 +561,7 @@@ };
can2: can@53fcc000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan"; reg = <0x53fcc000 0x4000>; interrupts = <83>; clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>, @@@ -828,5 -813,10 +828,5 @@@ reg = <0xf8000000 0x20000>; clocks = <&clks IMX5_CLK_OCRAM>; }; - - pmu { - compatible = "arm,cortex-a8-pmu"; - interrupts = <77>; - }; }; }; diff --combined arch/arm/boot/dts/ls1021a-qds.dts index bf15dc27ca53,7bb402d3e9d0..499f41a2c6f0 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@@ -239,11 -239,6 +239,11 @@@ device-width = <1>; };
+ nand@2,0 { + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + fpga: board-control@3,0 { #address-cells = <1>; #size-cells = <1>; @@@ -336,3 -331,19 +336,19 @@@ &uart1 { status = "okay"; }; + + &can0 { + status = "okay"; + }; + + &can1 { + status = "okay"; + }; + + &can2 { + status = "disabled"; + }; + + &can3 { + status = "disabled"; + }; diff --combined arch/arm/boot/dts/ls1021a-twr.dts index b186c370ad54,860b898141f0..f0c949d74833 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@@ -228,10 -228,6 +228,10 @@@ }; };
+&esdhc { + status = "okay"; +}; + &sai1 { status = "okay"; }; @@@ -247,3 -243,19 +247,19 @@@ &uart1 { status = "okay"; }; + + &can0 { + status = "okay"; + }; + + &can1 { + status = "okay"; + }; + + &can2 { + status = "disabled"; + }; + + &can3 { + status = "disabled"; + }; diff --combined arch/arm/boot/dts/ls1021a.dtsi index c5edfa9a68a6,7789031898b0..c31dad98f989 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@@ -106,14 -106,6 +106,14 @@@ compatible = "arm,cortex-a7-pmu"; interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&cpu0>, <&cpu1>; + }; + + reboot { + compatible = "syscon-reboot"; + regmap = <&dcfg>; + offset = <0xb0>; + mask = <0x02>; };
soc { @@@ -162,22 -154,8 +162,22 @@@ big-endian; };
+ qspi: quadspi@1550000 { + compatible = "fsl,ls1021a-qspi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x1550000 0x0 0x10000>, + <0x0 0x40000000 0x0 0x40000000>; + reg-names = "QuadSPI", "QuadSPI-memory"; + interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; + clock-names = "qspi_en", "qspi"; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + big-endian; + status = "disabled"; + }; + esdhc: esdhc@1560000 { - compatible = "fsl,esdhc"; + compatible = "fsl,ls1021a-esdhc", "fsl,esdhc"; reg = <0x0 0x1560000 0x0 0x10000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; clock-frequency = <0>; @@@ -597,7 -575,7 +597,7 @@@ fsl,tclk-period = <5>; fsl,tmr-prsc = <2>; fsl,tmr-add = <0xaaaaaaab>; - fsl,tmr-fiper1 = <999999990>; + fsl,tmr-fiper1 = <999999995>; fsl,tmr-fiper2 = <99990>; fsl,max-adj = <499999999>; }; @@@ -690,7 -668,7 +690,7 @@@ }; };
- usb@8600000 { + usb2: usb@8600000 { compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; reg = <0x0 0x8600000 0x0 0x1000>; interrupts = <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; @@@ -698,7 -676,7 +698,7 @@@ phy_type = "ulpi"; };
- usb3@3100000 { + usb3: usb3@3100000 { compatible = "snps,dwc3"; reg = <0x0 0x3100000 0x0 0x10000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; @@@ -752,5 -730,41 +752,41 @@@ <0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>, <0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>; }; + + can0: can@2a70000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a70000 0x0 0x1000>; + interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can1: can@2a80000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a80000 0x0 0x1000>; + interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can2: can@2a90000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a90000 0x0 0x1000>; + interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can3: can@2aa0000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2aa0000 0x0 0x1000>; + interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; }; }; diff --combined arch/x86/Kconfig index 94851684361d,bc2204f829d3..068d2ea392b0 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@@ -54,7 -54,6 +54,7 @@@ config X8 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 @@@ -155,6 -154,7 +155,7 @@@ select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --combined drivers/infiniband/hw/qedr/main.c index b3786474e84a,a9c3378bca38..db4bf97c0e15 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@@ -264,7 -264,7 +264,7 @@@ static int qedr_register_device(struct static int qedr_alloc_mem_sb(struct qedr_dev *dev, struct qed_sb_info *sb_info, u16 sb_id) { - struct status_block *sb_virt; + struct status_block_e4 *sb_virt; dma_addr_t sb_phys; int rc;
@@@ -430,16 -430,59 +430,16 @@@ static void qedr_remove_sysfiles(struc
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev) { - struct pci_dev *bridge; - u32 ctl2, cap2; - u16 flags; - int rc; - - bridge = pdev->bus->self; - if (!bridge) - goto disable; - - /* Check atomic routing support all the way to root complex */ - while (bridge->bus->parent) { - rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags); - if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2)) - goto disable; - - rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2); - if (rc) - goto disable; + int rc = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl2); - if (rc) - goto disable; - - if (!(cap2 & PCI_EXP_DEVCAP2_ATOMIC_ROUTE) || - (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)) - goto disable; - bridge = bridge->bus->parent->self; + if (rc) { + dev->atomic_cap = IB_ATOMIC_NONE; + DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n"); + } else { + dev->atomic_cap = IB_ATOMIC_GLOB; + DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n"); } - - rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags); - if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2)) - goto disable; - - rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2); - if (rc || !(cap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64)) - goto disable; - - /* Set atomic operations */ - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, - PCI_EXP_DEVCTL2_ATOMIC_REQ); - dev->atomic_cap = IB_ATOMIC_GLOB; - - DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n"); - - return; - -disable: - pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2, - PCI_EXP_DEVCTL2_ATOMIC_REQ); - dev->atomic_cap = IB_ATOMIC_NONE; - - DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n"); - }
static const struct qed_rdma_ops *qed_ops; diff --combined drivers/net/ethernet/broadcom/bcmsysport.c index 9d7a834c5f62,f15a8fc6dfc9..c2969b260aed --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@@ -1156,7 -1156,7 +1156,7 @@@ static struct sk_buff *bcm_sysport_inse memset(tsb, 0, sizeof(*tsb));
if (skb->ip_summed == CHECKSUM_PARTIAL) { - ip_ver = htons(skb->protocol); + ip_ver = ntohs(skb->protocol); switch (ip_ver) { case ETH_P_IP: ip_proto = ip_hdr(skb)->protocol; @@@ -1216,18 -1216,6 +1216,6 @@@ static netdev_tx_t bcm_sysport_xmit(str goto out; }
- /* The Ethernet switch we are interfaced with needs packets to be at - * least 64 bytes (including FCS) otherwise they will be discarded when - * they enter the switch port logic. When Broadcom tags are enabled, we - * need to make sure that packets are at least 68 bytes - * (including FCS and tag) because the length verification is done after - * the Broadcom tag is stripped off the ingress packet. - */ - if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { - ret = NETDEV_TX_OK; - goto out; - } - /* Insert TSB and checksum infos */ if (priv->tsb_en) { skb = bcm_sysport_insert_tsb(skb, dev); diff --combined drivers/net/ethernet/broadcom/bnx2.c index 154866e8517a,5e34b34f7740..5de4c33f682e --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@@ -5818,8 -5818,8 +5818,8 @@@ bnx2_run_loopback(struct bnx2 *bp, int struct l2_fhdr *rx_hdr; int ret = -ENODEV; struct bnx2_napi *bnapi = &bp->bnx2_napi[0], *tx_napi; - struct bnx2_tx_ring_info *txr = &bnapi->tx_ring; - struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring; + struct bnx2_tx_ring_info *txr; + struct bnx2_rx_ring_info *rxr;
tx_napi = bnapi;
@@@ -8330,9 -8330,9 +8330,9 @@@ bnx2_init_board(struct pci_dev *pdev, s if (j < 32) bp->fw_version[j++] = ' '; for (i = 0; i < 3 && j < 28; i++) { - reg = bnx2_reg_rd_ind(bp, addr + i * 4); - reg = be32_to_cpu(reg); - memcpy(&bp->fw_version[j], ®, 4); + __be32 v; + v = cpu_to_be32(bnx2_reg_rd_ind(bp, addr + i * 4)); + memcpy(&bp->fw_version[j], &v, 4); j += 4; } } diff --combined drivers/net/ethernet/broadcom/genet/bcmgenet.c index 77154f1479a9,b1e35a9accf1..db97873cdc0a --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@@ -1321,7 -1321,7 +1321,7 @@@ static struct sk_buff *bcmgenet_free_tx dma_unmap_addr_set(cb, dma_addr, 0); }
- return 0; + return NULL; }
/* Simple helper to free a receive control block's resources */ @@@ -1480,7 -1480,7 +1480,7 @@@ static struct sk_buff *bcmgenet_put_tx_ status = (struct status_64 *)skb->data;
if (skb->ip_summed == CHECKSUM_PARTIAL) { - ip_ver = htons(skb->protocol); + ip_ver = ntohs(skb->protocol); switch (ip_ver) { case ETH_P_IP: ip_proto = ip_hdr(skb)->protocol; @@@ -2527,9 -2527,10 +2527,10 @@@ static void bcmgenet_irq_task(struct wo spin_unlock_irq(&priv->lock);
/* Link UP/DOWN event */ - if (status & UMAC_IRQ_LINK_EVENT) - phy_mac_interrupt(priv->dev->phydev, - !!(status & UMAC_IRQ_LINK_UP)); + if (status & UMAC_IRQ_LINK_EVENT) { + priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP); + phy_mac_interrupt(priv->dev->phydev); + } }
/* bcmgenet_isr1: handle Rx and Tx priority queues */ diff --combined drivers/net/ethernet/broadcom/tg3.c index 86ff8b49ee57,a77ee2f8fb8d..2bd77d9990f2 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@@ -3227,7 -3227,7 +3227,7 @@@ static int tg3_nvram_read_using_eeprom( return 0; }
- #define NVRAM_CMD_TIMEOUT 5000 + #define NVRAM_CMD_TIMEOUT 10000
static int tg3_nvram_exec_cmd(struct tg3 *tp, u32 nvram_cmd) { @@@ -3744,7 -3744,7 +3744,7 @@@ static int tg3_load_firmware_cpu(struc }
do { - u32 *fw_data = (u32 *)(fw_hdr + 1); + __be32 *fw_data = (__be32 *)(fw_hdr + 1); for (i = 0; i < tg3_fw_data_len(tp, fw_hdr); i++) write_op(tp, cpu_scratch_base + (be32_to_cpu(fw_hdr->base_addr) & 0xffff) + @@@ -14789,7 -14789,7 +14789,7 @@@ static void tg3_get_5717_nvram_info(str
static void tg3_get_5720_nvram_info(struct tg3 *tp) { - u32 nvcfg1, nvmpinstrp; + u32 nvcfg1, nvmpinstrp, nv_status;
nvcfg1 = tr32(NVRAM_CFG1); nvmpinstrp = nvcfg1 & NVRAM_CFG1_5752VENDOR_MASK; @@@ -14801,6 -14801,23 +14801,23 @@@ }
switch (nvmpinstrp) { + case FLASH_5762_MX25L_100: + case FLASH_5762_MX25L_200: + case FLASH_5762_MX25L_400: + case FLASH_5762_MX25L_800: + case FLASH_5762_MX25L_160_320: + tp->nvram_pagesize = 4096; + tp->nvram_jedecnum = JEDEC_MACRONIX; + tg3_flag_set(tp, NVRAM_BUFFERED); + tg3_flag_set(tp, NO_NVRAM_ADDR_TRANS); + tg3_flag_set(tp, FLASH); + nv_status = tr32(NVRAM_AUTOSENSE_STATUS); + tp->nvram_size = + (1 << (nv_status >> AUTOSENSE_DEVID & + AUTOSENSE_DEVID_MASK) + << AUTOSENSE_SIZE_IN_MB); + return; + case FLASH_5762_EEPROM_HD: nvmpinstrp = FLASH_5720_EEPROM_HD; break; diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 351f4bf37ca9,429467364219..590e56a5f449 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@@ -58,13 -58,6 +58,13 @@@ extern struct list_head adapter_list; extern struct mutex uld_mutex;
+/* Suspend an Ethernet Tx queue with fewer available descriptors than this. + * This is the same as calc_tx_descs() for a TSO packet with + * nr_frags == MAX_SKB_FRAGS. + */ +#define ETHTXQ_STOP_THRES \ + (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ @@@ -84,7 -77,8 +84,8 @@@ enum MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, - MEM_MC1 + MEM_MC1, + MEM_HMA, };
enum { @@@ -318,6 -312,7 +319,7 @@@ struct vpd_params };
struct pci_params { + unsigned int vpd_cap_addr; unsigned char speed; unsigned char width; }; @@@ -570,7 -565,6 +572,7 @@@ enum {
enum { ULP_CRYPTO_LOOKASIDE = 1 << 0, + ULP_CRYPTO_IPSEC_INLINE = 1 << 1, };
struct rx_sw_desc; @@@ -826,12 -820,17 +828,17 @@@ struct vf_info unsigned char vf_mac_addr[ETH_ALEN]; unsigned int tx_rate; bool pf_set_mac; + u16 vlan; };
struct mbox_list { struct list_head list; };
+ struct mps_encap_entry { + atomic_t refcnt; + }; + struct adapter { void __iomem *regs; void __iomem *bar2; @@@ -846,6 -845,10 +853,10 @@@ enum chip_type chip;
int msg_enable; + __be16 vxlan_port; + u8 vxlan_port_cnt; + __be16 geneve_port; + u8 geneve_port_cnt;
struct adapter_params params; struct cxgb4_virt_res vres; @@@ -875,7 -878,10 +886,10 @@@ unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; + unsigned int rawf_start; + unsigned int rawf_cnt; struct smt_data *smt; + struct mps_encap_entry *mps_encap; struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; @@@ -975,11 -981,6 +989,11 @@@ enum SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ };
+struct tx_sw_desc { /* SW state per Tx descriptor */ + struct sk_buff *skb; + struct ulptx_sgl *sgl; +}; + /* Support for "sched_queue" command to allow one or more NIC TX Queues * to be bound to a TX Scheduling Class. */ @@@ -1317,6 -1318,7 +1331,7 @@@ void t4_sge_start(struct adapter *adap) void t4_sge_stop(struct adapter *adap); void cxgb4_set_ethtool_ops(struct net_device *netdev); int cxgb4_write_rss(const struct port_info *pi, const u16 *queues); + enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb); extern int dbfifo_int_thresh;
#define for_each_port(adapter, iter) \ @@@ -1435,6 -1437,21 +1450,21 @@@ static inline void init_rspq(struct ada q->size = size; }
+ /** + * t4_is_inserted_mod_type - is a plugged in Firmware Module Type + * @fw_mod_type: the Firmware Mofule Type + * + * Return whether the Firmware Module Type represents a real Transceiver + * Module/Cable Module Type which has been inserted. + */ + static inline bool t4_is_inserted_mod_type(unsigned int fw_mod_type) + { + return (fw_mod_type != FW_PORT_MOD_TYPE_NONE && + fw_mod_type != FW_PORT_MOD_TYPE_NOTSUPPORTED && + fw_mod_type != FW_PORT_MOD_TYPE_UNKNOWN && + fw_mod_type != FW_PORT_MOD_TYPE_ERROR); + } + void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); @@@ -1524,6 -1541,7 +1554,7 @@@ int t4_init_portinfo(struct port_info * int port, int pf, int vf, u8 mac[]); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); + unsigned int t4_chip_rss_size(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, @@@ -1633,6 -1651,12 +1664,12 @@@ int t4_free_vi(struct adapter *adap, un int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, int mtu, int promisc, int all_multi, int bcast, int vlanex, bool sleep_ok); + int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, unsigned int idx, + u8 lookup_type, u8 port_id, bool sleep_ok); + int t4_alloc_raw_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, unsigned int idx, + u8 lookup_type, u8 port_id, bool sleep_ok); int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok); @@@ -1665,7 -1689,7 +1702,7 @@@ int t4_ctrl_eq_free(struct adapter *ada unsigned int vf, unsigned int eqid); int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); - int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox); + int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type); void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl); int t4_update_port_info(struct port_info *pi); int t4_get_link_params(struct port_info *pi, unsigned int *link_okp, @@@ -1708,18 -1732,13 +1745,23 @@@ void t4_uld_mem_free(struct adapter *ad int t4_uld_mem_alloc(struct adapter *adap); void t4_uld_clean_up(struct adapter *adap); void t4_register_netevent_notifier(void); + int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, + unsigned int devid, unsigned int offset, + unsigned int len, u8 *buf); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); void free_tx_desc(struct adapter *adap, struct sge_txq *q, unsigned int n, bool unmap); void free_txq(struct adapter *adap, struct sge_txq *q); + int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, + u16 vlan); +void cxgb4_reclaim_completed_tx(struct adapter *adap, + struct sge_txq *q, bool unmap); +int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb, + dma_addr_t *addr); +void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, + void *pos); +void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, unsigned int start, + const dma_addr_t *addr); +void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n); #endif /* __CXGB4_H__ */ diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index cf471831ee71,4ea76c1411dc..2822bbff73e8 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@@ -45,6 -45,10 +45,10 @@@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" + #include "cudbg_if.h" + #include "cudbg_lib_common.h" + #include "cudbg_entity.h" + #include "cudbg_lib.h"
/* generic seq_file support for showing a table of size rows x width. */ static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos) @@@ -1739,7 -1743,7 +1743,7 @@@ static int mps_tcam_show(struct seq_fil */ if (lookup_type && (lookup_type != DATALKPTYPE_M)) { /* Inner header VNI */ - vniy = ((data2 & DATAVIDH2_F) << 23) | + vniy = (data2 & DATAVIDH2_F) | (DATAVIDH1_G(data2) << 16) | VIDL_G(val); dip_hit = data2 & DATADIPHIT_F; } else { @@@ -1749,6 -1753,7 +1753,7 @@@ port_num = DATAPORTNUM_G(data2);
/* Read tcamx. Change the control param */ + vnix = 0; ctl |= CTLXYBITSEL_V(1); t4_write_reg(adap, MPS_CLS_TCAM_DATA2_CTL_A, ctl); val = t4_read_reg(adap, MPS_CLS_TCAM_DATA1_A); @@@ -1757,7 -1762,7 +1762,7 @@@ data2 = t4_read_reg(adap, MPS_CLS_TCAM_DATA2_CTL_A); if (lookup_type && (lookup_type != DATALKPTYPE_M)) { /* Inner header VNI mask */ - vnix = ((data2 & DATAVIDH2_F) << 23) | + vnix = (data2 & DATAVIDH2_F) | (DATAVIDH1_G(data2) << 16) | VIDL_G(val); } } else { @@@ -1830,7 -1835,8 +1835,8 @@@ addr[1], addr[2], addr[3], addr[4], addr[5], (unsigned long long)mask, - vniy, vnix, dip_hit ? 'Y' : 'N', + vniy, (vnix | vniy), + dip_hit ? 'Y' : 'N', port_num, (cls_lo & T6_SRAM_VLD_F) ? 'Y' : 'N', PORTMAP_G(cls_hi), @@@ -2017,11 -2023,12 +2023,12 @@@ static int rss_show(struct seq_file *se
static int rss_open(struct inode *inode, struct file *file) { - int ret; - struct seq_tab *p; struct adapter *adap = inode->i_private; + int ret, nentries; + struct seq_tab *p;
- p = seq_open_tab(file, RSS_NENTRIES / 8, 8 * sizeof(u16), 0, rss_show); + nentries = t4_chip_rss_size(adap); + p = seq_open_tab(file, nentries / 8, 8 * sizeof(u16), 0, rss_show); if (!p) return -ENOMEM;
@@@ -2664,10 -2671,14 +2671,14 @@@ static const struct file_operations mem
static int tid_info_show(struct seq_file *seq, void *v) { + unsigned int tid_start = 0; struct adapter *adap = seq->private; const struct tid_info *t = &adap->tids; enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+ if (chip > CHELSIO_T5) + tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { unsigned int sb; seq_printf(seq, "Connections in use: %u\n", @@@ -2679,8 -2690,8 +2690,8 @@@ sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A);
if (sb) { - seq_printf(seq, "TID range: 0..%u/%u..%u", sb - 1, - adap->tids.hash_base, + seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start, + sb - 1, adap->tids.hash_base, t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), @@@ -2705,7 -2716,8 +2716,8 @@@ seq_printf(seq, "Connections in use: %u\n", atomic_read(&t->conns_in_use));
- seq_printf(seq, "TID range: 0..%u", t->ntids - 1); + seq_printf(seq, "TID range: %u..%u", tid_start, + tid_start + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->tids_in_use)); } @@@ -2794,18 -2806,6 +2806,6 @@@ static const struct file_operations blo .llseek = generic_file_llseek, };
- struct mem_desc { - unsigned int base; - unsigned int limit; - unsigned int idx; - }; - - static int mem_desc_cmp(const void *a, const void *b) - { - return ((const struct mem_desc *)a)->base - - ((const struct mem_desc *)b)->base; - } - static void mem_region_show(struct seq_file *seq, const char *name, unsigned int from, unsigned int to) { @@@ -2819,250 -2819,60 +2819,60 @@@ static int meminfo_show(struct seq_file *seq, void *v) { static const char * const memory[] = { "EDC0:", "EDC1:", "MC:", - "MC0:", "MC1:"}; - static const char * const region[] = { - "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", - "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", - "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", - "TDDP region:", "TPT region:", "STAG region:", "RQ region:", - "RQUDP region:", "PBL region:", "TXPBL region:", - "DBVFIFO region:", "ULPRX state:", "ULPTX state:", - "On-chip queues:" - }; - - int i, n; - u32 lo, hi, used, alloc; - struct mem_desc avail[4]; - struct mem_desc mem[ARRAY_SIZE(region) + 3]; /* up to 3 holes */ - struct mem_desc *md = mem; + "MC0:", "MC1:", "HMA:"}; struct adapter *adap = seq->private; + struct cudbg_meminfo meminfo; + int i, rc;
- for (i = 0; i < ARRAY_SIZE(mem); i++) { - mem[i].limit = 0; - mem[i].idx = i; - } - - /* Find and sort the populated memory ranges */ - i = 0; - lo = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (lo & EDRAM0_ENABLE_F) { - hi = t4_read_reg(adap, MA_EDRAM0_BAR_A); - avail[i].base = EDRAM0_BASE_G(hi) << 20; - avail[i].limit = avail[i].base + (EDRAM0_SIZE_G(hi) << 20); - avail[i].idx = 0; - i++; - } - if (lo & EDRAM1_ENABLE_F) { - hi = t4_read_reg(adap, MA_EDRAM1_BAR_A); - avail[i].base = EDRAM1_BASE_G(hi) << 20; - avail[i].limit = avail[i].base + (EDRAM1_SIZE_G(hi) << 20); - avail[i].idx = 1; - i++; - } - - if (is_t5(adap->params.chip)) { - if (lo & EXT_MEM0_ENABLE_F) { - hi = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A); - avail[i].base = EXT_MEM0_BASE_G(hi) << 20; - avail[i].limit = - avail[i].base + (EXT_MEM0_SIZE_G(hi) << 20); - avail[i].idx = 3; - i++; - } - if (lo & EXT_MEM1_ENABLE_F) { - hi = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A); - avail[i].base = EXT_MEM1_BASE_G(hi) << 20; - avail[i].limit = - avail[i].base + (EXT_MEM1_SIZE_G(hi) << 20); - avail[i].idx = 4; - i++; - } - } else { - if (lo & EXT_MEM_ENABLE_F) { - hi = t4_read_reg(adap, MA_EXT_MEMORY_BAR_A); - avail[i].base = EXT_MEM_BASE_G(hi) << 20; - avail[i].limit = - avail[i].base + (EXT_MEM_SIZE_G(hi) << 20); - avail[i].idx = 2; - i++; - } - } - if (!i) /* no memory available */ - return 0; - sort(avail, i, sizeof(struct mem_desc), mem_desc_cmp, NULL); - - (md++)->base = t4_read_reg(adap, SGE_DBQ_CTXT_BADDR_A); - (md++)->base = t4_read_reg(adap, SGE_IMSG_CTXT_BADDR_A); - (md++)->base = t4_read_reg(adap, SGE_FLM_CACHE_BADDR_A); - (md++)->base = t4_read_reg(adap, TP_CMM_TCB_BASE_A); - (md++)->base = t4_read_reg(adap, TP_CMM_MM_BASE_A); - (md++)->base = t4_read_reg(adap, TP_CMM_TIMER_BASE_A); - (md++)->base = t4_read_reg(adap, TP_CMM_MM_RX_FLST_BASE_A); - (md++)->base = t4_read_reg(adap, TP_CMM_MM_TX_FLST_BASE_A); - (md++)->base = t4_read_reg(adap, TP_CMM_MM_PS_FLST_BASE_A); - - /* the next few have explicit upper bounds */ - md->base = t4_read_reg(adap, TP_PMM_TX_BASE_A); - md->limit = md->base - 1 + - t4_read_reg(adap, TP_PMM_TX_PAGE_SIZE_A) * - PMTXMAXPAGE_G(t4_read_reg(adap, TP_PMM_TX_MAX_PAGE_A)); - md++; - - md->base = t4_read_reg(adap, TP_PMM_RX_BASE_A); - md->limit = md->base - 1 + - t4_read_reg(adap, TP_PMM_RX_PAGE_SIZE_A) * - PMRXMAXPAGE_G(t4_read_reg(adap, TP_PMM_RX_MAX_PAGE_A)); - md++; - - if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { - if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) { - hi = t4_read_reg(adap, LE_DB_TID_HASHBASE_A) / 4; - md->base = t4_read_reg(adap, LE_DB_HASH_TID_BASE_A); - } else { - hi = t4_read_reg(adap, LE_DB_HASH_TID_BASE_A); - md->base = t4_read_reg(adap, - LE_DB_HASH_TBL_BASE_ADDR_A); - } - md->limit = 0; - } else { - md->base = 0; - md->idx = ARRAY_SIZE(region); /* hide it */ - } - md++; - - #define ulp_region(reg) do { \ - md->base = t4_read_reg(adap, ULP_ ## reg ## _LLIMIT_A);\ - (md++)->limit = t4_read_reg(adap, ULP_ ## reg ## _ULIMIT_A); \ - } while (0) - - ulp_region(RX_ISCSI); - ulp_region(RX_TDDP); - ulp_region(TX_TPT); - ulp_region(RX_STAG); - ulp_region(RX_RQ); - ulp_region(RX_RQUDP); - ulp_region(RX_PBL); - ulp_region(TX_PBL); - #undef ulp_region - md->base = 0; - md->idx = ARRAY_SIZE(region); - if (!is_t4(adap->params.chip)) { - u32 size = 0; - u32 sge_ctrl = t4_read_reg(adap, SGE_CONTROL2_A); - u32 fifo_size = t4_read_reg(adap, SGE_DBVFIFO_SIZE_A); - - if (is_t5(adap->params.chip)) { - if (sge_ctrl & VFIFO_ENABLE_F) - size = DBVFIFO_SIZE_G(fifo_size); - } else { - size = T6_DBVFIFO_SIZE_G(fifo_size); - } - - if (size) { - md->base = BASEADDR_G(t4_read_reg(adap, - SGE_DBVFIFO_BADDR_A)); - md->limit = md->base + (size << 2) - 1; - } - } - - md++; - - md->base = t4_read_reg(adap, ULP_RX_CTX_BASE_A); - md->limit = 0; - md++; - md->base = t4_read_reg(adap, ULP_TX_ERR_TABLE_BASE_A); - md->limit = 0; - md++; - - md->base = adap->vres.ocq.start; - if (adap->vres.ocq.size) - md->limit = md->base + adap->vres.ocq.size - 1; - else - md->idx = ARRAY_SIZE(region); /* hide it */ - md++; - - /* add any address-space holes, there can be up to 3 */ - for (n = 0; n < i - 1; n++) - if (avail[n].limit < avail[n + 1].base) - (md++)->base = avail[n].limit; - if (avail[n].limit) - (md++)->base = avail[n].limit; - - n = md - mem; - sort(mem, n, sizeof(struct mem_desc), mem_desc_cmp, NULL); + memset(&meminfo, 0, sizeof(struct cudbg_meminfo)); + rc = cudbg_fill_meminfo(adap, &meminfo); + if (rc) + return -ENXIO;
- for (lo = 0; lo < i; lo++) - mem_region_show(seq, memory[avail[lo].idx], avail[lo].base, - avail[lo].limit - 1); + for (i = 0; i < meminfo.avail_c; i++) + mem_region_show(seq, memory[meminfo.avail[i].idx], + meminfo.avail[i].base, + meminfo.avail[i].limit - 1);
seq_putc(seq, '\n'); - for (i = 0; i < n; i++) { - if (mem[i].idx >= ARRAY_SIZE(region)) + for (i = 0; i < meminfo.mem_c; i++) { + if (meminfo.mem[i].idx >= ARRAY_SIZE(cudbg_region)) continue; /* skip holes */ - if (!mem[i].limit) - mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; - mem_region_show(seq, region[mem[i].idx], mem[i].base, - mem[i].limit); + if (!meminfo.mem[i].limit) + meminfo.mem[i].limit = + i < meminfo.mem_c - 1 ? + meminfo.mem[i + 1].base - 1 : ~0; + mem_region_show(seq, cudbg_region[meminfo.mem[i].idx], + meminfo.mem[i].base, meminfo.mem[i].limit); }
seq_putc(seq, '\n'); - lo = t4_read_reg(adap, CIM_SDRAM_BASE_ADDR_A); - hi = t4_read_reg(adap, CIM_SDRAM_ADDR_SIZE_A) + lo - 1; - mem_region_show(seq, "uP RAM:", lo, hi); - - lo = t4_read_reg(adap, CIM_EXTMEM2_BASE_ADDR_A); - hi = t4_read_reg(adap, CIM_EXTMEM2_ADDR_SIZE_A) + lo - 1; - mem_region_show(seq, "uP Extmem2:", lo, hi); + mem_region_show(seq, "uP RAM:", meminfo.up_ram_lo, meminfo.up_ram_hi); + mem_region_show(seq, "uP Extmem2:", meminfo.up_extmem2_lo, + meminfo.up_extmem2_hi);
- lo = t4_read_reg(adap, TP_PMM_RX_MAX_PAGE_A); seq_printf(seq, "\n%u Rx pages of size %uKiB for %u channels\n", - PMRXMAXPAGE_G(lo), - t4_read_reg(adap, TP_PMM_RX_PAGE_SIZE_A) >> 10, - (lo & PMRXNUMCHN_F) ? 2 : 1); + meminfo.rx_pages_data[0], meminfo.rx_pages_data[1], + meminfo.rx_pages_data[2]);
- lo = t4_read_reg(adap, TP_PMM_TX_MAX_PAGE_A); - hi = t4_read_reg(adap, TP_PMM_TX_PAGE_SIZE_A); seq_printf(seq, "%u Tx pages of size %u%ciB for %u channels\n", - PMTXMAXPAGE_G(lo), - hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), - hi >= (1 << 20) ? 'M' : 'K', 1 << PMTXNUMCHN_G(lo)); - seq_printf(seq, "%u p-structs\n\n", - t4_read_reg(adap, TP_CMM_MM_MAX_PSTRUCT_A)); - - for (i = 0; i < 4; i++) { - if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) - lo = t4_read_reg(adap, MPS_RX_MAC_BG_PG_CNT0_A + i * 4); - else - lo = t4_read_reg(adap, MPS_RX_PG_RSV0_A + i * 4); - if (is_t5(adap->params.chip)) { - used = T5_USED_G(lo); - alloc = T5_ALLOC_G(lo); - } else { - used = USED_G(lo); - alloc = ALLOC_G(lo); - } + meminfo.tx_pages_data[0], meminfo.tx_pages_data[1], + meminfo.tx_pages_data[2], meminfo.tx_pages_data[3]); + + seq_printf(seq, "%u p-structs\n\n", meminfo.p_structs); + + for (i = 0; i < 4; i++) /* For T6 these are MAC buffer groups */ seq_printf(seq, "Port %d using %u pages out of %u allocated\n", - i, used, alloc); - } - for (i = 0; i < adap->params.arch.nchan; i++) { - if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) - lo = t4_read_reg(adap, - MPS_RX_LPBK_BG_PG_CNT0_A + i * 4); - else - lo = t4_read_reg(adap, MPS_RX_PG_RSV4_A + i * 4); - if (is_t5(adap->params.chip)) { - used = T5_USED_G(lo); - alloc = T5_ALLOC_G(lo); - } else { - used = USED_G(lo); - alloc = ALLOC_G(lo); - } + i, meminfo.port_used[i], meminfo.port_alloc[i]); + + for (i = 0; i < adap->params.arch.nchan; i++) /* For T6 these are MAC buffer groups */ seq_printf(seq, "Loopback %d using %u pages out of %u allocated\n", - i, used, alloc); - } + i, meminfo.loopback_used[i], + meminfo.loopback_alloc[i]); + return 0; }
@@@ -3096,8 -2906,6 +2906,8 @@@ static int chcr_show(struct seq_file *s atomic_read(&adap->chcr_stats.error)); seq_printf(seq, "Fallback: %10u \n", atomic_read(&adap->chcr_stats.fallback)); + seq_printf(seq, "IPSec PDU: %10u\n", + atomic_read(&adap->chcr_stats.ipsec_cnt)); return 0; }
diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 05a4abfd5ec1,1e3cd8abc56d..1ca2a39ed0f8 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@@ -65,6 -65,7 +65,7 @@@ #include <net/addrconf.h> #include <linux/uaccess.h> #include <linux/crash_dump.h> + #include <net/udp_tunnel.h>
#include "cxgb4.h" #include "cxgb4_filter.h" @@@ -101,7 -102,9 +102,9 @@@ const char cxgb4_driver_version[] = DRV */ #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ static const struct pci_device_id cxgb4_pci_tbl[] = { - #define CH_PCI_DEVICE_ID_FUNCTION 0x4 + #define CXGB4_UNIFIED_PF 0x4 + + #define CH_PCI_DEVICE_ID_FUNCTION CXGB4_UNIFIED_PF
/* Include PCI Device IDs for both PF4 and PF0-3 so our PCI probe() routine is * called for both. @@@ -109,7 -112,7 +112,7 @@@ #define CH_PCI_DEVICE_ID_FUNCTION2 0x0
#define CH_PCI_ID_TABLE_ENTRY(devid) \ - {PCI_VDEVICE(CHELSIO, (devid)), 4} + {PCI_VDEVICE(CHELSIO, (devid)), CXGB4_UNIFIED_PF}
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \ { 0, } \ @@@ -1673,7 -1676,7 +1676,7 @@@ int cxgb4_flush_eq_cache(struct net_dev { struct adapter *adap = netdev2adap(dev);
- return t4_sge_ctxt_flush(adap, adap->mbox); + return t4_sge_ctxt_flush(adap, adap->mbox, CTXT_EGRESS); } EXPORT_SYMBOL(cxgb4_flush_eq_cache);
@@@ -2604,7 -2607,7 +2607,7 @@@ static int cxgb_change_mtu(struct net_d }
#ifdef CONFIG_PCI_IOV - static int dummy_open(struct net_device *dev) + static int cxgb4_mgmt_open(struct net_device *dev) { /* Turn carrier off since we don't have to transmit anything on this * interface. @@@ -2614,39 -2617,44 +2617,44 @@@ }
/* Fill MAC address that will be assigned by the FW */ - static void fill_vf_station_mac_addr(struct adapter *adap) + static void cxgb4_mgmt_fill_vf_station_mac_addr(struct adapter *adap) { - unsigned int i; u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + unsigned int i, vf, nvfs; + u16 a, b; int err; u8 *na; - u16 a, b;
+ adap->params.pci.vpd_cap_addr = pci_find_capability(adap->pdev, + PCI_CAP_ID_VPD); err = t4_get_raw_vpd_params(adap, &adap->params.vpd); - if (!err) { - na = adap->params.vpd.na; - for (i = 0; i < ETH_ALEN; i++) - hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + - hex2val(na[2 * i + 1])); - a = (hw_addr[0] << 8) | hw_addr[1]; - b = (hw_addr[1] << 8) | hw_addr[2]; - a ^= b; - a |= 0x0200; /* locally assigned Ethernet MAC address */ - a &= ~0x0100; /* not a multicast Ethernet MAC address */ - macaddr[0] = a >> 8; - macaddr[1] = a & 0xff; - - for (i = 2; i < 5; i++) - macaddr[i] = hw_addr[i + 1]; - - for (i = 0; i < adap->num_vfs; i++) { - macaddr[5] = adap->pf * 16 + i; - ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); - } + if (err) + return; + + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (vf = 0, nvfs = pci_sriov_get_totalvfs(adap->pdev); + vf < nvfs; vf++) { + macaddr[5] = adap->pf * 16 + vf; + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, macaddr); } }
- static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) + static int cxgb4_mgmt_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; @@@ -2668,8 -2676,8 +2676,8 @@@ return ret; }
- static int cxgb_get_vf_config(struct net_device *dev, - int vf, struct ifla_vf_info *ivi) + static int cxgb4_mgmt_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; @@@ -2683,8 -2691,8 +2691,8 @@@ return 0; }
- static int cxgb_get_phys_port_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) + static int cxgb4_mgmt_get_phys_port_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct port_info *pi = netdev_priv(dev); unsigned int phy_port_id; @@@ -2695,8 -2703,8 +2703,8 @@@ return 0; }
- static int cxgb_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, - int max_tx_rate) + static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf, + int min_tx_rate, int max_tx_rate) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; @@@ -2775,7 -2783,30 +2783,30 @@@ return 0; }
- #endif + static int cxgb4_mgmt_set_vf_vlan(struct net_device *dev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) + { + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int ret; + + if (vf >= adap->num_vfs || vlan > 4095 || qos > 7) + return -EINVAL; + + if (vlan_proto != htons(ETH_P_8021Q) || qos != 0) + return -EPROTONOSUPPORT; + + ret = t4_set_vlan_acl(adap, adap->mbox, vf + 1, vlan); + if (!ret) { + adap->vfinfo[vf].vlan = vlan; + return 0; + } + + dev_err(adap->pdev_dev, "Err %d %s VLAN ACL for PF/VF %d/%d\n", + ret, (vlan ? "setting" : "clearing"), adap->pf, vf); + return ret; + } + #endif /* CONFIG_PCI_IOV */
static int cxgb_set_mac_addr(struct net_device *dev, void *p) { @@@ -2897,9 -2928,6 +2928,6 @@@ static int cxgb_set_tx_maxrate(struct n static int cxgb_setup_tc_flower(struct net_device *dev, struct tc_cls_flower_offload *cls_flower) { - if (cls_flower->common.chain_index) - return -EOPNOTSUPP; - switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: return cxgb4_tc_flower_replace(dev, cls_flower); @@@ -2915,9 -2943,6 +2943,6 @@@ static int cxgb_setup_tc_cls_u32(struct net_device *dev, struct tc_cls_u32_offload *cls_u32) { - if (cls_u32->common.chain_index) - return -EOPNOTSUPP; - switch (cls_u32->command) { case TC_CLSU32_NEW_KNODE: case TC_CLSU32_REPLACE_KNODE: @@@ -2943,7 -2968,7 +2968,7 @@@ static int cxgb_setup_tc_block_cb(enum return -EINVAL; }
- if (!tc_can_offload(dev)) + if (!tc_cls_can_offload_and_chain0(dev, type_data)) return -EOPNOTSUPP;
switch (type) { @@@ -2987,6 -3012,174 +3012,174 @@@ static int cxgb_setup_tc(struct net_dev } }
+ static void cxgb_del_udp_tunnel(struct net_device *netdev, + struct udp_tunnel_info *ti) + { + struct port_info *pi = netdev_priv(netdev); + struct adapter *adapter = pi->adapter; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + int ret = 0, i; + + if (chip_ver < CHELSIO_T6) + return; + + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + if (!adapter->vxlan_port_cnt || + adapter->vxlan_port != ti->port) + return; /* Invalid VxLAN destination port */ + + adapter->vxlan_port_cnt--; + if (adapter->vxlan_port_cnt) + return; + + adapter->vxlan_port = 0; + t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, 0); + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!adapter->geneve_port_cnt || + adapter->geneve_port != ti->port) + return; /* Invalid GENEVE destination port */ + + adapter->geneve_port_cnt--; + if (adapter->geneve_port_cnt) + return; + + adapter->geneve_port = 0; + t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0); + default: + return; + } + + /* Matchall mac entries can be deleted only after all tunnel ports + * are brought down or removed. + */ + if (!adapter->rawf_cnt) + return; + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + ret = t4_free_raw_mac_filt(adapter, pi->viid, + match_all_mac, match_all_mac, + adapter->rawf_start + + pi->port_id, + 1, pi->port_id, true); + if (ret < 0) { + netdev_info(netdev, "Failed to free mac filter entry, for port %d\n", + i); + return; + } + atomic_dec(&adapter->mps_encap[adapter->rawf_start + + pi->port_id].refcnt); + } + } + + static void cxgb_add_udp_tunnel(struct net_device *netdev, + struct udp_tunnel_info *ti) + { + struct port_info *pi = netdev_priv(netdev); + struct adapter *adapter = pi->adapter; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + int i, ret; + + if (chip_ver < CHELSIO_T6 || !adapter->rawf_cnt) + return; + + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + /* Callback for adding vxlan port can be called with the same + * port for both IPv4 and IPv6. We should not disable the + * offloading when the same port for both protocols is added + * and later one of them is removed. + */ + if (adapter->vxlan_port_cnt && + adapter->vxlan_port == ti->port) { + adapter->vxlan_port_cnt++; + return; + } + + /* We will support only one VxLAN port */ + if (adapter->vxlan_port_cnt) { + netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n", + be16_to_cpu(adapter->vxlan_port), + be16_to_cpu(ti->port)); + return; + } + + adapter->vxlan_port = ti->port; + adapter->vxlan_port_cnt = 1; + + t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, + VXLAN_V(be16_to_cpu(ti->port)) | VXLAN_EN_F); + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (adapter->geneve_port_cnt && + adapter->geneve_port == ti->port) { + adapter->geneve_port_cnt++; + return; + } + + /* We will support only one GENEVE port */ + if (adapter->geneve_port_cnt) { + netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n", + be16_to_cpu(adapter->geneve_port), + be16_to_cpu(ti->port)); + return; + } + + adapter->geneve_port = ti->port; + adapter->geneve_port_cnt = 1; + + t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, + GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F); + default: + return; + } + + /* Create a 'match all' mac filter entry for inner mac, + * if raw mac interface is supported. Once the linux kernel provides + * driver entry points for adding/deleting the inner mac addresses, + * we will remove this 'match all' entry and fallback to adding + * exact match filters. + */ + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + + ret = t4_alloc_raw_mac_filt(adapter, pi->viid, + match_all_mac, + match_all_mac, + adapter->rawf_start + + pi->port_id, + 1, pi->port_id, true); + if (ret < 0) { + netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n", + be16_to_cpu(ti->port)); + cxgb_del_udp_tunnel(netdev, ti); + return; + } + atomic_inc(&adapter->mps_encap[ret].refcnt); + } + } + + static netdev_features_t cxgb_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) + { + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + + if (CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6) + return features; + + /* Check if hw supports offload for this packet */ + if (!skb->encapsulation || cxgb_encap_offload_supported(skb)) + return features; + + /* Offload is not supported for this encapsulated packet */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + static netdev_features_t cxgb_fix_features(struct net_device *dev, netdev_features_t features) { @@@ -3018,20 -3211,25 +3211,25 @@@ static const struct net_device_ops cxgb #endif /* CONFIG_CHELSIO_T4_FCOE */ .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, .ndo_setup_tc = cxgb_setup_tc, + .ndo_udp_tunnel_add = cxgb_add_udp_tunnel, + .ndo_udp_tunnel_del = cxgb_del_udp_tunnel, + .ndo_features_check = cxgb_features_check, .ndo_fix_features = cxgb_fix_features, };
#ifdef CONFIG_PCI_IOV static const struct net_device_ops cxgb4_mgmt_netdev_ops = { - .ndo_open = dummy_open, - .ndo_set_vf_mac = cxgb_set_vf_mac, - .ndo_get_vf_config = cxgb_get_vf_config, - .ndo_set_vf_rate = cxgb_set_vf_rate, - .ndo_get_phys_port_id = cxgb_get_phys_port_id, + .ndo_open = cxgb4_mgmt_open, + .ndo_set_vf_mac = cxgb4_mgmt_set_vf_mac, + .ndo_get_vf_config = cxgb4_mgmt_get_vf_config, + .ndo_set_vf_rate = cxgb4_mgmt_set_vf_rate, + .ndo_get_phys_port_id = cxgb4_mgmt_get_phys_port_id, + .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan, }; #endif
- static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) + static void cxgb4_mgmt_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) { struct adapter *adapter = netdev2adap(dev);
@@@ -3043,7 -3241,7 +3241,7 @@@ }
static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { - .get_drvinfo = get_drvinfo, + .get_drvinfo = cxgb4_mgmt_get_drvinfo, };
void t4_fatal_err(struct adapter *adap) @@@ -4096,7 -4294,7 +4294,7 @@@ static int adap_init0(struct adapter *a } else { adap->vres.ncrypto_fc = val[0]; } - adap->params.crypto |= ULP_CRYPTO_LOOKASIDE; + adap->params.crypto = ntohs(caps_cmd.cryptocaps); adap->num_uld += 1; } #undef FW_PARAM_PFVF @@@ -4759,7 -4957,7 +4957,7 @@@ static int get_chip_type(struct pci_de }
#ifdef CONFIG_PCI_IOV - static void dummy_setup(struct net_device *dev) + static void cxgb4_mgmt_setup(struct net_device *dev) { dev->type = ARPHRD_NONE; dev->mtu = 0; @@@ -4775,38 -4973,6 +4973,6 @@@ dev->needs_free_netdev = true; }
- static int config_mgmt_dev(struct pci_dev *pdev) - { - struct adapter *adap = pci_get_drvdata(pdev); - struct net_device *netdev; - struct port_info *pi; - char name[IFNAMSIZ]; - int err; - - snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf); - netdev = alloc_netdev(sizeof(struct port_info), name, NET_NAME_UNKNOWN, - dummy_setup); - if (!netdev) - return -ENOMEM; - - pi = netdev_priv(netdev); - pi->adapter = adap; - pi->tx_chan = adap->pf % adap->params.nports; - SET_NETDEV_DEV(netdev, &pdev->dev); - - adap->port[0] = netdev; - pi->port_id = 0; - - err = register_netdev(adap->port[0]); - if (err) { - pr_info("Unable to register VF mgmt netdev %s\n", name); - free_netdev(adap->port[0]); - adap->port[0] = NULL; - return err; - } - return 0; - } - static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { struct adapter *adap = pci_get_drvdata(pdev); @@@ -4818,7 -4984,7 +4984,7 @@@ /* Check if cxgb4 is the MASTER and fw is initialized */ if (!(pcie_fw & PCIE_FW_INIT_F) || !(pcie_fw & PCIE_FW_MASTER_VLD_F) || - PCIE_FW_MASTER_G(pcie_fw) != 4) { + PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) { dev_warn(&pdev->dev, "cxgb4 driver needs to be MASTER to support SRIOV\n"); return -EOPNOTSUPP; @@@ -4830,46 -4996,132 +4996,132 @@@ if (current_vfs && pci_vfs_assigned(pdev)) { dev_err(&pdev->dev, "Cannot modify SR-IOV while VFs are assigned\n"); - num_vfs = current_vfs; - return num_vfs; + return current_vfs; } - - /* Disable SRIOV when zero is passed. - * One needs to disable SRIOV before modifying it, else - * stack throws the below warning: - * " 'n' VFs already enabled. Disable before enabling 'm' VFs." + /* Note that the upper-level code ensures that we're never called with + * a non-zero "num_vfs" when we already have VFs instantiated. But + * it never hurts to code defensively. */ + if (num_vfs != 0 && current_vfs != 0) + return -EBUSY; + + /* Nothing to do for no change. */ + if (num_vfs == current_vfs) + return num_vfs; + + /* Disable SRIOV when zero is passed. */ if (!num_vfs) { pci_disable_sriov(pdev); - if (adap->port[0]) { - unregister_netdev(adap->port[0]); - adap->port[0] = NULL; - } + /* free VF Management Interface */ + unregister_netdev(adap->port[0]); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + /* free VF resources */ + adap->num_vfs = 0; kfree(adap->vfinfo); adap->vfinfo = NULL; - adap->num_vfs = 0; - return num_vfs; + return 0; }
- if (num_vfs != current_vfs) { - err = pci_enable_sriov(pdev, num_vfs); + if (!current_vfs) { + struct fw_pfvf_cmd port_cmd, port_rpl; + struct net_device *netdev; + unsigned int pmask, port; + struct pci_dev *pbridge; + struct port_info *pi; + char name[IFNAMSIZ]; + u32 devcap2; + u16 flags; + int pos; + + /* If we want to instantiate Virtual Functions, then our + * parent bridge's PCI-E needs to support Alternative Routing + * ID (ARI) because our VFs will show up at function offset 8 + * and above. + */ + pbridge = pdev->bus->self; + pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP); + pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags); + pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2); + + if ((flags & PCI_EXP_FLAGS_VERS) < 2 || + !(devcap2 & PCI_EXP_DEVCAP2_ARI)) { + /* Our parent bridge does not support ARI so issue a + * warning and skip instantiating the VFs. They + * won't be reachable. + */ + dev_warn(&pdev->dev, "Parent bridge %02x:%02x.%x doesn't support ARI; can't instantiate Virtual Functions\n", + pbridge->bus->number, PCI_SLOT(pbridge->devfn), + PCI_FUNC(pbridge->devfn)); + return -ENOTSUPP; + } + memset(&port_cmd, 0, sizeof(port_cmd)); + port_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PFVF_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_READ_F | + FW_PFVF_CMD_PFN_V(adap->pf) | + FW_PFVF_CMD_VFN_V(0)); + port_cmd.retval_len16 = cpu_to_be32(FW_LEN16(port_cmd)); + err = t4_wr_mbox(adap, adap->mbox, &port_cmd, sizeof(port_cmd), + &port_rpl); if (err) return err; + pmask = FW_PFVF_CMD_PMASK_G(be32_to_cpu(port_rpl.type_to_neq)); + port = ffs(pmask) - 1; + /* Allocate VF Management Interface. */ + snprintf(name, IFNAMSIZ, "mgmtpf%d,%d", adap->adap_idx, + adap->pf); + netdev = alloc_netdev(sizeof(struct port_info), + name, NET_NAME_UNKNOWN, cxgb4_mgmt_setup); + if (!netdev) + return -ENOMEM;
- adap->num_vfs = num_vfs; - err = config_mgmt_dev(pdev); - if (err) + pi = netdev_priv(netdev); + pi->adapter = adap; + pi->lport = port; + pi->tx_chan = port; + SET_NETDEV_DEV(netdev, &pdev->dev); + + adap->port[0] = netdev; + pi->port_id = 0; + + err = register_netdev(adap->port[0]); + if (err) { + pr_info("Unable to register VF mgmt netdev %s\n", name); + free_netdev(adap->port[0]); + adap->port[0] = NULL; return err; + } + /* Allocate and set up VF Information. */ + adap->vfinfo = kcalloc(pci_sriov_get_totalvfs(pdev), + sizeof(struct vf_info), GFP_KERNEL); + if (!adap->vfinfo) { + unregister_netdev(adap->port[0]); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + return -ENOMEM; + } + cxgb4_mgmt_fill_vf_station_mac_addr(adap); + } + /* Instantiate the requested number of VFs. */ + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + pr_info("Unable to instantiate %d VFs\n", num_vfs); + if (!current_vfs) { + unregister_netdev(adap->port[0]); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + kfree(adap->vfinfo); + adap->vfinfo = NULL; + } + return err; }
- adap->vfinfo = kcalloc(adap->num_vfs, - sizeof(struct vf_info), GFP_KERNEL); - if (adap->vfinfo) - fill_vf_station_mac_addr(adap); + adap->num_vfs = num_vfs; return num_vfs; } - #endif + #endif /* CONFIG_PCI_IOV */
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@@ -4882,9 -5134,6 +5134,6 @@@ u32 whoami, pl_rev; enum chip_type chip; static int adap_idx = 1; - #ifdef CONFIG_PCI_IOV - u32 v, port_vec; - #endif
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
@@@ -4908,6 -5157,13 +5157,13 @@@ goto out_disable_device; }
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + err = -ENOMEM; + goto out_unmap_bar0; + } + + adapter->regs = regs; err = t4_wait_dev_ready(regs); if (err < 0) goto out_unmap_bar0; @@@ -4918,13 -5174,29 +5174,29 @@@ chip = get_chip_type(pdev, pl_rev); func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ? SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); + + adapter->pdev = pdev; + adapter->pdev_dev = &pdev->dev; + adapter->name = pci_name(pdev); + adapter->mbox = func; + adapter->pf = func; + adapter->msg_enable = DFLT_MSG_ENABLE; + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto out_free_adapter; + } + spin_lock_init(&adapter->mbox_lock); + INIT_LIST_HEAD(&adapter->mlist.list); + pci_set_drvdata(pdev, adapter); + if (func != ent->driver_data) { - #ifndef CONFIG_PCI_IOV - iounmap(regs); - #endif pci_disable_device(pdev); pci_save_state(pdev); /* to restore SR-IOV later */ - goto sriov; + return 0; }
if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { @@@ -4933,53 -5205,30 +5205,30 @@@ if (err) { dev_err(&pdev->dev, "unable to obtain 64-bit DMA for " "coherent allocations\n"); - goto out_unmap_bar0; + goto out_free_adapter; } } else { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "no usable DMA configuration\n"); - goto out_unmap_bar0; + goto out_free_adapter; } }
pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); pci_save_state(pdev); - - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); - if (!adapter) { - err = -ENOMEM; - goto out_unmap_bar0; - } adap_idx++; - adapter->workq = create_singlethread_workqueue("cxgb4"); if (!adapter->workq) { err = -ENOMEM; goto out_free_adapter; }
- adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + - (sizeof(struct mbox_cmd) * - T4_OS_LOG_MBOX_CMDS), - GFP_KERNEL); - if (!adapter->mbox_log) { - err = -ENOMEM; - goto out_free_adapter; - } adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
/* PCI device has been enabled */ adapter->flags |= DEV_ENABLED; - - adapter->regs = regs; - adapter->pdev = pdev; - adapter->pdev_dev = &pdev->dev; - adapter->name = pci_name(pdev); - adapter->mbox = func; - adapter->pf = func; - adapter->msg_enable = DFLT_MSG_ENABLE; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
/* If possible, we use PCIe Relaxed Ordering Attribute to deliver @@@ -5002,9 -5251,6 +5251,6 @@@ spin_lock_init(&adapter->stats_lock); spin_lock_init(&adapter->tid_release_lock); spin_lock_init(&adapter->win0_lock); - spin_lock_init(&adapter->mbox_lock); - - INIT_LIST_HEAD(&adapter->mlist.list);
INIT_WORK(&adapter->tid_release_task, process_tid_release_list); INIT_WORK(&adapter->db_full_task, process_db_full); @@@ -5080,6 -5326,10 +5326,10 @@@ NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_TC; + + if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@@ -5273,58 -5523,6 +5523,6 @@@ setup_fw_sge_queues(adapter); return 0;
- sriov: - #ifdef CONFIG_PCI_IOV - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); - if (!adapter) { - err = -ENOMEM; - goto free_pci_region; - } - - adapter->pdev = pdev; - adapter->pdev_dev = &pdev->dev; - adapter->name = pci_name(pdev); - adapter->mbox = func; - adapter->pf = func; - adapter->regs = regs; - adapter->adap_idx = adap_idx; - adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + - (sizeof(struct mbox_cmd) * - T4_OS_LOG_MBOX_CMDS), - GFP_KERNEL); - if (!adapter->mbox_log) { - err = -ENOMEM; - goto free_adapter; - } - spin_lock_init(&adapter->mbox_lock); - INIT_LIST_HEAD(&adapter->mlist.list); - - v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC); - err = t4_query_params(adapter, adapter->mbox, adapter->pf, 0, 1, - &v, &port_vec); - if (err < 0) { - dev_err(adapter->pdev_dev, "Could not fetch port params\n"); - goto free_mbox_log; - } - - adapter->params.nports = hweight32(port_vec); - pci_set_drvdata(pdev, adapter); - return 0; - - free_mbox_log: - kfree(adapter->mbox_log); - free_adapter: - kfree(adapter); - free_pci_region: - iounmap(regs); - pci_disable_sriov(pdev); - pci_release_regions(pdev); - return err; - #else - return 0; - #endif - out_free_dev: free_some_resources(adapter); if (adapter->flags & USING_MSIX) @@@ -5416,14 -5614,7 +5614,7 @@@ static void remove_one(struct pci_dev * } #ifdef CONFIG_PCI_IOV else { - if (adapter->port[0]) - unregister_netdev(adapter->port[0]); - iounmap(adapter->regs); - kfree(adapter->vfinfo); - kfree(adapter->mbox_log); - kfree(adapter); - pci_disable_sriov(pdev); - pci_release_regions(pdev); + cxgb4_iov_configure(adapter->pdev, 0); } #endif } @@@ -5467,18 -5658,6 +5658,6 @@@ static void shutdown_one(struct pci_de if (adapter->flags & FW_OK) t4_fw_bye(adapter, adapter->mbox); } - #ifdef CONFIG_PCI_IOV - else { - if (adapter->port[0]) - unregister_netdev(adapter->port[0]); - iounmap(adapter->regs); - kfree(adapter->vfinfo); - kfree(adapter->mbox_log); - kfree(adapter); - pci_disable_sriov(pdev); - pci_release_regions(pdev); - } - #endif }
static struct pci_driver cxgb4_driver = { diff --combined drivers/net/ethernet/chelsio/cxgb4/sge.c index 6c7b0ac0b48b,a7af71bf14fb..6e310a0da7c9 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@@ -41,7 -41,6 +41,7 @@@ #include <linux/jiffies.h> #include <linux/prefetch.h> #include <linux/export.h> +#include <net/xfrm.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/busy_poll.h> @@@ -54,7 -53,6 +54,7 @@@ #include "t4_msg.h" #include "t4fw_api.h" #include "cxgb4_ptp.h" +#include "cxgb4_uld.h"
/* * Rx buffer size. We use largish buffers if possible but settle for single @@@ -112,6 -110,14 +112,6 @@@ #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
/* - * Suspend an Ethernet Tx queue with fewer available descriptors than this. - * This is the same as calc_tx_descs() for a TSO packet with - * nr_frags == MAX_SKB_FRAGS. - */ -#define ETHTXQ_STOP_THRES \ - (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) - -/* * Suspension threshold for non-Ethernet Tx queues. We require enough room * for a full sized WR. */ @@@ -128,6 -134,11 +128,6 @@@ */ #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
-struct tx_sw_desc { /* SW state per Tx descriptor */ - struct sk_buff *skb; - struct ulptx_sgl *sgl; -}; - struct rx_sw_desc { /* SW state per Rx descriptor */ struct page *page; dma_addr_t dma_addr; @@@ -237,8 -248,8 +237,8 @@@ static inline bool fl_starving(const st return fl->avail - fl->pend_cred <= s->fl_starve_thres; }
-static int map_skb(struct device *dev, const struct sk_buff *skb, - dma_addr_t *addr) +int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb, + dma_addr_t *addr) { const skb_frag_t *fp, *end; const struct skb_shared_info *si; @@@ -266,7 -277,6 +266,7 @@@ unwind out_err: return -ENOMEM; } +EXPORT_SYMBOL(cxgb4_map_skb);
#ifdef CONFIG_NEED_DMA_MAP_STATE static void unmap_skb(struct device *dev, const struct sk_buff *skb, @@@ -401,7 -411,7 +401,7 @@@ static inline int reclaimable(const str }
/** - * reclaim_completed_tx - reclaims completed Tx descriptors + * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors * @adap: the adapter * @q: the Tx queue to reclaim completed descriptors from * @unmap: whether the buffers should be unmapped for DMA @@@ -410,7 -420,7 +410,7 @@@ * and frees the associated buffers if possible. Called with the Tx * queue locked. */ -static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, +inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, bool unmap) { int avail = reclaimable(q); @@@ -427,7 -437,6 +427,7 @@@ q->in_use -= avail; } } +EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
static inline int get_buf_size(struct adapter *adapter, const struct rx_sw_desc *d) @@@ -761,12 -770,19 +761,19 @@@ static inline unsigned int flits_to_des * Returns whether an Ethernet packet is small enough to fit as * immediate data. Return value corresponds to headroom required. */ - static inline int is_eth_imm(const struct sk_buff *skb) + static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver) { - int hdrlen = skb_shinfo(skb)->gso_size ? - sizeof(struct cpl_tx_pkt_lso_core) : 0; + int hdrlen = 0;
- hdrlen += sizeof(struct cpl_tx_pkt); + if (skb->encapsulation && skb_shinfo(skb)->gso_size && + chip_ver > CHELSIO_T5) { + hdrlen = sizeof(struct cpl_tx_tnl_lso); + hdrlen += sizeof(struct cpl_tx_pkt_core); + } else { + hdrlen = skb_shinfo(skb)->gso_size ? + sizeof(struct cpl_tx_pkt_lso_core) : 0; + hdrlen += sizeof(struct cpl_tx_pkt); + } if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) return hdrlen; return 0; @@@ -779,10 -795,11 +786,11 @@@ * Returns the number of flits needed for a Tx WR for the given Ethernet * packet, including the needed WR and CPL headers. */ - static inline unsigned int calc_tx_flits(const struct sk_buff *skb) + static inline unsigned int calc_tx_flits(const struct sk_buff *skb, + unsigned int chip_ver) { unsigned int flits; - int hdrlen = is_eth_imm(skb); + int hdrlen = is_eth_imm(skb, chip_ver);
/* If the skb is small enough, we can pump it out as a work request * with only immediate data. In that case we just have to have the @@@ -801,13 -818,20 +809,20 @@@ * with an embedded TX Packet Write CPL message. */ flits = sgl_len(skb_shinfo(skb)->nr_frags + 1); - if (skb_shinfo(skb)->gso_size) - flits += (sizeof(struct fw_eth_tx_pkt_wr) + - sizeof(struct cpl_tx_pkt_lso_core) + - sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); - else + if (skb_shinfo(skb)->gso_size) { + if (skb->encapsulation && chip_ver > CHELSIO_T5) + hdrlen = sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_tnl_lso); + else + hdrlen = sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_pkt_lso_core); + + hdrlen += sizeof(struct cpl_tx_pkt_core); + flits += (hdrlen / sizeof(__be64)); + } else { flits += (sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); + } return flits; }
@@@ -818,13 -842,14 +833,14 @@@ * Returns the number of Tx descriptors needed for the given Ethernet * packet, including the needed WR and CPL headers. */ - static inline unsigned int calc_tx_descs(const struct sk_buff *skb) + static inline unsigned int calc_tx_descs(const struct sk_buff *skb, + unsigned int chip_ver) { - return flits_to_desc(calc_tx_flits(skb)); + return flits_to_desc(calc_tx_flits(skb, chip_ver)); }
/** - * write_sgl - populate a scatter/gather list for a packet + * cxgb4_write_sgl - populate a scatter/gather list for a packet * @skb: the packet * @q: the Tx queue we are writing into * @sgl: starting location for writing the SGL @@@ -840,9 -865,9 +856,9 @@@ * right after the end of the SGL but does not account for any potential * wrap around, i.e., @end > @sgl. */ -static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, - struct ulptx_sgl *sgl, u64 *end, unsigned int start, - const dma_addr_t *addr) +void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, unsigned int start, + const dma_addr_t *addr) { unsigned int i, len; struct ulptx_sge_pair *to; @@@ -894,7 -919,6 +910,7 @@@ if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */ *end = 0; } +EXPORT_SYMBOL(cxgb4_write_sgl);
/* This function copies 64 byte coalesced work request to * memory mapped BAR2 space. For coalesced WR SGE fetches @@@ -913,14 -937,14 +929,14 @@@ static void cxgb_pio_copy(u64 __iomem * }
/** - * ring_tx_db - check and potentially ring a Tx queue's doorbell + * cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * @n: number of new descriptors to give to HW * * Ring the doorbel for a Tx queue. */ -static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) +inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { /* Make sure that all writes to the TX Descriptors are committed * before we tell the hardware about them. @@@ -987,10 -1011,9 +1003,10 @@@ wmb(); } } +EXPORT_SYMBOL(cxgb4_ring_tx_db);
/** - * inline_tx_skb - inline a packet's data into Tx descriptors + * cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors * @skb: the packet * @q: the Tx queue where the packet will be inlined * @pos: starting position in the Tx queue where to inline the packet @@@ -1000,8 -1023,8 +1016,8 @@@ * Most of the complexity of this operation is dealing with wrap arounds * in the middle of the packet we want to inline. */ -static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, - void *pos) +void cxgb4_inline_tx_skb(const struct sk_buff *skb, + const struct sge_txq *q, void *pos) { u64 *p; int left = (void *)q->stat - pos; @@@ -1023,7 -1046,6 +1039,7 @@@ if ((uintptr_t)p & 8) *p = 0; } +EXPORT_SYMBOL(cxgb4_inline_tx_skb);
static void *inline_tx_skb_header(const struct sk_buff *skb, const struct sge_txq *q, void *pos, @@@ -1148,6 -1170,105 +1164,105 @@@ cxgb_fcoe_offload(struct sk_buff *skb, } #endif /* CONFIG_CHELSIO_T4_FCOE */
+ /* Returns tunnel type if hardware supports offloading of the same. + * It is called only for T5 and onwards. + */ + enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb) + { + u8 l4_hdr = 0; + enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE; + struct port_info *pi = netdev_priv(skb->dev); + struct adapter *adapter = pi->adapter; + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB)) + return tnl_type; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return tnl_type; + } + + switch (l4_hdr) { + case IPPROTO_UDP: + if (adapter->vxlan_port == udp_hdr(skb)->dest) + tnl_type = TX_TNL_TYPE_VXLAN; + else if (adapter->geneve_port == udp_hdr(skb)->dest) + tnl_type = TX_TNL_TYPE_GENEVE; + break; + default: + return tnl_type; + } + + return tnl_type; + } + + static inline void t6_fill_tnl_lso(struct sk_buff *skb, + struct cpl_tx_tnl_lso *tnl_lso, + enum cpl_tx_tnl_lso_type tnl_type) + { + u32 val; + int in_eth_xtra_len; + int l3hdr_len = skb_network_header_len(skb); + int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; + const struct skb_shared_info *ssi = skb_shinfo(skb); + bool v6 = (ip_hdr(skb)->version == 6); + + val = CPL_TX_TNL_LSO_OPCODE_V(CPL_TX_TNL_LSO) | + CPL_TX_TNL_LSO_FIRST_F | + CPL_TX_TNL_LSO_LAST_F | + (v6 ? CPL_TX_TNL_LSO_IPV6OUT_F : 0) | + CPL_TX_TNL_LSO_ETHHDRLENOUT_V(eth_xtra_len / 4) | + CPL_TX_TNL_LSO_IPHDRLENOUT_V(l3hdr_len / 4) | + (v6 ? 0 : CPL_TX_TNL_LSO_IPHDRCHKOUT_F) | + CPL_TX_TNL_LSO_IPLENSETOUT_F | + (v6 ? 0 : CPL_TX_TNL_LSO_IPIDINCOUT_F); + tnl_lso->op_to_IpIdSplitOut = htonl(val); + + tnl_lso->IpIdOffsetOut = 0; + + /* Get the tunnel header length */ + val = skb_inner_mac_header(skb) - skb_mac_header(skb); + in_eth_xtra_len = skb_inner_network_header(skb) - + skb_inner_mac_header(skb) - ETH_HLEN; + + switch (tnl_type) { + case TX_TNL_TYPE_VXLAN: + case TX_TNL_TYPE_GENEVE: + tnl_lso->UdpLenSetOut_to_TnlHdrLen = + htons(CPL_TX_TNL_LSO_UDPCHKCLROUT_F | + CPL_TX_TNL_LSO_UDPLENSETOUT_F); + break; + default: + tnl_lso->UdpLenSetOut_to_TnlHdrLen = 0; + break; + } + + tnl_lso->UdpLenSetOut_to_TnlHdrLen |= + htons(CPL_TX_TNL_LSO_TNLHDRLEN_V(val) | + CPL_TX_TNL_LSO_TNLTYPE_V(tnl_type)); + + tnl_lso->r1 = 0; + + val = CPL_TX_TNL_LSO_ETHHDRLEN_V(in_eth_xtra_len / 4) | + CPL_TX_TNL_LSO_IPV6_V(inner_ip_hdr(skb)->version == 6) | + CPL_TX_TNL_LSO_IPHDRLEN_V(skb_inner_network_header_len(skb) / 4) | + CPL_TX_TNL_LSO_TCPHDRLEN_V(inner_tcp_hdrlen(skb) / 4); + tnl_lso->Flow_to_TcpHdrLen = htonl(val); + + tnl_lso->IpIdOffset = htons(0); + + tnl_lso->IpIdSplit_to_Mss = htons(CPL_TX_TNL_LSO_MSS_V(ssi->gso_size)); + tnl_lso->TCPSeqOffset = htonl(0); + tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len)); + } + /** * t4_eth_xmit - add a packet to an Ethernet Tx queue * @skb: the packet @@@ -1171,6 -1292,9 +1286,9 @@@ netdev_tx_t t4_eth_xmit(struct sk_buff bool immediate = false; int len, max_pkt_len; bool ptp_enabled = is_ptp_enabled(skb, dev); + unsigned int chip_ver; + enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE; + #ifdef CONFIG_CHELSIO_T4_FCOE int err; #endif /* CONFIG_CHELSIO_T4_FCOE */ @@@ -1193,12 -1317,6 +1311,12 @@@ out_free: dev_kfree_skb_any(skb)
pi = netdev_priv(dev); adap = pi->adapter; + ssi = skb_shinfo(skb); +#ifdef CONFIG_CHELSIO_IPSEC_INLINE + if (xfrm_offload(skb) && !ssi->gso_size) + return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev); +#endif /* CHELSIO_IPSEC_INLINE */ + qidx = skb_get_queue_mapping(skb); if (ptp_enabled) { spin_lock(&adap->ptp_lock); @@@ -1215,7 -1333,7 +1333,7 @@@ } skb_tx_timestamp(skb);
- reclaim_completed_tx(adap, &q->q, true); + cxgb4_reclaim_completed_tx(adap, &q->q, true); cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
#ifdef CONFIG_CHELSIO_T4_FCOE @@@ -1227,7 -1345,8 +1345,8 @@@ } #endif /* CONFIG_CHELSIO_T4_FCOE */
- flits = calc_tx_flits(skb); + chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); + flits = calc_tx_flits(skb, chip_ver); ndesc = flits_to_desc(flits); credits = txq_avail(&q->q) - ndesc;
@@@ -1241,11 -1360,14 +1360,14 @@@ return NETDEV_TX_BUSY; }
- if (is_eth_imm(skb)) + if (is_eth_imm(skb, chip_ver)) immediate = true;
+ if (skb->encapsulation && chip_ver > CHELSIO_T5) + tnl_type = cxgb_encap_offload_supported(skb); + if (!immediate && - unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) { + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) { q->mapping_err++; if (ptp_enabled) spin_unlock(&adap->ptp_lock); @@@ -1264,38 -1386,64 +1386,63 @@@ end = (u64 *)wr + flits;
len = immediate ? skb->len : 0; - ssi = skb_shinfo(skb); if (ssi->gso_size) { struct cpl_tx_pkt_lso *lso = (void *)wr; bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0; int l3hdr_len = skb_network_header_len(skb); int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; + struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
- len += sizeof(*lso); - wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN_V(len)); - lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F | - LSO_IPV6_V(v6) | - LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | - LSO_IPHDR_LEN_V(l3hdr_len / 4) | - LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); - lso->c.ipid_ofst = htons(0); - lso->c.mss = htons(ssi->gso_size); - lso->c.seqno_offset = htonl(0); - if (is_t4(adap->params.chip)) - lso->c.len = htonl(skb->len); + if (tnl_type) + len += sizeof(*tnl_lso); else - lso->c.len = htonl(LSO_T5_XFER_SIZE_V(skb->len)); - cpl = (void *)(lso + 1); + len += sizeof(*lso);
- if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) - cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len); - else - cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len); + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(len)); + if (tnl_type) { + struct iphdr *iph = ip_hdr(skb);
- cntrl |= TXPKT_CSUM_TYPE_V(v6 ? - TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN_V(l3hdr_len); + t6_fill_tnl_lso(skb, tnl_lso, tnl_type); + cpl = (void *)(tnl_lso + 1); + /* Driver is expected to compute partial checksum that + * does not include the IP Total Length. + */ + if (iph->version == 4) { + iph->check = 0; + iph->tot_len = 0; + iph->check = (u16)(~ip_fast_csum((u8 *)iph, + iph->ihl)); + } + if (skb->ip_summed == CHECKSUM_PARTIAL) + cntrl = hwcsum(adap->params.chip, skb); + } else { + lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); + lso->c.ipid_ofst = htons(0); + lso->c.mss = htons(ssi->gso_size); + lso->c.seqno_offset = htonl(0); + if (is_t4(adap->params.chip)) + lso->c.len = htonl(skb->len); + else + lso->c.len = + htonl(LSO_T5_XFER_SIZE_V(skb->len)); + cpl = (void *)(lso + 1); + + if (CHELSIO_CHIP_VERSION(adap->params.chip) + <= CHELSIO_T5) + cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len); + else + cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len); + + cntrl |= TXPKT_CSUM_TYPE_V(v6 ? + TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len); + } q->tso++; q->tx_cso += ssi->gso_segs; } else { @@@ -1340,13 -1488,13 +1487,13 @@@ cpl->ctrl1 = cpu_to_be64(cntrl);
if (immediate) { - inline_tx_skb(skb, &q->q, cpl + 1); + cxgb4_inline_tx_skb(skb, &q->q, cpl + 1); dev_consume_skb_any(skb); } else { int last_desc;
- write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0, - addr); + cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), + end, 0, addr); skb_orphan(skb);
last_desc = q->q.pidx + ndesc - 1; @@@ -1358,7 -1506,7 +1505,7 @@@
txq_advance(&q->q, ndesc);
- ring_tx_db(adap, &q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); if (ptp_enabled) spin_unlock(&adap->ptp_lock); return NETDEV_TX_OK; @@@ -1368,9 -1516,9 +1515,9 @@@ * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * - * This is a variant of reclaim_completed_tx() that is used for Tx queues - * that send only immediate data (presently just the control queues) and - * thus do not have any sk_buffs to release. + * This is a variant of cxgb4_reclaim_completed_tx() that is used + * for Tx queues that send only immediate data (presently just + * the control queues) and thus do not have any sk_buffs to release. */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { @@@ -1445,13 -1593,13 +1592,13 @@@ static int ctrl_xmit(struct sge_ctrl_tx }
wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; - inline_tx_skb(skb, &q->q, wr); + cxgb4_inline_tx_skb(skb, &q->q, wr);
txq_advance(&q->q, ndesc); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) ctrlq_check_stop(q, wr);
- ring_tx_db(q->adap, &q->q, ndesc); + cxgb4_ring_tx_db(q->adap, &q->q, ndesc); spin_unlock(&q->sendq.lock);
kfree_skb(skb); @@@ -1486,7 -1634,7 +1633,7 @@@ static void restart_ctrlq(unsigned lon txq_advance(&q->q, ndesc); spin_unlock(&q->sendq.lock);
- inline_tx_skb(skb, &q->q, wr); + cxgb4_inline_tx_skb(skb, &q->q, wr); kfree_skb(skb);
if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { @@@ -1499,15 -1647,14 +1646,15 @@@ } } if (written > 16) { - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written); written = 0; } spin_lock(&q->sendq.lock); } q->full = 0; -ringdb: if (written) - ring_tx_db(q->adap, &q->q, written); +ringdb: + if (written) + cxgb4_ring_tx_db(q->adap, &q->q, written); spin_unlock(&q->sendq.lock); }
@@@ -1650,7 -1797,7 +1797,7 @@@ static void service_ofldq(struct sge_ul */ spin_unlock(&q->sendq.lock);
- reclaim_completed_tx(q->adap, &q->q, false); + cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
flits = skb->priority; /* previously saved */ ndesc = flits_to_desc(flits); @@@ -1661,9 -1808,9 +1808,9 @@@
pos = (u64 *)&q->q.desc[q->q.pidx]; if (is_ofld_imm(skb)) - inline_tx_skb(skb, &q->q, pos); - else if (map_skb(q->adap->pdev_dev, skb, - (dma_addr_t *)skb->head)) { + cxgb4_inline_tx_skb(skb, &q->q, pos); + else if (cxgb4_map_skb(q->adap->pdev_dev, skb, + (dma_addr_t *)skb->head)) { txq_stop_maperr(q); spin_lock(&q->sendq.lock); break; @@@ -1694,9 -1841,9 +1841,9 @@@ pos = (void *)txq->desc; }
- write_sgl(skb, &q->q, (void *)pos, - end, hdr_len, - (dma_addr_t *)skb->head); + cxgb4_write_sgl(skb, &q->q, (void *)pos, + end, hdr_len, + (dma_addr_t *)skb->head); #ifdef CONFIG_NEED_DMA_MAP_STATE skb->dev = q->adap->port[0]; skb->destructor = deferred_unmap_destructor; @@@ -1710,7 -1857,7 +1857,7 @@@ txq_advance(&q->q, ndesc); written += ndesc; if (unlikely(written > 32)) { - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written); written = 0; }
@@@ -1725,7 -1872,7 +1872,7 @@@ kfree_skb(skb); } if (likely(written)) - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written);
/*Indicate that no thread is processing the Pending Send Queue * currently. diff --combined drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index be3658301832,f88766d2401d..0d83b4064a78 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@@ -513,13 -513,6 +513,13 @@@ struct fw_ulptx_wr u64 cookie; };
+#define FW_ULPTX_WR_DATA_S 28 +#define FW_ULPTX_WR_DATA_M 0x1 +#define FW_ULPTX_WR_DATA_V(x) ((x) << FW_ULPTX_WR_DATA_S) +#define FW_ULPTX_WR_DATA_G(x) \ + (((x) >> FW_ULPTX_WR_DATA_S) & FW_ULPTX_WR_DATA_M) +#define FW_ULPTX_WR_DATA_F FW_ULPTX_WR_DATA_V(1U) + struct fw_tp_wr { __be32 op_to_immdlen; __be32 flowid_len16; @@@ -835,6 -828,7 +835,7 @@@ enum fw_ldst_addrspc FW_LDST_ADDRSPC_MPS = 0x0020, FW_LDST_ADDRSPC_FUNC = 0x0028, FW_LDST_ADDRSPC_FUNC_PCIE = 0x0029, + FW_LDST_ADDRSPC_I2C = 0x0038, };
enum fw_ldst_mps_fid { @@@ -2066,6 -2060,7 +2067,7 @@@ struct fw_vi_cmd #define FW_VI_MAC_ADD_MAC 0x3FF #define FW_VI_MAC_ADD_PERSIST_MAC 0x3FE #define FW_VI_MAC_MAC_BASED_FREE 0x3FD + #define FW_VI_MAC_ID_BASED_FREE 0x3FC #define FW_CLS_TCAM_NUM_ENTRIES 336
enum fw_vi_mac_smac { @@@ -2082,6 -2077,13 +2084,13 @@@ enum fw_vi_mac_result FW_VI_MAC_R_F_ACL_CHECK };
+ enum fw_vi_mac_entry_types { + FW_VI_MAC_TYPE_EXACTMAC, + FW_VI_MAC_TYPE_HASHVEC, + FW_VI_MAC_TYPE_RAW, + FW_VI_MAC_TYPE_EXACTMAC_VNI, + }; + struct fw_vi_mac_cmd { __be32 op_to_viid; __be32 freemacs_to_len16; @@@ -2093,6 -2095,13 +2102,13 @@@ struct fw_vi_mac_hash { __be64 hashvec; } hash; + struct fw_vi_mac_raw { + __be32 raw_idx_pkd; + __be32 data0_pkd; + __be32 data1[2]; + __be64 data0m_pkd; + __be32 data1m[2]; + } raw; } u; };
@@@ -2102,6 -2111,12 +2118,12 @@@ #define FW_VI_MAC_CMD_FREEMACS_S 31 #define FW_VI_MAC_CMD_FREEMACS_V(x) ((x) << FW_VI_MAC_CMD_FREEMACS_S)
+ #define FW_VI_MAC_CMD_ENTRY_TYPE_S 23 + #define FW_VI_MAC_CMD_ENTRY_TYPE_M 0x7 + #define FW_VI_MAC_CMD_ENTRY_TYPE_V(x) ((x) << FW_VI_MAC_CMD_ENTRY_TYPE_S) + #define FW_VI_MAC_CMD_ENTRY_TYPE_G(x) \ + (((x) >> FW_VI_MAC_CMD_ENTRY_TYPE_S) & FW_VI_MAC_CMD_ENTRY_TYPE_M) + #define FW_VI_MAC_CMD_HASHVECEN_S 23 #define FW_VI_MAC_CMD_HASHVECEN_V(x) ((x) << FW_VI_MAC_CMD_HASHVECEN_S) #define FW_VI_MAC_CMD_HASHVECEN_F FW_VI_MAC_CMD_HASHVECEN_V(1U) @@@ -2128,6 -2143,12 +2150,12 @@@ #define FW_VI_MAC_CMD_IDX_G(x) \ (((x) >> FW_VI_MAC_CMD_IDX_S) & FW_VI_MAC_CMD_IDX_M)
+ #define FW_VI_MAC_CMD_RAW_IDX_S 16 + #define FW_VI_MAC_CMD_RAW_IDX_M 0xffff + #define FW_VI_MAC_CMD_RAW_IDX_V(x) ((x) << FW_VI_MAC_CMD_RAW_IDX_S) + #define FW_VI_MAC_CMD_RAW_IDX_G(x) \ + (((x) >> FW_VI_MAC_CMD_RAW_IDX_S) & FW_VI_MAC_CMD_RAW_IDX_M) + #define FW_RXMODE_MTU_NO_CHG 65535
struct fw_vi_rxmode_cmd { @@@ -2332,14 -2353,22 +2360,22 @@@ struct fw_acl_vlan_cmd #define FW_ACL_VLAN_CMD_VFN_S 0 #define FW_ACL_VLAN_CMD_VFN_V(x) ((x) << FW_ACL_VLAN_CMD_VFN_S)
- #define FW_ACL_VLAN_CMD_EN_S 31 - #define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S) + #define FW_ACL_VLAN_CMD_EN_S 31 + #define FW_ACL_VLAN_CMD_EN_M 0x1 + #define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S) + #define FW_ACL_VLAN_CMD_EN_G(x) \ + (((x) >> S_FW_ACL_VLAN_CMD_EN_S) & FW_ACL_VLAN_CMD_EN_M) + #define FW_ACL_VLAN_CMD_EN_F FW_ACL_VLAN_CMD_EN_V(1U)
#define FW_ACL_VLAN_CMD_DROPNOVLAN_S 7 #define FW_ACL_VLAN_CMD_DROPNOVLAN_V(x) ((x) << FW_ACL_VLAN_CMD_DROPNOVLAN_S)
- #define FW_ACL_VLAN_CMD_FM_S 6 - #define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S) + #define FW_ACL_VLAN_CMD_FM_S 6 + #define FW_ACL_VLAN_CMD_FM_M 0x1 + #define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S) + #define FW_ACL_VLAN_CMD_FM_G(x) \ + (((x) >> FW_ACL_VLAN_CMD_FM_S) & FW_ACL_VLAN_CMD_FM_M) + #define FW_ACL_VLAN_CMD_FM_F FW_ACL_VLAN_CMD_FM_V(1U)
/* old 16-bit port capabilities bitmap (fw_port_cap16_t) */ enum fw_port_cap { @@@ -2835,6 -2864,7 +2871,7 @@@ enum fw_port_type FW_PORT_TYPE_CR2_QSFP, FW_PORT_TYPE_SFP28, FW_PORT_TYPE_KR_SFP28, + FW_PORT_TYPE_KR_XLAUI,
FW_PORT_TYPE_NONE = FW_PORT_CMD_PTYPE_M }; diff --combined drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 5bbcaf8298f6,c728ffa095de..9df231b6ee89 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@@ -241,13 -241,12 +241,13 @@@ static int dwmac4_rx_check_timestamp(vo u32 own, ctxt; int ret = 1;
- own = p->des3 & RDES3_OWN; - ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + own = le32_to_cpu(p->des3) & RDES3_OWN; + ctxt = ((le32_to_cpu(p->des3) & RDES3_CONTEXT_DESCRIPTOR) >> RDES3_CONTEXT_DESCRIPTOR_SHIFT);
if (likely(!own && ctxt)) { - if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + if ((p->des0 == cpu_to_le32(0xffffffff)) && + (p->des1 == cpu_to_le32(0xffffffff))) /* Corrupted value */ ret = -EINVAL; else @@@ -266,7 -265,7 +266,7 @@@ static int dwmac4_wrback_get_rx_timesta int ret = -EINVAL;
/* Get the status from normal w/b descriptor */ - if (likely(p->des3 & TDES3_RS1V)) { + if (likely(p->des3 & cpu_to_le32(TDES3_RS1V))) { if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) { int i = 0;
@@@ -335,7 -334,7 +335,7 @@@ static void dwmac4_rd_prepare_tx_desc(s if (tx_own) tdes3 |= TDES3_OWN;
- if (is_fs & tx_own) + if (is_fs && tx_own) /* When the own bit, for the first frame, has to be set, all * descriptors for the same frame has to be set before, to * avoid race condition. @@@ -378,7 -377,7 +378,7 @@@ static void dwmac4_rd_prepare_tso_tx_de if (tx_own) tdes3 |= TDES3_OWN;
- if (is_fs & tx_own) + if (is_fs && tx_own) /* When the own bit, for the first frame, has to be set, all * descriptors for the same frame has to be set before, to * avoid race condition. @@@ -407,7 -406,7 +407,7 @@@ static void dwmac4_display_ring(void *h pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
for (i = 0; i < size; i++) { - pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", i, (unsigned int)virt_to_phys(p), le32_to_cpu(p->des0), le32_to_cpu(p->des1), le32_to_cpu(p->des2), le32_to_cpu(p->des3)); diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 323464576fc0,7ad841434ec8..d8001fd2e4f1 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@@ -2003,22 -2003,60 +2003,60 @@@ static void stmmac_set_dma_operation_mo static void stmmac_dma_interrupt(struct stmmac_priv *priv) { u32 tx_channel_count = priv->plat->tx_queues_to_use; - int status; + u32 rx_channel_count = priv->plat->rx_queues_to_use; + u32 channels_to_check = tx_channel_count > rx_channel_count ? + tx_channel_count : rx_channel_count; u32 chan; + bool poll_scheduled = false; + int status[channels_to_check]; + + /* Each DMA channel can be used for rx and tx simultaneously, yet + * napi_struct is embedded in struct stmmac_rx_queue rather than in a + * stmmac_channel struct. + * Because of this, stmmac_poll currently checks (and possibly wakes) + * all tx queues rather than just a single tx queue. + */ + for (chan = 0; chan < channels_to_check; chan++) + status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr, + &priv->xstats, + chan);
- for (chan = 0; chan < tx_channel_count; chan++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + for (chan = 0; chan < rx_channel_count; chan++) { + if (likely(status[chan] & handle_rx)) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
- status = priv->hw->dma->dma_interrupt(priv->ioaddr, - &priv->xstats, chan); - if (likely((status & handle_rx)) || (status & handle_tx)) { if (likely(napi_schedule_prep(&rx_q->napi))) { stmmac_disable_dma_irq(priv, chan); __napi_schedule(&rx_q->napi); + poll_scheduled = true; } } + }
- if (unlikely(status & tx_hard_error_bump_tc)) { + /* If we scheduled poll, we already know that tx queues will be checked. + * If we didn't schedule poll, see if any DMA channel (used by tx) has a + * completed transmission, if so, call stmmac_poll (once). + */ + if (!poll_scheduled) { + for (chan = 0; chan < tx_channel_count; chan++) { + if (status[chan] & handle_tx) { + /* It doesn't matter what rx queue we choose + * here. We use 0 since it always exists. + */ + struct stmmac_rx_queue *rx_q = + &priv->rx_queue[0]; + + if (likely(napi_schedule_prep(&rx_q->napi))) { + stmmac_disable_dma_irq(priv, chan); + __napi_schedule(&rx_q->napi); + } + break; + } + } + } + + for (chan = 0; chan < tx_channel_count; chan++) { + if (unlikely(status[chan] & tx_hard_error_bump_tc)) { /* Try to bump up the dma threshold on this failure */ if (unlikely(priv->xstats.threshold != SF_DMA_MODE) && (tc <= 256)) { @@@ -2035,7 -2073,7 +2073,7 @@@ chan); priv->xstats.threshold = tc; } - } else if (unlikely(status == tx_hard_error)) { + } else if (unlikely(status[chan] == tx_hard_error)) { stmmac_tx_err(priv, chan); } } @@@ -2489,7 -2527,7 +2527,7 @@@ static int stmmac_hw_setup(struct net_d }
/* Initialize the MAC Core */ - priv->hw->mac->core_init(priv->hw, dev->mtu); + priv->hw->mac->core_init(priv->hw, dev);
/* Initialize MTL*/ if (priv->synopsys_id >= DWMAC_CORE_4_00) @@@ -2539,7 -2577,7 +2577,7 @@@ }
if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane) - priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0); + priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1, priv->hw->ps, 0);
/* set TX and RX rings length */ stmmac_set_rings_length(priv); @@@ -3404,9 -3442,8 +3442,8 @@@ static int stmmac_rx(struct stmmac_pri if (netif_msg_rx_status(priv)) { netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n", p, entry, des); - if (frame_len > ETH_FRAME_LEN) - netdev_dbg(priv->dev, "frame size %d, COE: %d\n", - frame_len, status); + netdev_dbg(priv->dev, "frame size %d, COE: %d\n", + frame_len, status); }
/* The zero-copy is always used for all the sizes diff --combined drivers/net/tap.c index f39c6f876e67,77872699c45d..0a5ed004781c --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@@ -330,9 -330,6 +330,6 @@@ rx_handler_result_t tap_handle_frame(st if (!q) return RX_HANDLER_PASS;
- if (__skb_array_full(&q->skb_array)) - goto drop; - skb_push(skb, ETH_HLEN);
/* Apply the forward feature mask so that we perform segmentation @@@ -348,7 -345,7 +345,7 @@@ goto drop;
if (!segs) { - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; goto wake_up; } @@@ -358,7 -355,7 +355,7 @@@ struct sk_buff *nskb = segs->next;
segs->next = NULL; - if (skb_array_produce(&q->skb_array, segs)) { + if (ptr_ring_produce(&q->ring, segs)) { kfree_skb(segs); kfree_skb_list(nskb); break; @@@ -375,7 -372,7 +372,7 @@@ !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; }
@@@ -497,7 -494,7 +494,7 @@@ static void tap_sock_destruct(struct so { struct tap_queue *q = container_of(sk, struct tap_queue, sk);
- skb_array_cleanup(&q->skb_array); + ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); }
static int tap_open(struct inode *inode, struct file *file) @@@ -517,7 -514,7 +514,7 @@@ &tap_proto, 0); if (!q) goto err; - if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { + if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { sk_free(&q->sk); goto err; } @@@ -546,7 -543,7 +543,7 @@@
err = tap_set_queue(tap, file, q); if (err) { - /* tap_sock_destruct() will take care of freeing skb_array */ + /* tap_sock_destruct() will take care of freeing ptr_ring */ goto err_put; }
@@@ -572,10 -569,10 +569,10 @@@ static int tap_release(struct inode *in return 0; }
-static unsigned int tap_poll(struct file *file, poll_table *wait) +static __poll_t tap_poll(struct file *file, poll_table *wait) { struct tap_queue *q = file->private_data; - unsigned int mask = POLLERR; + __poll_t mask = POLLERR;
if (!q) goto out; @@@ -583,7 -580,7 +580,7 @@@ mask = 0; poll_wait(file, &q->wq.wait, wait);
- if (!skb_array_empty(&q->skb_array)) + if (!ptr_ring_empty(&q->ring)) mask |= POLLIN | POLLRDNORM;
if (sock_writeable(&q->sk) || @@@ -844,7 -841,7 +841,7 @@@ static ssize_t tap_do_read(struct tap_q TASK_INTERRUPTIBLE);
/* Read frames from the queue */ - skb = skb_array_consume(&q->skb_array); + skb = ptr_ring_consume(&q->ring); if (skb) break; if (noblock) { @@@ -1176,7 -1173,7 +1173,7 @@@ static int tap_peek_len(struct socket * { struct tap_queue *q = container_of(sock, struct tap_queue, sock); - return skb_array_peek_len(&q->skb_array); + return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); }
/* Ops structure to mimic raw sockets with tun */ @@@ -1202,7 -1199,7 +1199,7 @@@ struct socket *tap_get_socket(struct fi } EXPORT_SYMBOL_GPL(tap_get_socket);
- struct skb_array *tap_get_skb_array(struct file *file) + struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q;
@@@ -1211,29 -1208,30 +1208,30 @@@ q = file->private_data; if (!q) return ERR_PTR(-EBADFD); - return &q->skb_array; + return &q->ring; } - EXPORT_SYMBOL_GPL(tap_get_skb_array); + EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; struct tap_queue *q; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tap->numqueues; int ret, i = 0;
- arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM;
list_for_each_entry(q, &tap->queue_list, next) - arrays[i++] = &q->skb_array; + rings[i++] = &q->ring;
- ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + __skb_array_destroy_skb);
- kfree(arrays); + kfree(rings); return ret; } EXPORT_SYMBOL_GPL(tap_queue_resize); diff --combined drivers/net/tun.c index 7548d8a11bdf,a0c5cb1a1617..0dc66e4fbb2c --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@@ -179,7 -179,8 +179,8 @@@ struct tun_file struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; - struct skb_array tx_array; + struct ptr_ring tx_ring; + struct xdp_rxq_info xdp_rxq; };
struct tun_flow_entry { @@@ -195,6 -196,11 +196,11 @@@
#define TUN_NUM_FLOW_ENTRIES 1024
+ struct tun_prog { + struct rcu_head rcu; + struct bpf_prog *prog; + }; + /* Since the socket were moved to tun_file, to preserve the behavior of persist * device, socket filter, sndbuf and vnet header size were restore when the * file were attached to a persist device. @@@ -232,8 -238,33 +238,33 @@@ struct tun_struct u32 rx_batched; struct tun_pcpu_stats __percpu *pcpu_stats; struct bpf_prog __rcu *xdp_prog; + struct tun_prog __rcu *steering_prog; + struct tun_prog __rcu *filter_prog; };
+ struct veth { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + + bool tun_is_xdp_buff(void *ptr) + { + return (unsigned long)ptr & TUN_XDP_FLAG; + } + EXPORT_SYMBOL(tun_is_xdp_buff); + + void *tun_xdp_to_ptr(void *ptr) + { + return (void *)((unsigned long)ptr | TUN_XDP_FLAG); + } + EXPORT_SYMBOL(tun_xdp_to_ptr); + + void *tun_ptr_to_xdp(void *ptr) + { + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + } + EXPORT_SYMBOL(tun_ptr_to_xdp); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@@ -537,15 -568,12 +568,12 @@@ static inline void tun_flow_save_rps_rx * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ - static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) + static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; u32 txq = 0; u32 numqueues = 0;
- rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues);
txq = __skb_get_hash_symmetric(skb); @@@ -563,10 -591,37 +591,37 @@@ txq -= numqueues; }
- rcu_read_unlock(); return txq; }
+ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) + { + struct tun_prog *prog; + u16 ret = 0; + + prog = rcu_dereference(tun->steering_prog); + if (prog) + ret = bpf_prog_run_clear_cb(prog->prog, skb); + + return ret % tun->numqueues; + } + + static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) + { + struct tun_struct *tun = netdev_priv(dev); + u16 ret; + + rcu_read_lock(); + if (rcu_dereference(tun->steering_prog)) + ret = tun_ebpf_select_queue(tun, skb); + else + ret = tun_automq_select_queue(tun, skb); + rcu_read_unlock(); + + return ret; + } + static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); @@@ -600,22 -655,36 +655,36 @@@ static struct tun_struct *tun_enable_qu return tun; }
+ static void tun_ptr_free(void *ptr) + { + if (!ptr) + return; + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + put_page(virt_to_head_page(xdp->data)); + } else { + __skb_array_destroy_skb(ptr); + } + } + static void tun_queue_purge(struct tun_file *tfile) { - struct sk_buff *skb; + void *ptr;
- while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) - kfree_skb(skb); + while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL) + tun_ptr_free(ptr);
skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_error_queue); }
- static void tun_cleanup_tx_array(struct tun_file *tfile) + static void tun_cleanup_tx_ring(struct tun_file *tfile) { - if (tfile->tx_array.ring.queue) { - skb_array_cleanup(&tfile->tx_array); - memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + if (tfile->tx_ring.queue) { + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + xdp_rxq_info_unreg(&tfile->xdp_rxq); + memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); } }
@@@ -665,7 -734,7 +734,7 @@@ static void __tun_detach(struct tun_fil tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - tun_cleanup_tx_array(tfile); + tun_cleanup_tx_ring(tfile); sock_put(&tfile->sk); } } @@@ -680,7 -749,6 +749,6 @@@ static void tun_detach(struct tun_file static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog); struct tun_file *tfile, *tmp; int i, n = tun->numqueues;
@@@ -707,19 -775,16 +775,16 @@@ /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); + tun_cleanup_tx_ring(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); + tun_cleanup_tx_ring(tfile); } BUG_ON(tun->numdisabled != 0);
- if (xdp_prog) - bpf_prog_put(xdp_prog); - if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } @@@ -760,13 -825,29 +825,29 @@@ static int tun_attach(struct tun_struc }
if (!tfile->detached && - skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { + ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { err = -ENOMEM; goto out; }
tfile->queue_index = tun->numqueues; tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; + + if (tfile->detached) { + /* Re-attach detached tfile, updating XDP queue_index */ + WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq)); + + if (tfile->xdp_rxq.queue_index != tfile->queue_index) + tfile->xdp_rxq.queue_index = tfile->queue_index; + } else { + /* Setup XDP RX-queue info, for new tfile getting attached */ + err = xdp_rxq_info_reg(&tfile->xdp_rxq, + tun->dev, tfile->queue_index); + if (err < 0) + goto out; + err = 0; + } + rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; @@@ -946,23 -1027,10 +1027,10 @@@ static int tun_net_close(struct net_dev }
/* Net device start xmit */ - static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); - int txq = skb->queue_mapping; - struct tun_file *tfile; - u32 numqueues = 0; - - rcu_read_lock(); - tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = READ_ONCE(tun->numqueues); - - /* Drop packet if interface is not attached */ - if (txq >= numqueues) - goto drop; - #ifdef CONFIG_RPS - if (numqueues == 1 && static_key_false(&rps_needed)) { + if (tun->numqueues == 1 && static_key_false(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@@ -978,6 -1046,37 +1046,37 @@@ } } #endif + } + + static unsigned int run_ebpf_filter(struct tun_struct *tun, + struct sk_buff *skb, + int len) + { + struct tun_prog *prog = rcu_dereference(tun->filter_prog); + + if (prog) + len = bpf_prog_run_clear_cb(prog->prog, skb); + + return len; + } + + /* Net device start xmit */ + static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); + int txq = skb->queue_mapping; + struct tun_file *tfile; + int len = skb->len; + + rcu_read_lock(); + tfile = rcu_dereference(tun->tfiles[txq]); + + /* Drop packet if interface is not attached */ + if (txq >= tun->numqueues) + goto drop; + + if (!rcu_dereference(tun->steering_prog)) + tun_automq_xmit(tun, skb);
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
@@@ -993,6 -1092,15 +1092,15 @@@ sk_filter(tfile->socket.sk, skb)) goto drop;
+ len = run_ebpf_filter(tun, skb, len); + + /* Trim extra bytes since we may insert vlan proto & TCI + * in tun_put_user(). + */ + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0; + if (len <= 0 || pskb_trim(skb, len)) + goto drop; + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) goto drop;
@@@ -1005,7 -1113,7 +1113,7 @@@
nf_reset(skb);
- if (skb_array_produce(&tfile->tx_array, skb)) + if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop;
/* Notify and wake up reader process */ @@@ -1178,6 -1286,67 +1286,67 @@@ static const struct net_device_ops tun_ .ndo_get_stats64 = tun_net_get_stats64, };
+ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) + { + struct tun_struct *tun = netdev_priv(dev); + struct xdp_buff *buff = xdp->data_hard_start; + int headroom = xdp->data - xdp->data_hard_start; + struct tun_file *tfile; + u32 numqueues; + int ret = 0; + + /* Assure headroom is available and buff is properly aligned */ + if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp))) + return -ENOSPC; + + *buff = *xdp; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) { + ret = -ENOSPC; + goto out; + } + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Encode the XDP flag into lowest bit for consumer to differ + * XDP buffer from sk_buff. + */ + if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) { + this_cpu_inc(tun->pcpu_stats->tx_dropped); + ret = -ENOSPC; + } + + out: + rcu_read_unlock(); + return ret; + } + + static void tun_xdp_flush(struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile; + u32 numqueues; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) + goto out; + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Notify and wake up reader process */ + if (tfile->flags & TUN_FASYNC) + kill_fasync(&tfile->fasync, SIGIO, POLL_IN); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + + out: + rcu_read_unlock(); + } + static const struct net_device_ops tap_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@@ -1195,6 -1364,8 +1364,8 @@@ .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, .ndo_bpf = tun_xdp, + .ndo_xdp_xmit = tun_xdp_xmit, + .ndo_xdp_flush = tun_xdp_flush, };
static void tun_flow_init(struct tun_struct *tun) @@@ -1257,12 -1428,12 +1428,12 @@@ static void tun_net_init(struct net_dev /* Character device part */
/* Poll */ -static unsigned int tun_chr_poll(struct file *file, poll_table *wait) +static __poll_t tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); struct sock *sk; - unsigned int mask = 0; + __poll_t mask = 0;
if (!tun) return POLLERR; @@@ -1273,7 -1444,7 +1444,7 @@@
poll_wait(file, sk_sleep(sk), wait);
- if (!skb_array_empty(&tfile->tx_array)) + if (!ptr_ring_empty(&tfile->tx_ring)) mask |= POLLIN | POLLRDNORM;
if (tun->dev->flags & IFF_UP && @@@ -1486,6 -1657,7 +1657,7 @@@ static struct sk_buff *tun_build_skb(st xdp.data = buf + pad; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; + xdp.rxq = &tfile->xdp_rxq; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@@ -1560,7 -1732,7 +1732,7 @@@ static ssize_t tun_get_user(struct tun_ int copylen; bool zerocopy = false; int err; - u32 rxhash; + u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tun);
@@@ -1748,7 -1920,10 +1920,10 @@@ rcu_read_unlock(); }
- rxhash = __skb_get_hash_symmetric(skb); + rcu_read_lock(); + if (!rcu_dereference(tun->steering_prog)) + rxhash = __skb_get_hash_symmetric(skb); + rcu_read_unlock();
if (frags) { /* Exercise flow dissector code path. */ @@@ -1792,7 -1967,9 +1967,9 @@@ u64_stats_update_end(&stats->syncp); put_cpu_ptr(stats);
- tun_flow_update(tun, rxhash, tfile); + if (rxhash) + tun_flow_update(tun, rxhash, tfile); + return total_len; }
@@@ -1813,6 -1990,40 +1990,40 @@@ static ssize_t tun_chr_write_iter(struc return result; }
+ static ssize_t tun_put_user_xdp(struct tun_struct *tun, + struct tun_file *tfile, + struct xdp_buff *xdp, + struct iov_iter *iter) + { + int vnet_hdr_sz = 0; + size_t size = xdp->data_end - xdp->data; + struct tun_pcpu_stats *stats; + size_t ret; + + if (tun->flags & IFF_VNET_HDR) { + struct virtio_net_hdr gso = { 0 }; + + vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) + return -EINVAL; + if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != + sizeof(gso))) + return -EFAULT; + iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + } + + ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz; + + stats = get_cpu_ptr(tun->pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += ret; + u64_stats_update_end(&stats->syncp); + put_cpu_ptr(tun->pcpu_stats); + + return ret; + } + /* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_file *tfile, @@@ -1877,10 -2088,7 +2088,7 @@@
if (vlan_hlen) { int ret; - struct { - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - } veth; + struct veth veth;
veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); @@@ -1910,15 -2118,14 +2118,14 @@@ done return total; }
- static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, - int *err) + static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); - struct sk_buff *skb = NULL; + void *ptr = NULL; int error = 0;
- skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) goto out; if (noblock) { error = -EAGAIN; @@@ -1929,8 -2136,8 +2136,8 @@@ current->state = TASK_INTERRUPTIBLE;
while (1) { - skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) break; if (signal_pending(current)) { error = -ERESTARTSYS; @@@ -1949,12 -2156,12 +2156,12 @@@
out: *err = error; - return skb; + return ptr; }
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, - int noblock, struct sk_buff *skb) + int noblock, void *ptr) { ssize_t ret; int err; @@@ -1962,23 -2169,31 +2169,31 @@@ tun_debug(KERN_INFO, tun, "tun_do_read\n");
if (!iov_iter_count(to)) { - if (skb) - kfree_skb(skb); + tun_ptr_free(ptr); return 0; }
- if (!skb) { + if (!ptr) { /* Read frames from ring */ - skb = tun_ring_recv(tfile, noblock, &err); - if (!skb) + ptr = tun_ring_recv(tfile, noblock, &err); + if (!ptr) return err; }
- ret = tun_put_user(tun, tfile, skb, to); - if (unlikely(ret < 0)) - kfree_skb(skb); - else - consume_skb(skb); + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + ret = tun_put_user_xdp(tun, tfile, xdp, to); + put_page(virt_to_head_page(xdp->data)); + } else { + struct sk_buff *skb = ptr; + + ret = tun_put_user(tun, tfile, skb, to); + if (unlikely(ret < 0)) + kfree_skb(skb); + else + consume_skb(skb); + }
return ret; } @@@ -2000,6 -2215,39 +2215,39 @@@ static ssize_t tun_chr_read_iter(struc return ret; }
+ static void tun_prog_free(struct rcu_head *rcu) + { + struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu); + + bpf_prog_destroy(prog->prog); + kfree(prog); + } + + static int __tun_set_ebpf(struct tun_struct *tun, + struct tun_prog __rcu **prog_p, + struct bpf_prog *prog) + { + struct tun_prog *old, *new = NULL; + + if (prog) { + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->prog = prog; + } + + spin_lock_bh(&tun->lock); + old = rcu_dereference_protected(*prog_p, + lockdep_is_held(&tun->lock)); + rcu_assign_pointer(*prog_p, new); + spin_unlock_bh(&tun->lock); + + if (old) + call_rcu(&old->rcu, tun_prog_free); + + return 0; + } + static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@@ -2008,6 -2256,8 +2256,8 @@@ free_percpu(tun->pcpu_stats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); + __tun_set_ebpf(tun, &tun->steering_prog, NULL); + __tun_set_ebpf(tun, &tun->filter_prog, NULL); }
static void tun_setup(struct net_device *dev) @@@ -2081,12 -2331,12 +2331,12 @@@ static int tun_recvmsg(struct socket *s { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); - struct sk_buff *skb = m->msg_control; + void *ptr = m->msg_control; int ret;
if (!tun) { ret = -EBADFD; - goto out_free_skb; + goto out_free; }
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { @@@ -2098,7 -2348,7 +2348,7 @@@ SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@@ -2109,12 -2359,25 +2359,25 @@@ out
out_put_tun: tun_put(tun); - out_free_skb: - if (skb) - kfree_skb(skb); + out_free: + tun_ptr_free(ptr); return ret; }
+ static int tun_ptr_peek_len(void *ptr) + { + if (likely(ptr)) { + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + return __skb_array_len_with_tag(ptr); + } else { + return 0; + } + } + static int tun_peek_len(struct socket *sock) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); @@@ -2125,7 -2388,7 +2388,7 @@@ if (!tun) return 0;
- ret = skb_array_peek_len(&tfile->tx_array); + ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len); tun_put(tun);
return ret; @@@ -2296,6 -2559,7 +2559,7 @@@ static int tun_set_iff(struct net *net tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; tun->rx_batched = 0; + RCU_INIT_POINTER(tun->steering_prog, NULL);
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); if (!tun->pcpu_stats) { @@@ -2488,6 -2752,26 +2752,26 @@@ unlock return ret; }
+ static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p, + void __user *data) + { + struct bpf_prog *prog; + int fd; + + if (copy_from_user(&fd, data, sizeof(fd))) + return -EFAULT; + + if (fd == -1) { + prog = NULL; + } else { + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + return __tun_set_ebpf(tun, prog_p, prog); + } + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@@ -2764,6 -3048,14 +3048,14 @@@ ret = 0; break;
+ case TUNSETSTEERINGEBPF: + ret = tun_set_ebpf(tun, &tun->steering_prog, argp); + break; + + case TUNSETFILTEREBPF: + ret = tun_set_ebpf(tun, &tun->filter_prog, argp); + break; + default: ret = -EINVAL; break; @@@ -2860,7 -3152,7 +3152,7 @@@ static int tun_chr_open(struct inode *i
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
- memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
return 0; } @@@ -3009,25 -3301,26 +3301,26 @@@ static int tun_queue_resize(struct tun_ { struct net_device *dev = tun->dev; struct tun_file *tfile; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tun->numqueues + tun->numdisabled; int ret, i;
- arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM;
for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - arrays[i] = &tfile->tx_array; + rings[i] = &tfile->tx_ring; } list_for_each_entry(tfile, &tun->disabled, next) - arrays[i++] = &tfile->tx_array; + rings[i++] = &tfile->tx_ring;
- ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + tun_ptr_free);
- kfree(arrays); + kfree(rings); return ret; }
@@@ -3113,7 -3406,7 +3406,7 @@@ struct socket *tun_get_socket(struct fi } EXPORT_SYMBOL_GPL(tun_get_socket);
- struct skb_array *tun_get_skb_array(struct file *file) + struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile;
@@@ -3122,9 -3415,9 +3415,9 @@@ tfile = file->private_data; if (!tfile) return ERR_PTR(-EBADFD); - return &tfile->tx_array; + return &tfile->tx_ring; } - EXPORT_SYMBOL_GPL(tun_get_skb_array); + EXPORT_SYMBOL_GPL(tun_get_tx_ring);
module_init(tun_init); module_exit(tun_cleanup); diff --combined drivers/pci/pci.c index 006814babdce,764ca7b8840d..f6a4dd10d9b0 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@@ -1,4 -1,3 +1,4 @@@ +// SPDX-License-Identifier: GPL-2.0 /* * PCI Bus Services, see include/linux/pci.h for further explanation. * @@@ -157,7 -156,7 +157,7 @@@ void __iomem *pci_ioremap_bar(struct pc * Make sure the BAR is actually a memory resource, not an IO resource */ if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) { - dev_warn(&pdev->dev, "can't ioremap BAR %d: %pR\n", bar, res); + pci_warn(pdev, "can't ioremap BAR %d: %pR\n", bar, res); return NULL; } return ioremap_nocache(res->start, resource_size(res)); @@@ -649,7 -648,7 +649,7 @@@ static int pci_raw_set_power_state(stru */ if (state != PCI_D0 && dev->current_state <= PCI_D3cold && dev->current_state > state) { - dev_err(&dev->dev, "invalid power transition (from state %d to %d)\n", + pci_err(dev, "invalid power transition (from state %d to %d)\n", dev->current_state, state); return -EINVAL; } @@@ -697,7 -696,7 +697,7 @@@ pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); if (dev->current_state != state && printk_ratelimit()) - dev_info(&dev->dev, "Refused to change power state, currently in D%d\n", + pci_info(dev, "Refused to change power state, currently in D%d\n", dev->current_state);
/* @@@ -971,7 -970,7 +971,7 @@@ pci_power_t pci_choose_state(struct pci case PM_EVENT_HIBERNATE: return PCI_D3hot; default: - dev_info(&dev->dev, "unrecognized suspend event %d\n", + pci_info(dev, "unrecognized suspend event %d\n", state.event); BUG(); } @@@ -1014,7 -1013,7 +1014,7 @@@ static int pci_save_pcie_state(struct p
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __func__); + pci_err(dev, "buffer not found in %s\n", __func__); return -ENOMEM; }
@@@ -1062,7 -1061,7 +1062,7 @@@ static int pci_save_pcix_state(struct p
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __func__); + pci_err(dev, "buffer not found in %s\n", __func__); return -ENOMEM; }
@@@ -1122,7 -1121,7 +1122,7 @@@ static void pci_restore_config_dword(st return;
for (;;) { - dev_dbg(&pdev->dev, "restoring config space at offset %#x (was %#x, writing %#x)\n", + pci_dbg(pdev, "restoring config space at offset %#x (was %#x, writing %#x)\n", offset, val, saved_val); pci_write_config_dword(pdev, offset, saved_val); if (retry-- <= 0) @@@ -1359,7 -1358,7 +1359,7 @@@ static void pci_enable_bridge(struct pc
retval = pci_enable_device(dev); if (retval) - dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", + pci_err(dev, "Error enabling bridge (%d), continuing\n", retval); pci_set_master(dev); } @@@ -1459,6 -1458,7 +1459,7 @@@ struct pci_devres unsigned int pinned:1; unsigned int orig_intx:1; unsigned int restore_intx:1; + unsigned int mwi:1; u32 region_mask; };
@@@ -1477,6 -1477,9 +1478,9 @@@ static void pcim_release(struct device if (this->region_mask & (1 << i)) pci_release_region(dev, i);
+ if (this->mwi) + pci_clear_mwi(dev); + if (this->restore_intx) pci_intx(dev, this->orig_intx);
@@@ -1864,7 -1867,7 +1868,7 @@@ void pci_pme_active(struct pci_dev *dev pme_dev = kmalloc(sizeof(struct pci_pme_device), GFP_KERNEL); if (!pme_dev) { - dev_warn(&dev->dev, "can't enable PME#\n"); + pci_warn(dev, "can't enable PME#\n"); return; } pme_dev->dev = dev; @@@ -1888,7 -1891,7 +1892,7 @@@ } }
- dev_dbg(&dev->dev, "PME# %s\n", enable ? "enabled" : "disabled"); + pci_dbg(dev, "PME# %s\n", enable ? "enabled" : "disabled"); } EXPORT_SYMBOL(pci_pme_active);
@@@ -2425,7 -2428,7 +2429,7 @@@ void pci_pm_init(struct pci_dev *dev pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc);
if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { - dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", + pci_err(dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); return; } @@@ -2445,14 -2448,15 +2449,14 @@@ dev->d2_support = true;
if (dev->d1_support || dev->d2_support) - dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n", + pci_printk(KERN_DEBUG, dev, "supports%s%s\n", dev->d1_support ? " D1" : "", dev->d2_support ? " D2" : ""); }
pmc &= PCI_PM_CAP_PME_MASK; if (pmc) { - dev_printk(KERN_DEBUG, &dev->dev, - "PME# supported from%s%s%s%s%s\n", + pci_printk(KERN_DEBUG, dev, "PME# supported from%s%s%s%s%s\n", (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "", (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", @@@ -2544,13 -2548,13 +2548,13 @@@ static int pci_ea_read(struct pci_dev *
res = pci_ea_get_resource(dev, bei, prop); if (!res) { - dev_err(&dev->dev, "Unsupported EA entry BEI: %u\n", bei); + pci_err(dev, "Unsupported EA entry BEI: %u\n", bei); goto out; }
flags = pci_ea_flags(dev, prop); if (!flags) { - dev_err(&dev->dev, "Unsupported EA properties: %#x\n", prop); + pci_err(dev, "Unsupported EA properties: %#x\n", prop); goto out; }
@@@ -2600,12 -2604,13 +2604,12 @@@ }
if (end < start) { - dev_err(&dev->dev, "EA Entry crosses address boundary\n"); + pci_err(dev, "EA Entry crosses address boundary\n"); goto out; }
if (ent_size != ent_offset - offset) { - dev_err(&dev->dev, - "EA Entry Size (%d) does not match length read (%d)\n", + pci_err(dev, "EA Entry Size (%d) does not match length read (%d)\n", ent_size, ent_offset - offset); goto out; } @@@ -2616,16 -2621,16 +2620,16 @@@ res->flags = flags;
if (bei <= PCI_EA_BEI_BAR5) - dev_printk(KERN_DEBUG, &dev->dev, "BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", + pci_printk(KERN_DEBUG, dev, "BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", bei, res, prop); else if (bei == PCI_EA_BEI_ROM) - dev_printk(KERN_DEBUG, &dev->dev, "ROM: %pR (from Enhanced Allocation, properties %#02x)\n", + pci_printk(KERN_DEBUG, dev, "ROM: %pR (from Enhanced Allocation, properties %#02x)\n", res, prop); else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5) - dev_printk(KERN_DEBUG, &dev->dev, "VF BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", + pci_printk(KERN_DEBUG, dev, "VF BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", bei - PCI_EA_BEI_VF_BAR0, res, prop); else - dev_printk(KERN_DEBUG, &dev->dev, "BEI %d res: %pR (from Enhanced Allocation, properties %#02x)\n", + pci_printk(KERN_DEBUG, dev, "BEI %d res: %pR (from Enhanced Allocation, properties %#02x)\n", bei, res, prop);
out: @@@ -2722,11 -2727,13 +2726,11 @@@ void pci_allocate_cap_save_buffers(stru error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, PCI_EXP_SAVE_REGS * sizeof(u16)); if (error) - dev_err(&dev->dev, - "unable to preallocate PCI Express save buffer\n"); + pci_err(dev, "unable to preallocate PCI Express save buffer\n");
error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_PCIX, sizeof(u16)); if (error) - dev_err(&dev->dev, - "unable to preallocate PCI-X save buffer\n"); + pci_err(dev, "unable to preallocate PCI-X save buffer\n");
pci_allocate_vc_save_buffers(dev); } @@@ -3063,81 -3070,6 +3067,81 @@@ int pci_rebar_set_size(struct pci_dev * }
/** + * pci_enable_atomic_ops_to_root - enable AtomicOp requests to root port + * @dev: the PCI device + * @cap_mask: mask of desired AtomicOp sizes, including one or more of: + * PCI_EXP_DEVCAP2_ATOMIC_COMP32 + * PCI_EXP_DEVCAP2_ATOMIC_COMP64 + * PCI_EXP_DEVCAP2_ATOMIC_COMP128 + * + * Return 0 if all upstream bridges support AtomicOp routing, egress + * blocking is disabled on all upstream ports, and the root port supports + * the requested completion capabilities (32-bit, 64-bit and/or 128-bit + * AtomicOp completion), or negative otherwise. + */ +int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask) +{ + struct pci_bus *bus = dev->bus; + struct pci_dev *bridge; + u32 cap, ctl2; + + if (!pci_is_pcie(dev)) + return -EINVAL; + + /* + * Per PCIe r4.0, sec 6.15, endpoints and root ports may be + * AtomicOp requesters. For now, we only support endpoints as + * requesters and root ports as completers. No endpoints as + * completers, and no peer-to-peer. + */ + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + break; + default: + return -EINVAL; + } + + while (bus->parent) { + bridge = bus->self; + + pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap); + + switch (pci_pcie_type(bridge)) { + /* Ensure switch ports support AtomicOp routing */ + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + if (!(cap & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) + return -EINVAL; + break; + + /* Ensure root port supports all the sizes we care about */ + case PCI_EXP_TYPE_ROOT_PORT: + if ((cap & cap_mask) != cap_mask) + return -EINVAL; + break; + } + + /* Ensure upstream ports don't block AtomicOps on egress */ + if (!bridge->has_secondary_link) { + pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, + &ctl2); + if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK) + return -EINVAL; + } + + bus = bus->parent; + } + + pcie_capability_set_word(dev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + return 0; +} +EXPORT_SYMBOL(pci_enable_atomic_ops_to_root); + +/** * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge * @dev: the PCI device * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD) @@@ -3270,7 -3202,7 +3274,7 @@@ static int __pci_request_region(struct return 0;
err_out: - dev_warn(&pdev->dev, "BAR %d: can't reserve %pR\n", bar, + pci_warn(pdev, "BAR %d: can't reserve %pR\n", bar, &pdev->resource[bar]); return -EBUSY; } @@@ -3693,7 -3625,7 +3697,7 @@@ static void __pci_set_master(struct pci else cmd = old_cmd & ~PCI_COMMAND_MASTER; if (cmd != old_cmd) { - dev_dbg(&dev->dev, "%s bus mastering\n", + pci_dbg(dev, "%s bus mastering\n", enable ? "enabling" : "disabling"); pci_write_config_word(dev, PCI_COMMAND, cmd); } @@@ -3794,7 -3726,7 +3798,7 @@@ int pci_set_cacheline_size(struct pci_d if (cacheline_size == pci_cache_line_size) return 0;
- dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not supported\n", + pci_printk(KERN_DEBUG, dev, "cache line size of %d is not supported\n", pci_cache_line_size << 2);
return -EINVAL; @@@ -3823,7 -3755,7 +3827,7 @@@ int pci_set_mwi(struct pci_dev *dev
pci_read_config_word(dev, PCI_COMMAND, &cmd); if (!(cmd & PCI_COMMAND_INVALIDATE)) { - dev_dbg(&dev->dev, "enabling Mem-Wr-Inval\n"); + pci_dbg(dev, "enabling Mem-Wr-Inval\n"); cmd |= PCI_COMMAND_INVALIDATE; pci_write_config_word(dev, PCI_COMMAND, cmd); } @@@ -3833,6 -3765,27 +3837,27 @@@ EXPORT_SYMBOL(pci_set_mwi);
/** + * pcim_set_mwi - a device-managed pci_set_mwi() + * @dev: the PCI device for which MWI is enabled + * + * Managed pci_set_mwi(). + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ + int pcim_set_mwi(struct pci_dev *dev) + { + struct pci_devres *dr; + + dr = find_pci_dr(dev); + if (!dr) + return -ENOMEM; + + dr->mwi = 1; + return pci_set_mwi(dev); + } + EXPORT_SYMBOL(pcim_set_mwi); + + /** * pci_try_set_mwi - enables memory-write-invalidate PCI transaction * @dev: the PCI device for which MWI is enabled * @@@ -4019,13 -3972,13 +4044,13 @@@ static void pci_flr_wait(struct pci_de pci_read_config_dword(dev, PCI_COMMAND, &id); while (id == ~0) { if (delay > timeout) { - dev_warn(&dev->dev, "not ready %dms after FLR; giving up\n", + pci_warn(dev, "not ready %dms after FLR; giving up\n", 100 + delay - 1); return; }
if (delay > 1000) - dev_info(&dev->dev, "not ready %dms after FLR; waiting\n", + pci_info(dev, "not ready %dms after FLR; waiting\n", 100 + delay - 1);
msleep(delay); @@@ -4034,7 -3987,7 +4059,7 @@@ }
if (delay > 1000) - dev_info(&dev->dev, "ready %dms after FLR\n", 100 + delay - 1); + pci_info(dev, "ready %dms after FLR\n", 100 + delay - 1); }
/** @@@ -4066,7 -4019,7 +4091,7 @@@ static bool pcie_has_flr(struct pci_de void pcie_flr(struct pci_dev *dev) { if (!pci_wait_for_pending_transaction(dev)) - dev_err(&dev->dev, "timed out waiting for pending transaction; performing function level reset anyway\n"); + pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); pci_flr_wait(dev); @@@ -4099,7 -4052,7 +4124,7 @@@ static int pci_af_flr(struct pci_dev *d */ if (!pci_wait_for_pending(dev, pos + PCI_AF_CTRL, PCI_AF_STATUS_TP << 8)) - dev_err(&dev->dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n"); + pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); pci_flr_wait(dev); @@@ -5222,12 -5175,12 +5247,12 @@@ void pci_add_dma_alias(struct pci_dev * dev->dma_alias_mask = kcalloc(BITS_TO_LONGS(U8_MAX), sizeof(long), GFP_KERNEL); if (!dev->dma_alias_mask) { - dev_warn(&dev->dev, "Unable to allocate DMA alias mask\n"); + pci_warn(dev, "Unable to allocate DMA alias mask\n"); return; }
set_bit(devfn, dev->dma_alias_mask); - dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n", + pci_info(dev, "Enabling fixed DMA alias to %02x.%d\n", PCI_SLOT(devfn), PCI_FUNC(devfn)); }
@@@ -5376,7 -5329,7 +5401,7 @@@ static void pci_request_resource_alignm return;
if (r->flags & IORESOURCE_PCI_FIXED) { - dev_info(&dev->dev, "BAR%d %pR: ignoring requested alignment %#llx\n", + pci_info(dev, "BAR%d %pR: ignoring requested alignment %#llx\n", bar, r, (unsigned long long)align); return; } @@@ -5413,7 -5366,7 +5438,7 @@@ * devices and we use the second. */
- dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", + pci_info(dev, "BAR%d %pR: requesting alignment to %#llx\n", bar, r, (unsigned long long)align);
if (resize) { @@@ -5459,11 -5412,13 +5484,11 @@@ void pci_reassigndev_resource_alignment
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { - dev_warn(&dev->dev, - "Can't reassign resources to host bridge.\n"); + pci_warn(dev, "Can't reassign resources to host bridge\n"); return; }
- dev_info(&dev->dev, - "Disabling memory decoding and releasing memory resources.\n"); + pci_info(dev, "Disabling memory decoding and releasing memory resources\n"); pci_read_config_word(dev, PCI_COMMAND, &command); command &= ~PCI_COMMAND_MEMORY; pci_write_config_word(dev, PCI_COMMAND, command); diff --combined drivers/ssb/Kconfig index 65af12c3bdb2,ee18428a051f..b3f5cae98ea6 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@@ -3,10 -3,7 +3,7 @@@ config SSB_POSSIBL depends on HAS_IOMEM && HAS_DMA default y
- menu "Sonics Silicon Backplane" - depends on SSB_POSSIBLE - - config SSB + menuconfig SSB tristate "Sonics Silicon Backplane support" depends on SSB_POSSIBLE help @@@ -21,6 -18,8 +18,8 @@@
If unsure, say N.
+ if SSB + # Common SPROM support routines config SSB_SPROM bool @@@ -32,7 -31,7 +31,7 @@@ config SSB_BLOCKI
config SSB_PCIHOST_POSSIBLE bool - depends on SSB && (PCI = y || PCI = SSB) && PCI_DRIVERS_LEGACY + depends on SSB && (PCI = y || PCI = SSB) && (PCI_DRIVERS_LEGACY || !MIPS) default y
config SSB_PCIHOST @@@ -185,4 -184,4 +184,4 @@@ config SSB_DRIVER_GPI
If unsure, say N
- endmenu + endif # SSB diff --combined drivers/tty/serdev/core.c index 28133dbd2808,3c573a8caa80..5a135ed46159 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@@ -132,33 -132,6 +132,33 @@@ void serdev_device_close(struct serdev_ } EXPORT_SYMBOL_GPL(serdev_device_close);
+static void devm_serdev_device_release(struct device *dev, void *dr) +{ + serdev_device_close(*(struct serdev_device **)dr); +} + +int devm_serdev_device_open(struct device *dev, struct serdev_device *serdev) +{ + struct serdev_device **dr; + int ret; + + dr = devres_alloc(devm_serdev_device_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + ret = serdev_device_open(serdev); + if (ret) { + devres_free(dr); + return ret; + } + + *dr = serdev; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_serdev_device_open); + void serdev_device_write_wakeup(struct serdev_device *serdev) { complete(&serdev->write_comp); @@@ -252,6 -225,18 +252,18 @@@ void serdev_device_set_flow_control(str } EXPORT_SYMBOL_GPL(serdev_device_set_flow_control);
+ int serdev_device_set_parity(struct serdev_device *serdev, + enum serdev_parity parity) + { + struct serdev_controller *ctrl = serdev->ctrl; + + if (!ctrl || !ctrl->ops->set_parity) + return -ENOTSUPP; + + return ctrl->ops->set_parity(ctrl, parity); + } + EXPORT_SYMBOL_GPL(serdev_device_set_parity); + void serdev_device_wait_until_sent(struct serdev_device *serdev, long timeout) { struct serdev_controller *ctrl = serdev->ctrl; @@@ -295,8 -280,8 +307,8 @@@ static int serdev_drv_probe(struct devi static int serdev_drv_remove(struct device *dev) { const struct serdev_device_driver *sdrv = to_serdev_device_driver(dev->driver); - - sdrv->remove(to_serdev_device(dev)); + if (sdrv->remove) + sdrv->remove(to_serdev_device(dev)); return 0; }
diff --combined drivers/vhost/net.c index 9524ee16878a,80323948c0dd..9c3f8160ef24 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@@ -89,7 -89,7 +89,7 @@@ struct vhost_net_ubuf_ref
#define VHOST_RX_BATCH 64 struct vhost_net_buf { - struct sk_buff **queue; + void **queue; int tail; int head; }; @@@ -108,7 -108,7 +108,7 @@@ struct vhost_net_virtqueue /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. */ struct vhost_net_ubuf_ref *ubufs; - struct skb_array *rx_array; + struct ptr_ring *rx_ring; struct vhost_net_buf rxq; };
@@@ -158,7 -158,7 +158,7 @@@ static int vhost_net_buf_produce(struc struct vhost_net_buf *rxq = &nvq->rxq;
rxq->head = 0; - rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, + rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_RX_BATCH); return rxq->tail; } @@@ -167,13 -167,25 +167,25 @@@ static void vhost_net_buf_unproduce(str { struct vhost_net_buf *rxq = &nvq->rxq;
- if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) { - skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head, - vhost_net_buf_get_size(rxq)); + if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { + ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq), + __skb_array_destroy_skb); rxq->head = rxq->tail = 0; } }
+ static int vhost_net_buf_peek_len(void *ptr) + { + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + + return __skb_array_len_with_tag(ptr); + } + static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; @@@ -185,7 -197,7 +197,7 @@@ return 0;
out: - return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq)); + return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); }
static void vhost_net_buf_init(struct vhost_net_buf *rxq) @@@ -583,7 -595,7 +595,7 @@@ static int peek_head_len(struct vhost_n int len = 0; unsigned long flags;
- if (rvq->rx_array) + if (rvq->rx_ring) return vhost_net_buf_peek(rvq);
spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); @@@ -744,7 -756,7 +756,7 @@@ static void handle_rx(struct vhost_net }; size_t total_len = 0; int err, mergeable; - s16 headcount; + s16 headcount, nheads = 0; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; @@@ -772,7 -784,7 +784,7 @@@ while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; - headcount = get_rx_bufs(vq, vq->heads, vhost_len, + headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ @@@ -790,7 -802,7 +802,7 @@@ * they refilled. */ goto out; } - if (nvq->rx_array) + if (nvq->rx_ring) msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { @@@ -844,8 -856,12 +856,12 @@@ vhost_discard_vq_desc(vq, headcount); goto out; } - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, - headcount); + nheads += headcount; + if (nheads > VHOST_RX_BATCH) { + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + nheads); + nheads = 0; + } if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; @@@ -856,6 -872,9 +872,9 @@@ } vhost_net_enable_vq(net, vq); out: + if (nheads) + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + nheads); mutex_unlock(&vq->mutex); }
@@@ -896,7 -915,7 +915,7 @@@ static int vhost_net_open(struct inode struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; - struct sk_buff **queue; + void **queue; int i;
n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); @@@ -908,7 -927,7 +927,7 @@@ return -ENOMEM; }
- queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *), + queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *), GFP_KERNEL); if (!queue) { kfree(vqs); @@@ -1046,23 -1065,23 +1065,23 @@@ err return ERR_PTR(r); }
- static struct skb_array *get_tap_skb_array(int fd) + static struct ptr_ring *get_tap_ptr_ring(int fd) { - struct skb_array *array; + struct ptr_ring *ring; struct file *file = fget(fd);
if (!file) return NULL; - array = tun_get_skb_array(file); - if (!IS_ERR(array)) + ring = tun_get_tx_ring(file); + if (!IS_ERR(ring)) goto out; - array = tap_get_skb_array(file); - if (!IS_ERR(array)) + ring = tap_get_ptr_ring(file); + if (!IS_ERR(ring)) goto out; - array = NULL; + ring = NULL; out: fput(file); - return array; + return ring; }
static struct socket *get_tap_socket(int fd) @@@ -1143,7 -1162,7 +1162,7 @@@ static long vhost_net_set_backend(struc vq->private_data = sock; vhost_net_buf_unproduce(nvq); if (index == VHOST_NET_VQ_RX) - nvq->rx_array = get_tap_skb_array(fd); + nvq->rx_ring = get_tap_ptr_ring(fd); r = vhost_vq_init_access(vq); if (r) goto err_used; @@@ -1208,6 -1227,7 +1227,7 @@@ static long vhost_net_reset_owner(struc } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: @@@ -1353,7 -1373,7 +1373,7 @@@ static ssize_t vhost_net_chr_write_iter return vhost_chr_write_iter(dev, from); }
-static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +static __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait) { struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; diff --combined fs/btrfs/disk-io.c index c55bacff4259,83e2349e1362..3efb2bb1f178 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@@ -30,6 -30,7 +30,7 @@@ #include <linux/ratelimit.h> #include <linux/uuid.h> #include <linux/semaphore.h> + #include <linux/error-injection.h> #include <asm/unaligned.h> #include "ctree.h" #include "disk-io.h" @@@ -61,8 -62,7 +62,8 @@@ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_SEEDING |\ - BTRFS_SUPER_FLAG_METADUMP) + BTRFS_SUPER_FLAG_METADUMP |\ + BTRFS_SUPER_FLAG_METADUMP_V2)
static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@@ -221,7 -221,7 +222,7 @@@ void btrfs_set_buffer_lockdep_class(u6 * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -static struct extent_map *btree_get_extent(struct btrfs_inode *inode, +struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { @@@ -286,7 -286,7 +287,7 @@@ static int csum_tree_block(struct btrfs int verify) { u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char *result = NULL; + char result[BTRFS_CSUM_SIZE]; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@@ -295,6 -295,7 +296,6 @@@ unsigned long map_len; int err; u32 crc = ~(u32)0; - unsigned long inline_result;
len = buf->len - offset; while (len > 0) { @@@ -308,7 -309,13 +309,7 @@@ len -= cur_len; offset += cur_len; } - if (csum_size > sizeof(inline_result)) { - result = kzalloc(csum_size, GFP_NOFS); - if (!result) - return -ENOMEM; - } else { - result = (char *)&inline_result; - } + memset(result, 0, BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
@@@ -323,12 -330,15 +324,12 @@@ "%s checksum verify failed on %llu wanted %X found %X level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); - if (result != (char *)&inline_result) - kfree(result); return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); } - if (result != (char *)&inline_result) - kfree(result); + return 0; }
@@@ -382,7 -392,7 +383,7 @@@ static int verify_parent_transid(struc clear_extent_buffer_uptodate(eb); out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, - &cached_state, GFP_NOFS); + &cached_state); if (need_lock) btrfs_tree_read_unlock_blocking(eb); return ret; @@@ -446,7 -456,7 +447,7 @@@ static int btree_read_extent_buffer_pag io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, - btree_get_extent, mirror_num); + mirror_num); if (!ret) { if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) @@@ -601,12 -611,12 +602,12 @@@ static int btree_readpage_end_io_hook(s * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; }
- if (found_level > 0 && btrfs_check_node(root, eb)) + if (found_level > 0 && btrfs_check_node(fs_info, eb)) ret = -EIO;
if (!ret) @@@ -856,8 -866,6 +857,8 @@@ static blk_status_t btree_submit_bio_ho int async = check_async_write(BTRFS_I(inode)); blk_status_t ret;
+ bio_associate_blkcg(bio, blkcg_root_css); + if (bio_op(bio) != REQ_OP_WRITE) { /* * called for a read, do the setup so that checksum validation @@@ -1005,7 -1013,7 +1006,7 @@@ void readahead_tree_block(struct btrfs_ if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, btree_get_extent, 0); + buf, WAIT_NONE, 0); free_extent_buffer(buf); }
@@@ -1024,7 -1032,7 +1025,7 @@@ int reada_tree_block_flagged(struct btr set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, - btree_get_extent, mirror_num); + mirror_num); if (ret) { free_extent_buffer(buf); return ret; @@@ -1161,7 -1169,6 +1162,7 @@@ static void __setup_root(struct btrfs_r spin_lock_init(&root->accounting_lock); spin_lock_init(&root->log_extents_lock[0]); spin_lock_init(&root->log_extents_lock[1]); + spin_lock_init(&root->qgroup_meta_rsv_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); @@@ -1178,6 -1185,7 +1179,6 @@@ atomic_set(&root->orphan_inodes, 0); refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshotted, 0); - atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; @@@ -1236,7 -1244,7 +1237,7 @@@ struct btrfs_root *btrfs_create_tree(st struct btrfs_root *root; struct btrfs_key key; int ret = 0; - uuid_le uuid; + uuid_le uuid = NULL_UUID_LE;
root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) @@@ -1277,8 -1285,7 +1278,8 @@@ btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); - uuid_le_gen(&uuid); + if (is_fstree(objectid)) + uuid_le_gen(&uuid); memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE); root->root_item.drop_level = 0;
@@@ -1809,10 -1816,12 +1810,10 @@@ sleep if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))) btrfs_cleanup_transaction(fs_info); - set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); + schedule_timeout_interruptible(delay); } while (!kthread_should_stop()); return 0; } @@@ -2867,7 -2876,7 +2868,7 @@@ retry_root_backup goto fail_sysfs; }
- if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { + if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writeable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@@ -3115,6 -3124,7 +3116,7 @@@ recovery_tree_root goto fail_block_groups; goto retry_root_backup; } + ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { @@@ -3342,8 -3352,6 +3344,8 @@@ static void write_dev_flush(struct btrf return;
bio_reset(bio); + bio_associate_blkcg(bio, blkcg_root_css); + bio->bi_end_io = btrfs_end_empty_barrier; bio_set_dev(bio, device->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; @@@ -3351,7 -3359,7 +3353,7 @@@ bio->bi_private = &device->flush_wait;
btrfsic_submit_bio(bio); - device->flush_bio_sent = 1; + set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); }
/* @@@ -3361,10 -3369,10 +3363,10 @@@ static blk_status_t wait_dev_flush(stru { struct bio *bio = device->flush_bio;
- if (!device->flush_bio_sent) + if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return BLK_STS_OK;
- device->flush_bio_sent = 0; + clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); wait_for_completion_io(&device->flush_wait);
return bio->bi_status; @@@ -3372,7 -3380,7 +3374,7 @@@
static int check_barrier_error(struct btrfs_fs_info *fs_info) { - if (!btrfs_check_rw_degradable(fs_info)) + if (!btrfs_check_rw_degradable(fs_info, NULL)) return -EIO; return 0; } @@@ -3388,16 -3396,14 +3390,16 @@@ static int barrier_all_devices(struct b int errors_wait = 0; blk_status_t ret;
+ lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ head = &info->fs_devices->devices; - list_for_each_entry_rcu(dev, head, dev_list) { - if (dev->missing) + list_for_each_entry(dev, head, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
write_dev_flush(dev); @@@ -3405,15 -3411,14 +3407,15 @@@ }
/* wait for all the barriers */ - list_for_each_entry_rcu(dev, head, dev_list) { - if (dev->missing) + list_for_each_entry(dev, head, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) { errors_wait++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
ret = wait_dev_flush(dev); @@@ -3505,13 -3510,12 +3507,13 @@@ int write_all_supers(struct btrfs_fs_in } }
- list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
btrfs_set_stack_device_generation(dev_item, 0); @@@ -3547,11 -3551,10 +3549,11 @@@ }
total_errors = 0; - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
ret = wait_dev_supers(dev, max_mirrors); @@@ -3851,7 -3854,7 +3853,7 @@@ void btrfs_mark_buffer_dirty(struct ext * So here we should only check item pointers, not item data. */ if (btrfs_header_level(buf) == 0 && - btrfs_check_leaf_relaxed(root, buf)) { + btrfs_check_leaf_relaxed(fs_info, buf)) { btrfs_print_leaf(buf); ASSERT(0); } @@@ -3909,11 -3912,9 +3911,11 @@@ static int btrfs_check_super_valid(stru btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } - if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) - btrfs_warn(fs_info, "unrecognized super flag: %llu", + if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) { + btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); + ret = -EINVAL; + } if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); diff --combined fs/btrfs/free-space-cache.c index 9e8c1f046e02,586bb06472bb..d0dde9e6afd7 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@@ -22,6 -22,7 +22,7 @@@ #include <linux/slab.h> #include <linux/math64.h> #include <linux/ratelimit.h> + #include <linux/error-injection.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@@ -332,6 -333,7 +333,7 @@@ static int io_ctl_init(struct btrfs_io_
return 0; } + ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
static void io_ctl_free(struct btrfs_io_ctl *io_ctl) { @@@ -993,7 -995,8 +995,7 @@@ update_cache_item(struct btrfs_trans_ha ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); goto fail; } leaf = path->nodes[0]; @@@ -1007,7 -1010,7 +1009,7 @@@ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, - NULL, GFP_NOFS); + NULL); btrfs_release_path(path); goto fail; } @@@ -1104,7 -1107,8 +1106,7 @@@ static int flush_dirty_cache(struct ino ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
return ret; } @@@ -1125,7 -1129,8 +1127,7 @@@ cleanup_write_cache_enospc(struct inod { io_ctl_drop_pages(io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, cached_state, - GFP_NOFS); + i_size_read(inode) - 1, cached_state); }
static int __btrfs_wait_cache_io(struct btrfs_root *root, @@@ -1319,7 -1324,7 +1321,7 @@@ static int __btrfs_write_out_cache(stru io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, GFP_NOFS); + i_size_read(inode) - 1, &cached_state);
/* * at this point the pages are under IO and we're happy, @@@ -3545,7 -3550,7 +3547,7 @@@ int btrfs_write_out_ino_cache(struct bt if (ret) { if (release_metadata) btrfs_delalloc_release_metadata(BTRFS_I(inode), - inode->i_size); + inode->i_size, true); #ifdef DEBUG btrfs_err(fs_info, "failed to write free ino cache for root %llu", diff --combined include/asm-generic/vmlinux.lds.h index a564b83bf013,ebe544e048cd..1ab0e520d6fc --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@@ -136,6 -136,15 +136,15 @@@ #define KPROBE_BLACKLIST() #endif
+ #ifdef CONFIG_FUNCTION_ERROR_INJECTION + #define ERROR_INJECT_WHITELIST() STRUCT_ALIGN(); \ + VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\ + KEEP(*(_error_injection_whitelist)) \ + VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .; + #else + #define ERROR_INJECT_WHITELIST() + #endif + #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ @@@ -268,11 -277,7 +277,11 @@@ #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ VMLINUX_SYMBOL(__start_init_task) = .; \ + VMLINUX_SYMBOL(init_thread_union) = .; \ + VMLINUX_SYMBOL(init_stack) = .; \ *(.data..init_task) \ + *(.data..init_thread_info) \ + . = VMLINUX_SYMBOL(__start_init_task) + THREAD_SIZE; \ VMLINUX_SYMBOL(__end_init_task) = .;
/* @@@ -568,6 -573,7 +577,7 @@@ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ + ERROR_INJECT_WHITELIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --combined include/linux/acpi.h index b8f4c3c776e5,f05b9b6cd43f..2db1307907cf --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@@ -56,6 -56,8 +56,8 @@@ static inline acpi_handle acpi_device_h #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) + #define ACPI_HANDLE_FWNODE(fwnode) \ + acpi_device_handle(to_acpi_device_node(fwnode))
static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { @@@ -451,7 -453,6 +453,7 @@@ void __init acpi_no_s4_hw_signature(voi void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); +void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */
struct acpi_osc_context { @@@ -627,6 -628,7 +629,7 @@@ int acpi_arch_timer_mem_init(struct arc #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) + #define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0),
struct fwnode_handle; @@@ -641,12 -643,6 +644,12 @@@ static inline bool acpi_dev_present(con return false; }
+static inline const char * +acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) +{ + return NULL; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; diff --combined include/linux/module.h index e6249795f9e2,9642d3116718..23bfbe52dc05 --- a/include/linux/module.h +++ b/include/linux/module.h @@@ -19,6 -19,7 +19,7 @@@ #include <linux/jump_label.h> #include <linux/export.h> #include <linux/rbtree_latch.h> + #include <linux/error-injection.h>
#include <linux/percpu.h> #include <asm/module.h> @@@ -475,6 -476,11 +476,11 @@@ struct module ctor_fn_t *ctors; unsigned int num_ctors; #endif + + #ifdef CONFIG_FUNCTION_ERROR_INJECTION + struct error_injection_entry *ei_funcs; + unsigned int num_ei_funcs; + #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} @@@ -606,9 -612,6 +612,9 @@@ int ref_module(struct module *a, struc __mod ? __mod->name : "kernel"; \ })
+/* Dereference module function descriptor */ +void *dereference_module_function_descriptor(struct module *mod, void *ptr); + /* For kallsyms to ask for address resolution. namebuf should be at * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ @@@ -763,13 -766,6 +769,13 @@@ static inline bool is_module_sig_enforc return false; }
+/* Dereference module function descriptor */ +static inline +void *dereference_module_function_descriptor(struct module *mod, void *ptr) +{ + return ptr; +} + #endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS diff --combined include/linux/net.h index c2d468cb9821,68acc54976bf..91216b16feb7 --- a/include/linux/net.h +++ b/include/linux/net.h @@@ -147,7 -147,7 +147,7 @@@ struct proto_ops int (*getname) (struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer); - unsigned int (*poll) (struct file *file, struct socket *sock, + __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); @@@ -306,7 -306,6 +306,6 @@@ int kernel_sendpage(struct socket *sock size_t size, int flags); int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); - int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
/* Routine returns the IP overhead imposed by a (caller-protected) socket. */ diff --combined include/linux/pci.h index e4a8e9765c22,0314e0716c30..beaa43383e2e --- a/include/linux/pci.h +++ b/include/linux/pci.h @@@ -48,17 -48,17 +48,17 @@@ * In the interest of not exposing interfaces to user-space unnecessarily, * the following kernel-only defines are being added here. */ -#define PCI_DEVID(bus, devfn) ((((u16)(bus)) << 8) | (devfn)) +#define PCI_DEVID(bus, devfn) ((((u16)(bus)) << 8) | (devfn)) /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
/* pci_slot represents a physical slot */ struct pci_slot { - struct pci_bus *bus; /* The bus this slot is on */ - struct list_head list; /* node in list of slots on this bus */ - struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */ - unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ - struct kobject kobj; + struct pci_bus *bus; /* Bus this slot is on */ + struct list_head list; /* Node in list of slots */ + struct hotplug_slot *hotplug; /* Hotplug info (move here) */ + unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + struct kobject kobj; };
static inline const char *pci_slot_name(const struct pci_slot *slot) @@@ -72,7 -72,9 +72,7 @@@ enum pci_mmap_state pci_mmap_mem };
-/* - * For PCI devices, the region numbers are assigned this way: - */ +/* For PCI devices, the region numbers are assigned this way: */ enum { /* #0-5: standard PCI resources */ PCI_STD_RESOURCES, @@@ -81,23 -83,23 +81,23 @@@ /* #6: expansion ROM resource */ PCI_ROM_RESOURCE,
- /* device specific resources */ + /* Device-specific resources */ #ifdef CONFIG_PCI_IOV PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, #endif
- /* resources assigned to buses behind the bridge */ + /* Resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4
PCI_BRIDGE_RESOURCES, PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + PCI_BRIDGE_RESOURCE_NUM - 1,
- /* total resources associated with a PCI device */ + /* Total resources associated with a PCI device */ PCI_NUM_RESOURCES,
- /* preserve this for compatibility */ + /* Preserve this for compatibility */ DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES, };
@@@ -150,10 -152,9 +150,10 @@@ static inline const char *pci_power_nam #define PCI_PM_D3COLD_WAIT 100 #define PCI_PM_BUS_WAIT 50
-/** The pci_channel state describes connectivity between the CPU and - * the pci device. If some PCI bus between here and the pci device - * has crashed or locked up, this info is reflected here. +/** + * The pci_channel state describes connectivity between the CPU and + * the PCI device. If some PCI bus between here and the PCI device + * has crashed or locked up, this info is reflected here. */ typedef unsigned int __bitwise pci_channel_state_t;
@@@ -183,7 -184,9 +183,7 @@@ enum pcie_reset_state
typedef unsigned short __bitwise pci_dev_flags_t; enum pci_dev_flags { - /* INTX_DISABLE in PCI_COMMAND register disables MSI - * generation too. - */ + /* INTX_DISABLE in PCI_COMMAND register disables MSI too */ PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0), /* Device configuration is irrevocably lost if disabled into D3 */ PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1), @@@ -199,7 -202,7 +199,7 @@@ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), /* Get VPD from function 0 VPD */ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), - /* a non-root bridge where translation occurs, stop alias search here */ + /* A non-root bridge where translation occurs, stop alias search here */ PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), @@@ -219,17 -222,17 +219,17 @@@ enum pci_bus_flags PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4, };
-/* These values come from the PCI Express Spec */ +/* Values from Link Status register, PCIe r3.1, sec 7.8.8 */ enum pcie_link_width { PCIE_LNK_WIDTH_RESRV = 0x00, PCIE_LNK_X1 = 0x01, PCIE_LNK_X2 = 0x02, PCIE_LNK_X4 = 0x04, PCIE_LNK_X8 = 0x08, - PCIE_LNK_X12 = 0x0C, + PCIE_LNK_X12 = 0x0c, PCIE_LNK_X16 = 0x10, PCIE_LNK_X32 = 0x20, - PCIE_LNK_WIDTH_UNKNOWN = 0xFF, + PCIE_LNK_WIDTH_UNKNOWN = 0xff, };
/* Based on the PCI Hotplug Spec, but some values are made up by us */ @@@ -260,15 -263,15 +260,15 @@@ enum pci_bus_speed };
struct pci_cap_saved_data { - u16 cap_nr; - bool cap_extended; - unsigned int size; - u32 data[0]; + u16 cap_nr; + bool cap_extended; + unsigned int size; + u32 data[0]; };
struct pci_cap_saved_state { - struct hlist_node next; - struct pci_cap_saved_data cap; + struct hlist_node next; + struct pci_cap_saved_data cap; };
struct irq_affinity; @@@ -277,17 -280,19 +277,17 @@@ struct pci_vpd struct pci_sriov; struct pci_ats;
-/* - * The pci_dev structure is used to describe PCI devices. - */ +/* The pci_dev structure describes PCI devices */ struct pci_dev { - struct list_head bus_list; /* node in per-bus list */ - struct pci_bus *bus; /* bus this device is on */ - struct pci_bus *subordinate; /* bus this device bridges to */ + struct list_head bus_list; /* Node in per-bus list */ + struct pci_bus *bus; /* Bus this device is on */ + struct pci_bus *subordinate; /* Bus this device bridges to */
- void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ + void *sysdata; /* Hook for sys-specific extension */ + struct proc_dir_entry *procent; /* Device entry in /proc/bus/pci */ struct pci_slot *slot; /* Physical slot this device is in */
- unsigned int devfn; /* encoded device & function index */ + unsigned int devfn; /* Encoded device & function index */ unsigned short vendor; unsigned short device; unsigned short subsystem_vendor; @@@ -302,12 -307,12 +302,12 @@@ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */ - u8 rom_base_reg; /* which config register controls the ROM */ - u8 pin; /* which interrupt pin this device uses */ - u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ - unsigned long *dma_alias_mask;/* mask of enabled devfn aliases */ + u8 rom_base_reg; /* Config register controlling ROM */ + u8 pin; /* Interrupt pin this device uses */ + u16 pcie_flags_reg; /* Cached PCIe Capabilities Register */ + unsigned long *dma_alias_mask;/* Mask of enabled devfn aliases */
- struct pci_driver *driver; /* which driver has allocated this device */ + struct pci_driver *driver; /* Driver bound to this device */ u64 dma_mask; /* Mask of the bits of bus address this device implements. Normally this is 0xffffffff. You only need to change @@@ -316,9 -321,9 +316,9 @@@
struct device_dma_parameters dma_parms;
- pci_power_t current_state; /* Current operating state. In ACPI-speak, - this is D0-D3, D0 being fully functional, - and D3 being off. */ + pci_power_t current_state; /* Current operating state. In ACPI, + this is D0-D3, D0 being fully + functional, and D3 being off. */ u8 pm_cap; /* PM capability offset */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ @@@ -329,10 -334,10 +329,10 @@@ unsigned int no_d3cold:1; /* D3cold is forbidden */ unsigned int bridge_d3:1; /* Allow D3 for bridge */ unsigned int d3cold_allowed:1; /* D3cold is allowed by user */ - unsigned int mmio_always_on:1; /* disallow turning off io/mem - decoding during bar sizing */ + unsigned int mmio_always_on:1; /* Disallow turning off io/mem + decoding during BAR sizing */ unsigned int wakeup_prepared:1; - unsigned int runtime_d3cold:1; /* whether go through runtime + unsigned int runtime_d3cold:1; /* Whether go through runtime D3cold, not set for devices powered on/off by the corresponding bridge */ @@@ -345,14 -350,12 +345,14 @@@
#ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + unsigned int ltr_path:1; /* Latency Tolerance Reporting + supported from root to here */ #endif
- pci_channel_state_t error_state; /* current connectivity state */ - struct device dev; /* Generic device interface */ + pci_channel_state_t error_state; /* Current connectivity state */ + struct device dev; /* Generic device interface */
- int cfg_size; /* Size of configuration space */ + int cfg_size; /* Size of config space */
/* * Instead of touching interrupt line and base address registers @@@ -361,47 -364,47 +361,47 @@@ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
- bool match_driver; /* Skip attaching driver */ - /* These fields are used by common fixups */ - unsigned int transparent:1; /* Subtractive decode PCI bridge */ - unsigned int multifunction:1;/* Part of multi-function device */ - /* keep track of device state */ + bool match_driver; /* Skip attaching driver */ + + unsigned int transparent:1; /* Subtractive decode bridge */ + unsigned int multifunction:1; /* Multi-function device */ + unsigned int is_added:1; - unsigned int is_busmaster:1; /* device is busmaster */ - unsigned int no_msi:1; /* device may not use msi */ - unsigned int no_64bit_msi:1; /* device may only use 32-bit MSIs */ - unsigned int block_cfg_access:1; /* config space access is blocked */ - unsigned int broken_parity_status:1; /* Device generates false positive parity */ - unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ + unsigned int is_busmaster:1; /* Is busmaster */ + unsigned int no_msi:1; /* May not use MSI */ + unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ + unsigned int block_cfg_access:1; /* Config space access blocked */ + unsigned int broken_parity_status:1; /* Generates false positive parity */ + unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; - unsigned int ari_enabled:1; /* ARI forwarding */ - unsigned int ats_enabled:1; /* Address Translation Service */ + unsigned int ari_enabled:1; /* ARI forwarding */ + unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ unsigned int is_managed:1; - unsigned int needs_freset:1; /* Dev requires fundamental reset */ + unsigned int needs_freset:1; /* Requires fundamental reset */ unsigned int state_saved:1; unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int reset_fn:1; - unsigned int is_hotplug_bridge:1; - unsigned int is_thunderbolt:1; /* Thunderbolt controller */ - unsigned int __aer_firmware_first_valid:1; + unsigned int is_hotplug_bridge:1; + unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; - unsigned int broken_intx_masking:1; /* INTx masking can't be used */ - unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ + unsigned int broken_intx_masking:1; /* INTx masking can't be used */ + unsigned int io_window_1k:1; /* Intel bridge 1K I/O windows */ unsigned int irq_managed:1; unsigned int has_secondary_link:1; - unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ - unsigned int is_probed:1; /* device probing in progress */ + unsigned int non_compliant_bars:1; /* Broken BARs; ignore them */ + unsigned int is_probed:1; /* Device probing in progress */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */
- u32 saved_config_space[16]; /* config space saved at suspend time */ + u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ - int rom_attr_enabled; /* has display of the rom attribute been enabled? */ + struct bin_attribute *rom_attr; /* Attribute descriptor for sysfs ROM entry */ + int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
@@@ -416,12 -419,12 +416,12 @@@ struct pci_vpd *vpd; #ifdef CONFIG_PCI_ATS union { - struct pci_sriov *sriov; /* SR-IOV capability related */ - struct pci_dev *physfn; /* the PF this VF is associated with */ + struct pci_sriov *sriov; /* PF: SR-IOV info */ + struct pci_dev *physfn; /* VF: related PF */ }; u16 ats_cap; /* ATS Capability offset */ u8 ats_stu; /* ATS Smallest Translation Unit */ - atomic_t ats_ref_cnt; /* number of VFs with ATS enabled */ + atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ #endif #ifdef CONFIG_PCI_PRI u32 pri_reqs_alloc; /* Number of PRI requests allocated */ @@@ -429,11 -432,11 +429,11 @@@ #ifdef CONFIG_PCI_PASID u16 pasid_features; #endif - phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ - size_t romlen; /* Length of ROM if it's not from the BAR */ - char *driver_override; /* Driver name to force a match */ + phys_addr_t rom; /* Physical address if not from BAR */ + size_t romlen; /* Length if not from BAR */ + char *driver_override; /* Driver name to force a match */
- unsigned long priv_flags; /* Private flags for the pci driver */ + unsigned long priv_flags; /* Private flags for the PCI driver */ };
static inline struct pci_dev *pci_physfn(struct pci_dev *dev) @@@ -456,26 -459,26 +456,26 @@@ static inline int pci_channel_offline(s }
struct pci_host_bridge { - struct device dev; - struct pci_bus *bus; /* root bus */ - struct pci_ops *ops; - void *sysdata; - int busnr; + struct device dev; + struct pci_bus *bus; /* Root bus */ + struct pci_ops *ops; + void *sysdata; + int busnr; struct list_head windows; /* resource_entry */ - u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */ + u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); - void *release_data; + void *release_data; struct msi_controller *msi; - unsigned int ignore_reset_delay:1; /* for entire hierarchy */ - unsigned int no_ext_tags:1; /* no Extended Tags */ + unsigned int ignore_reset_delay:1; /* For entire hierarchy */ + unsigned int no_ext_tags:1; /* No Extended Tags */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, resource_size_t start, resource_size_t size, resource_size_t align); - unsigned long private[0] ____cacheline_aligned; + unsigned long private[0] ____cacheline_aligned; };
#define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev) @@@ -497,8 -500,8 +497,8 @@@ void pci_free_host_bridge(struct pci_ho struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
void pci_set_host_bridge_release(struct pci_host_bridge *bridge, - void (*release_fn)(struct pci_host_bridge *), - void *release_data); + void (*release_fn)(struct pci_host_bridge *), + void *release_data);
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
@@@ -518,32 -521,32 +518,32 @@@ #define PCI_SUBTRACTIVE_DECODE 0x1
struct pci_bus_resource { - struct list_head list; - struct resource *res; - unsigned int flags; + struct list_head list; + struct resource *res; + unsigned int flags; };
#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
struct pci_bus { - struct list_head node; /* node in list of buses */ - struct pci_bus *parent; /* parent bus this bridge is on */ - struct list_head children; /* list of child buses */ - struct list_head devices; /* list of devices on this bus */ - struct pci_dev *self; /* bridge device as seen by parent */ - struct list_head slots; /* list of slots on this bus; + struct list_head node; /* Node in list of buses */ + struct pci_bus *parent; /* Parent bus this bridge is on */ + struct list_head children; /* List of child buses */ + struct list_head devices; /* List of devices on this bus */ + struct pci_dev *self; /* Bridge device as seen by parent */ + struct list_head slots; /* List of slots on this bus; protected by pci_slot_mutex */ struct resource *resource[PCI_BRIDGE_RESOURCE_NUM]; - struct list_head resources; /* address space routed to this bus */ - struct resource busn_res; /* bus numbers routed to this bus */ + struct list_head resources; /* Address space routed to this bus */ + struct resource busn_res; /* Bus numbers routed to this bus */
- struct pci_ops *ops; /* configuration access functions */ + struct pci_ops *ops; /* Configuration access functions */ struct msi_controller *msi; /* MSI controller */ - void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */ + void *sysdata; /* Hook for sys-specific extension */ + struct proc_dir_entry *procdir; /* Directory entry in /proc/bus/pci */
- unsigned char number; /* bus number */ - unsigned char primary; /* number of primary bridge */ + unsigned char number; /* Bus number */ + unsigned char primary; /* Number of primary bridge */ unsigned char max_bus_speed; /* enum pci_bus_speed */ unsigned char cur_bus_speed; /* enum pci_bus_speed */ #ifdef CONFIG_PCI_DOMAINS_GENERIC @@@ -552,12 -555,12 +552,12 @@@
char name[48];
- unsigned short bridge_ctl; /* manage NO_ISA/FBB/et al behaviors */ - pci_bus_flags_t bus_flags; /* inherited by child buses */ + unsigned short bridge_ctl; /* Manage NO_ISA/FBB/et al behaviors */ + pci_bus_flags_t bus_flags; /* Inherited by child buses */ struct device *bridge; struct device dev; - struct bin_attribute *legacy_io; /* legacy I/O for this bus */ - struct bin_attribute *legacy_mem; /* legacy mem */ + struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ + struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; };
@@@ -614,7 -617,9 +614,7 @@@ static inline bool pci_dev_msi_enabled( static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; } #endif
-/* - * Error values that may be returned by PCI functions. - */ +/* Error values that may be returned by PCI functions */ #define PCIBIOS_SUCCESSFUL 0x00 #define PCIBIOS_FUNC_NOT_SUPPORTED 0x81 #define PCIBIOS_BAD_VENDOR_ID 0x83 @@@ -623,7 -628,9 +623,7 @@@ #define PCIBIOS_SET_FAILED 0x88 #define PCIBIOS_BUFFER_TOO_SMALL 0x89
-/* - * Translate above to generic errno for passing back through non-PCI code. - */ +/* Translate above to generic errno for passing back through non-PCI code */ static inline int pcibios_err_to_errno(int err) { if (err <= PCIBIOS_SUCCESSFUL) @@@ -673,13 -680,13 +673,13 @@@ typedef u32 pci_bus_addr_t #endif
struct pci_bus_region { - pci_bus_addr_t start; - pci_bus_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; };
struct pci_dynids { - spinlock_t lock; /* protects list, index */ - struct list_head list; /* for IDs added at runtime */ + spinlock_t lock; /* Protects list, index */ + struct list_head list; /* For IDs added at runtime */ };
@@@ -693,13 -700,13 +693,13 @@@ typedef unsigned int __bitwise pci_ers_result_t;
enum pci_ers_result { - /* no result/none/not supported in device driver */ + /* No result/none/not supported in device driver */ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
/* Device driver can recover without slot reset */ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
- /* Device driver wants slot to be reset. */ + /* Device driver wants slot to be reset */ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
/* Device has completely failed, is unrecoverable */ @@@ -735,27 -742,27 +735,27 @@@ struct pci_error_handlers
struct module; struct pci_driver { - struct list_head node; - const char *name; - const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ - int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ - void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ - int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ - int (*suspend_late) (struct pci_dev *dev, pm_message_t state); - int (*resume_early) (struct pci_dev *dev); - int (*resume) (struct pci_dev *dev); /* Device woken up */ + struct list_head node; + const char *name; + const struct pci_device_id *id_table; /* Must be non-NULL for probe to be called */ + int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ + void (*remove)(struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ + int (*suspend)(struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*suspend_late)(struct pci_dev *dev, pm_message_t state); + int (*resume_early)(struct pci_dev *dev); + int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ + int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* On PF */ const struct pci_error_handlers *err_handler; const struct attribute_group **groups; struct device_driver driver; - struct pci_dynids dynids; + struct pci_dynids dynids; };
#define to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
/** - * PCI_DEVICE - macro used to describe a specific pci device + * PCI_DEVICE - macro used to describe a specific PCI device * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID * @@@ -768,7 -775,7 +768,7 @@@ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
/** - * PCI_DEVICE_SUB - macro used to describe a specific pci device with subsystem + * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID * @subvend: the 16 bit PCI Subvendor ID @@@ -782,7 -789,7 +782,7 @@@ .subvendor = (subvend), .subdevice = (subdev)
/** - * PCI_DEVICE_CLASS - macro used to describe a specific pci device class + * PCI_DEVICE_CLASS - macro used to describe a specific PCI device class * @dev_class: the class, subclass, prog-if triple for this device * @dev_class_mask: the class mask for this device * @@@ -796,7 -803,7 +796,7 @@@ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
/** - * PCI_VDEVICE - macro used to describe a specific pci device in short form + * PCI_VDEVICE - macro used to describe a specific PCI device in short form * @vend: the vendor name * @dev: the 16 bit PCI Device ID * @@@ -805,21 -812,22 +805,21 @@@ * to PCI_ANY_ID. The macro allows the next field to follow as the device * private data. */ - #define PCI_VDEVICE(vend, dev) \ .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
enum { - PCI_REASSIGN_ALL_RSRC = 0x00000001, /* ignore firmware setup */ - PCI_REASSIGN_ALL_BUS = 0x00000002, /* reassign all bus numbers */ - PCI_PROBE_ONLY = 0x00000004, /* use existing setup */ - PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, /* don't do ISA alignment */ - PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* enable domains in /proc */ + PCI_REASSIGN_ALL_RSRC = 0x00000001, /* Ignore firmware setup */ + PCI_REASSIGN_ALL_BUS = 0x00000002, /* Reassign all bus numbers */ + PCI_PROBE_ONLY = 0x00000004, /* Use existing setup */ + PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, /* Don't do ISA alignment */ + PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* Enable domains in /proc */ PCI_COMPAT_DOMAIN_0 = 0x00000020, /* ... except domain 0 */ - PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* scan all, not just dev 0 */ + PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* Scan all, not just dev 0 */ };
-/* these external functions are only available when PCI support is enabled */ +/* These external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI
extern unsigned int pci_flags; @@@ -832,11 -840,11 +832,11 @@@ static inline int pci_has_flag(int flag void pcie_bus_configure_settings(struct pci_bus *bus);
enum pcie_bus_config_types { - PCIE_BUS_TUNE_OFF, /* don't touch MPS at all */ - PCIE_BUS_DEFAULT, /* ensure MPS matches upstream bridge */ - PCIE_BUS_SAFE, /* use largest MPS boot-time devices support */ - PCIE_BUS_PERFORMANCE, /* use MPS and MRRS for best performance */ - PCIE_BUS_PEER2PEER, /* set MPS = 128 for all devices */ + PCIE_BUS_TUNE_OFF, /* Don't touch MPS at all */ + PCIE_BUS_DEFAULT, /* Ensure MPS matches upstream bridge */ + PCIE_BUS_SAFE, /* Use largest MPS boot-time devices support */ + PCIE_BUS_PERFORMANCE, /* Use MPS and MRRS for best performance */ + PCIE_BUS_PEER2PEER, /* Set MPS = 128 for all devices */ };
extern enum pcie_bus_config_types pcie_bus_config; @@@ -845,7 -853,7 +845,7 @@@ extern struct bus_type pci_bus_type
/* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. */ -extern struct list_head pci_root_buses; /* list of all known PCI buses */ +extern struct list_head pci_root_buses; /* List of all known PCI buses */ /* Some device drivers need know if PCI is initiated */ int no_pci_devices(void);
@@@ -883,8 -891,8 +883,8 @@@ int pci_bus_insert_busn_res(struct pci_ int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax); void pci_bus_release_busn_res(struct pci_bus *b); struct pci_bus *pci_scan_root_bus(struct device *parent, int bus, - struct pci_ops *ops, void *sysdata, - struct list_head *resources); + struct pci_ops *ops, void *sysdata, + struct list_head *resources); int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); @@@ -941,10 -949,10 +941,10 @@@ int pci_find_next_ht_capability(struct struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, - struct pci_dev *from); + struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, - unsigned int ss_vendor, unsigned int ss_device, - struct pci_dev *from); + unsigned int ss_vendor, unsigned int ss_device, + struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); @@@ -1020,7 -1028,7 +1020,7 @@@ static inline int pcie_capability_clear return pcie_capability_clear_and_set_dword(dev, pos, clear, 0); }
-/* user-space driven config access */ +/* User-space driven config access */ int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); int pci_user_read_config_dword(struct pci_dev *dev, int where, u32 *val); @@@ -1064,6 -1072,7 +1064,7 @@@ int pci_set_pcie_reset_state(struct pci int pci_set_cacheline_size(struct pci_dev *dev); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); + int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); @@@ -1162,7 -1171,7 +1163,7 @@@ unsigned int pci_rescan_bus(struct pci_ void pci_lock_rescan_remove(void); void pci_unlock_rescan_remove(void);
-/* Vital product data routines */ +/* Vital Product Data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); int pci_set_vpd_size(struct pci_dev *dev, size_t len); @@@ -1247,7 -1256,9 +1248,7 @@@ static inline pci_bus_addr_t pci_bus_ad int __must_check __pci_register_driver(struct pci_driver *, struct module *, const char *mod_name);
-/* - * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded - */ +/* pci_register_driver() must be a macro so KBUILD_MODNAME can be expanded */ #define pci_register_driver(driver) \ __pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
@@@ -1262,7 -1273,8 +1263,7 @@@ void pci_unregister_driver(struct pci_d * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_pci_driver(__pci_driver) \ - module_driver(__pci_driver, pci_register_driver, \ - pci_unregister_driver) + module_driver(__pci_driver, pci_register_driver, pci_unregister_driver)
/** * builtin_pci_driver() - Helper macro for registering a PCI driver @@@ -1301,10 -1313,10 +1302,10 @@@ resource_size_t pcibios_iov_resource_al int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags);
-#define PCI_IRQ_LEGACY (1 << 0) /* allow legacy interrupts */ -#define PCI_IRQ_MSI (1 << 1) /* allow MSI interrupts */ -#define PCI_IRQ_MSIX (1 << 2) /* allow MSI-X interrupts */ -#define PCI_IRQ_AFFINITY (1 << 3) /* auto-assign affinity */ +#define PCI_IRQ_LEGACY (1 << 0) /* Allow legacy interrupts */ +#define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ +#define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ +#define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
@@@ -1323,8 -1335,8 +1324,8 @@@ #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
struct msix_entry { - u32 vector; /* kernel uses to write allocated vector */ - u16 entry; /* driver uses to specify entry, OS writes */ + u32 vector; /* Kernel uses to write allocated vector */ + u16 entry; /* Driver uses to specify entry, OS writes */ };
#ifdef CONFIG_PCI_MSI @@@ -1364,10 -1376,10 +1365,10 @@@ static inline int pci_msi_enabled(void static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, - struct msix_entry *entries, int minvec, int maxvec) + struct msix_entry *entries, int minvec, int maxvec) { return -ENOSYS; } static inline int pci_enable_msix_exact(struct pci_dev *dev, - struct msix_entry *entries, int nvec) + struct msix_entry *entries, int nvec) { return -ENOSYS; }
static inline int @@@ -1532,9 -1544,9 +1533,9 @@@ static inline int acpi_pci_bus_find_dom int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); #endif
-/* some architectures require additional setup to direct VGA traffic */ +/* Some architectures require additional setup to direct VGA traffic */ typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode, - unsigned int command_bits, u32 flags); + unsigned int command_bits, u32 flags); void pci_register_set_vga_state(arch_set_vga_state_t func);
static inline int @@@ -1573,9 -1585,10 +1574,9 @@@ static inline void pci_clear_flags(int static inline int pci_has_flag(int flag) { return 0; }
/* - * If the system does not have PCI, clearly these return errors. Define - * these as simple inline functions to avoid hair in drivers. + * If the system does not have PCI, clearly these return errors. Define + * these as simple inline functions to avoid hair in drivers. */ - #define _PCI_NOP(o, s, t) \ static inline int pci_##o##_config_##s(struct pci_dev *dev, \ int where, t val) \ @@@ -1674,13 -1687,6 +1675,13 @@@ static inline int pci_get_new_domain_nr #define dev_is_pf(d) (false) static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) { return false; } +static inline int pci_irqd_intx_xlate(struct irq_domain *d, + struct device_node *node, + const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ return -EINVAL; } #endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */ @@@ -1721,10 -1727,8 +1722,10 @@@ int pci_iobar_pfn(struct pci_dev *pdev #define pci_root_bus_fwnode(bus) NULL #endif
-/* these helpers provide future and backwards compatibility - * for accessing popular PCI BAR info */ +/* + * These helpers provide future and backwards compatibility + * for accessing popular PCI BAR info + */ #define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start) #define pci_resource_end(dev, bar) ((dev)->resource[(bar)].end) #define pci_resource_flags(dev, bar) ((dev)->resource[(bar)].flags) @@@ -1736,8 -1740,7 +1737,8 @@@ (pci_resource_end((dev), (bar)) - \ pci_resource_start((dev), (bar)) + 1))
-/* Similar to the helpers above, these manipulate per-pci_dev +/* + * Similar to the helpers above, these manipulate per-pci_dev * driver-specific data. They are really just a wrapper around * the generic device structure functions of these calls. */ @@@ -1751,14 -1754,16 +1752,14 @@@ static inline void pci_set_drvdata(stru dev_set_drvdata(&pdev->dev, data); }
-/* If you want to know what to call your pci_dev, ask this function. - * Again, it's a wrapper around the generic device. - */ static inline const char *pci_name(const struct pci_dev *pdev) { return dev_name(&pdev->dev); }
-/* Some archs don't want to expose struct resource to userland as-is +/* + * Some archs don't want to expose struct resource to userland as-is * in sysfs and /proc */ #ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER @@@ -1777,16 -1782,16 +1778,16 @@@ static inline void pci_resource_to_user
/* - * The world is not perfect and supplies us with broken PCI devices. - * For at least a part of these bugs we need a work-around, so both - * generic (drivers/pci/quirks.c) and per-architecture code can define - * fixup hooks to be called for particular buggy devices. + * The world is not perfect and supplies us with broken PCI devices. + * For at least a part of these bugs we need a work-around, so both + * generic (drivers/pci/quirks.c) and per-architecture code can define + * fixup hooks to be called for particular buggy devices. */
struct pci_fixup { - u16 vendor; /* You can use PCI_ANY_ID here of course */ - u16 device; /* You can use PCI_ANY_ID here of course */ - u32 class; /* You can use PCI_ANY_ID here too */ + u16 vendor; /* Or PCI_ANY_ID */ + u16 device; /* Or PCI_ANY_ID */ + u32 class; /* Or PCI_ANY_ID */ unsigned int class_shift; /* should be 0, 8, 16 */ void (*hook)(struct pci_dev *dev); }; @@@ -1828,19 -1833,23 +1829,19 @@@ enum pci_fixup_pass #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##hook, vendor, device, class, \ - class_shift, hook) + resume##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##hook, vendor, device, \ - class, class_shift, hook) + resume_early##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##hook, vendor, device, class, \ - class_shift, hook) + suspend##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ - suspend_late##hook, vendor, device, \ - class, class_shift, hook) + suspend_late##hook, vendor, device, class, class_shift, hook)
#define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ @@@ -1856,16 -1865,20 +1857,16 @@@ hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume_early##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + suspend##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_SUSPEND_LATE(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ - suspend_late##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + suspend_late##hook, vendor, device, PCI_ANY_ID, 0, hook)
#ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); @@@ -1952,7 -1965,6 +1953,7 @@@ int pci_vfs_assigned(struct pci_dev *de int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { @@@ -1980,7 -1992,6 +1981,7 @@@ static inline int pci_sriov_get_totalvf { return 0; } static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif
#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) @@@ -2051,7 -2062,6 +2052,7 @@@ void pci_request_acs(void) bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, struct pci_dev *end, u16 acs_flags); +int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask);
#define PCI_VPD_LRDT 0x80 /* Large Resource Data Type */ #define PCI_VPD_LRDT_ID(x) ((x) | PCI_VPD_LRDT) @@@ -2103,7 -2113,7 +2104,7 @@@ static inline u16 pci_vpd_lrdt_size(con */ static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt) { - return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK); + return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK); }
/** @@@ -2188,7 -2198,7 +2189,7 @@@ static inline struct device_node *pci_b return bus ? bus->dev.of_node : NULL; }
-#else /* CONFIG_OF */ +#else /* CONFIG_OF */ static inline void pci_set_of_node(struct pci_dev *dev) { } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } @@@ -2197,7 -2207,7 +2198,7 @@@ static inline struct device_node pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } static inline struct irq_domain * pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } -#endif /* CONFIG_OF */ +#endif /* CONFIG_OF */
#ifdef CONFIG_ACPI struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus); @@@ -2222,7 -2232,7 +2223,7 @@@ int pci_for_each_dma_alias(struct pci_d int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data);
-/* helper functions for operation of device flag */ +/* Helper functions for operation of device flag */ static inline void pci_set_dev_assigned(struct pci_dev *pdev) { pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; @@@ -2269,55 -2279,7 +2270,55 @@@ static inline bool pci_is_thunderbolt_a return false; }
-/* provide the legacy pci_dma_* API */ +/** + * pci_uevent_ers - emit a uevent during recovery path of pci device + * @pdev: pci device to check + * @err_type: type of error event + * + */ +static inline void pci_uevent_ers(struct pci_dev *pdev, + enum pci_ers_result err_type) +{ + int idx = 0; + char *envp[3]; + + switch (err_type) { + case PCI_ERS_RESULT_NONE: + case PCI_ERS_RESULT_CAN_RECOVER: + envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + case PCI_ERS_RESULT_RECOVERED: + envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=1"; + break; + case PCI_ERS_RESULT_DISCONNECT: + envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + default: + break; + } + + if (idx > 0) { + envp[idx++] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); + } +} + +/* Provide the legacy pci_dma_* API */ #include <linux/pci-dma-compat.h>
+#define pci_printk(level, pdev, fmt, arg...) \ + dev_printk(level, &(pdev)->dev, fmt, ##arg) + +#define pci_emerg(pdev, fmt, arg...) dev_emerg(&(pdev)->dev, fmt, ##arg) +#define pci_alert(pdev, fmt, arg...) dev_alert(&(pdev)->dev, fmt, ##arg) +#define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) +#define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg) +#define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg) +#define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg) +#define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg) +#define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg) + #endif /* LINUX_PCI_H */ diff --combined include/linux/serdev.h index d4bb46a26dc3,a73d87b483b4..531031a15cdd --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@@ -76,6 -76,12 +76,12 @@@ static inline struct serdev_device_driv return container_of(d, struct serdev_device_driver, driver); }
+ enum serdev_parity { + SERDEV_PARITY_NONE, + SERDEV_PARITY_EVEN, + SERDEV_PARITY_ODD, + }; + /* * serdev controller structures */ @@@ -86,6 -92,7 +92,7 @@@ struct serdev_controller_ops int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); + int (*set_parity)(struct serdev_controller *, enum serdev_parity); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); void (*wait_until_sent)(struct serdev_controller *, long); int (*get_tiocm)(struct serdev_controller *); @@@ -193,7 -200,6 +200,7 @@@ static inline int serdev_controller_rec
int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); +int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); @@@ -299,6 -305,9 +306,9 @@@ static inline int serdev_device_set_rts return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS); }
+ int serdev_device_set_parity(struct serdev_device *serdev, + enum serdev_parity parity); + /* * serdev hooks into TTY core */ diff --combined include/linux/skbuff.h index a87e43d16f44,b8e0da6c27d6..ac89a93b7c83 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -1211,6 -1211,11 +1211,11 @@@ static inline bool skb_flow_dissect_flo data, proto, nhoff, hlen, flags); }
+ void + skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) @@@ -3241,7 -3246,7 +3246,7 @@@ struct sk_buff *__skb_recv_datagram(str int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -unsigned int datagram_poll(struct file *file, struct socket *sock, +__poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); diff --combined include/net/inet_connection_sock.h index ec72cdb5bc39,8e1bf9ae4a5e..6692d67e9245 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@@ -77,6 -77,7 +77,7 @@@ struct inet_connection_sock_af_ops * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@@ -101,6 -102,7 +102,7 @@@ struct inet_connection_sock const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, @@@ -305,7 -307,7 +307,7 @@@ void inet_csk_prepare_forced_close(stru /* * LISTEN is a special case for poll.. */ -static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +static inline __poll_t inet_csk_listen_poll(const struct sock *sk) { return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0; diff --combined include/net/sctp/sctp.h index 608d123ef25f,20c0c1be2ca7..f7ae6b0a21d0 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@@ -107,7 -107,7 +107,7 @@@ int sctp_backlog_rcv(struct sock *sk, s int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -unsigned int sctp_poll(struct file *file, struct socket *sock, +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, @@@ -116,7 -116,7 +116,7 @@@ extern struct percpu_counter sctp_socke int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
- int sctp_transport_walk_start(struct rhashtable_iter *iter); + void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); @@@ -444,13 -444,13 +444,13 @@@ static inline int sctp_frag_point(cons int frag = pmtu;
frag -= sp->pf->af->net_header_len; - frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); + frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag);
frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sizeof(struct sctp_data_chunk))); + sctp_datachk_len(&asoc->stream)));
return frag; } diff --combined include/net/sock.h index 4fd74e0d1bbb,73b7830b0bb8..0752f034f1bf --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -72,6 -72,7 +72,7 @@@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/smc.h> + #include <net/l3mdev.h>
/* * This structure really needs to be cleaned up. @@@ -1262,6 -1263,7 +1263,7 @@@ proto_memory_pressure(struct proto *pro /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); + int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@@ -1583,7 -1585,7 +1585,7 @@@ int sock_no_connect(struct socket *, st int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); -unsigned int sock_no_poll(struct file *, struct socket *, +__poll_t sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); @@@ -2337,31 -2339,6 +2339,6 @@@ static inline bool sk_listener(const st return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); }
- /** - * sk_state_load - read sk->sk_state for lockless contexts - * @sk: socket pointer - * - * Paired with sk_state_store(). Used in places we do not hold socket lock : - * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... - */ - static inline int sk_state_load(const struct sock *sk) - { - return smp_load_acquire(&sk->sk_state); - } - - /** - * sk_state_store - update sk->sk_state - * @sk: socket pointer - * @newstate: new state - * - * Paired with sk_state_load(). Should be used in contexts where - * state change might impact lockless readers. - */ - static inline void sk_state_store(struct sock *sk, int newstate) - { - smp_store_release(&sk->sk_state, newstate); - } - void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); @@@ -2412,4 -2389,34 +2389,34 @@@ static inline int sk_get_rmem0(const st return *proto->sysctl_rmem; }
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ + static inline void sk_pacing_shift_update(struct sock *sk, int val) + { + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; + } + + /* if a socket is bound to a device, check that the given device + * index is either the same or that the socket is bound to an L3 + * master device and the given device index is also enslaved to + * that L3 master + */ + static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) + { + int mdif; + + if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + return true; + + mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); + if (mdif && mdif == sk->sk_bound_dev_if) + return true; + + return false; + } + #endif /* _SOCK_H */ diff --combined include/net/tcp.h index 50b21a49d870,093e967a2960..58278669cc55 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@@ -387,7 -387,7 +387,7 @@@ bool tcp_peer_is_proven(struct request_ void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -unsigned int tcp_poll(struct file *file, struct socket *sock, +__poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); @@@ -953,6 -953,7 +953,7 @@@ struct rate_sample u32 prior_in_flight; /* in flight before this ACK */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ };
struct tcp_congestion_ops { @@@ -1507,8 -1508,7 +1508,7 @@@ int tcp_md5_hash_key(struct tcp_md5sig_
/* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, int *syn_loss, - unsigned long *last_syn_loss); + struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); @@@ -1546,7 -1546,7 +1546,7 @@@ extern unsigned int sysctl_tcp_fastopen void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); - void tcp_fastopen_active_timeout_reset(void); + void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. @@@ -2006,17 -2006,21 +2006,21 @@@ void tcp_cleanup_ulp(struct sock *sk) * program loaded). */ #ifdef CONFIG_BPF - static inline int tcp_call_bpf(struct sock *sk, int op) + static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { struct bpf_sock_ops_kern sock_ops; int ret;
- if (sk_fullsock(sk)) + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; sock_owned_by_me(sk); + }
- memset(&sock_ops, 0, sizeof(sock_ops)); sock_ops.sk = sk; sock_ops.op = op; + if (nargs > 0) + memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); if (ret == 0) @@@ -2025,18 -2029,46 +2029,46 @@@ ret = -1; return ret; } + + static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) + { + u32 args[2] = {arg1, arg2}; + + return tcp_call_bpf(sk, op, 2, args); + } + + static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) + { + u32 args[3] = {arg1, arg2, arg3}; + + return tcp_call_bpf(sk, op, 3, args); + } + #else - static inline int tcp_call_bpf(struct sock *sk, int op) + static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { return -EPERM; } + + static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) + { + return -EPERM; + } + + static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) + { + return -EPERM; + } + #endif
static inline u32 tcp_timeout_init(struct sock *sk) { int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; @@@ -2047,7 -2079,7 +2079,7 @@@ static inline u32 tcp_rwnd_init_bpf(str { int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0) rwnd = 0; @@@ -2056,7 -2088,7 +2088,7 @@@
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { - return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); }
#if IS_ENABLED(CONFIG_SMC) diff --combined kernel/events/core.c index 74dcc477216c,878d86c513d6..57aaa427908c --- a/kernel/events/core.c +++ b/kernel/events/core.c @@@ -1231,10 -1231,6 +1231,10 @@@ static void put_ctx(struct perf_event_c * perf_event_context::lock * perf_event::mmap_mutex * mmap_sem + * + * cpu_hotplug_lock + * pmus_lock + * cpuctx->mutex / perf_event_context::mutex */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) @@@ -4200,7 -4196,6 +4200,7 @@@ int perf_event_release_kernel(struct pe { struct perf_event_context *ctx = event->ctx; struct perf_event *child, *tmp; + LIST_HEAD(free_list);
/* * If we got here through err_file: fput(event_file); we will not have @@@ -4273,7 -4268,8 +4273,7 @@@ again struct perf_event, child_list); if (tmp == child) { perf_remove_from_context(child, DETACH_GROUP); - list_del(&child->child_list); - free_event(child); + list_move(&child->child_list, &free_list); /* * This matches the refcount bump in inherit_event(); * this can't be the last reference. @@@ -4288,11 -4284,6 +4288,11 @@@ } mutex_unlock(&event->child_mutex);
+ list_for_each_entry_safe(child, tmp, &free_list, child_list) { + list_del(&child->child_list); + free_event(child); + } + no_ctx: put_event(event); /* Must be the 'last' reference */ return 0; @@@ -4520,11 -4511,11 +4520,11 @@@ perf_read(struct file *file, char __use return ret; }
-static unsigned int perf_poll(struct file *file, poll_table *wait) +static __poll_t perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; struct ring_buffer *rb; - unsigned int events = POLLHUP; + __poll_t events = POLLHUP;
poll_wait(file, &event->waitq, wait);
@@@ -4732,6 -4723,9 +4732,9 @@@ static long _perf_ioctl(struct perf_eve rcu_read_unlock(); return 0; } + + case PERF_EVENT_IOC_QUERY_BPF: + return perf_event_query_prog_array(event, (void __user *)arg); default: return -ENOTTY; } @@@ -4913,7 -4907,6 +4916,7 @@@ void perf_event_update_userpage(struct unlock: rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(perf_event_update_userpage);
static int perf_mmap_fault(struct vm_fault *vmf) { @@@ -8090,6 -8083,13 +8093,13 @@@ static int perf_event_set_bpf_prog(stru return -EINVAL; }
+ /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event);
@@@ -8526,29 -8526,6 +8536,29 @@@ fail_clear_files return ret; }
+static int +perf_tracepoint_set_filter(struct perf_event *event, char *filter_str) +{ + struct perf_event_context *ctx = event->ctx; + int ret; + + /* + * Beware, here be dragons!! + * + * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint + * stuff does not actually need it. So temporarily drop ctx->mutex. As per + * perf_event_ctx_lock() we already have a reference on ctx. + * + * This can result in event getting moved to a different ctx, but that + * does not affect the tracepoint state. + */ + mutex_unlock(&ctx->mutex); + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); + mutex_lock(&ctx->mutex); + + return ret; +} + static int perf_event_set_filter(struct perf_event *event, void __user *arg) { char *filter_str; @@@ -8565,7 -8542,8 +8575,7 @@@
if (IS_ENABLED(CONFIG_EVENT_TRACING) && event->attr.type == PERF_TYPE_TRACEPOINT) - ret = ftrace_profile_set_filter(event, event->attr.config, - filter_str); + ret = perf_tracepoint_set_filter(event, filter_str); else if (has_addr_filter(event)) ret = perf_event_set_addr_filter(event, filter_str);
@@@ -9200,13 -9178,7 +9210,13 @@@ static int perf_try_init_event(struct p if (!try_module_get(pmu->module)) return -ENODEV;
- if (event->group_leader != event) { + /* + * A number of pmu->event_init() methods iterate the sibling_list to, + * for example, validate if the group fits on the PMU. Therefore, + * if this is a sibling event, acquire the ctx->mutex to protect + * the sibling_list. + */ + if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) { /* * This ctx->mutex can nest when we're called through * inheritance. See the perf_event_ctx_lock_nested() comment. diff --combined kernel/module.c index 8042b8fcbf14,601494d4b7ea..9e68a6334553 --- a/kernel/module.c +++ b/kernel/module.c @@@ -3118,7 -3118,11 +3118,11 @@@ static int find_module_sections(struct sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif - + #ifdef CONFIG_FUNCTION_ERROR_INJECTION + mod->ei_funcs = section_objs(info, "_error_injection_whitelist", + sizeof(*mod->ei_funcs), + &mod->num_ei_funcs); + #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries);
@@@ -3938,12 -3942,6 +3942,12 @@@ static const char *get_ksymbol(struct m return symname(kallsyms, best); }
+void * __weak dereference_module_function_descriptor(struct module *mod, + void *ptr) +{ + return ptr; +} + /* For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ const char *module_address_lookup(unsigned long addr, diff --combined lib/Makefile index 749851abe85a,75ec13778cd8..7adb066692b3 --- a/lib/Makefile +++ b/lib/Makefile @@@ -28,7 -28,7 +28,7 @@@ lib-y := ctype.o string.o vsprintf.o cm
lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o +lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
lib-y += kobject.o klist.o @@@ -39,7 -39,7 +39,7 @@@ obj-y += bcd.o div64.o sort.o parser.o gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o @@@ -149,6 -149,7 +149,7 @@@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJE obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o + obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
diff --combined net/atm/common.c index 8f12f1c6fa14,5763fd241dc3..6523f38c4957 --- a/net/atm/common.c +++ b/net/atm/common.c @@@ -14,7 -14,7 +14,7 @@@ #include <linux/capability.h> #include <linux/mm.h> #include <linux/sched/signal.h> - #include <linux/time.h> /* struct timeval */ + #include <linux/time64.h> /* 64-bit time for seconds */ #include <linux/skbuff.h> #include <linux/bitops.h> #include <linux/init.h> @@@ -648,11 -648,11 +648,11 @@@ out return error; }
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct atm_vcc *vcc; - unsigned int mask; + __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; diff --combined net/batman-adv/icmp_socket.c index a98e0a986cef,8041cf106c37..581375d0eed2 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@@ -1,3 -1,4 +1,4 @@@ + // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@@ -26,6 -27,7 +27,7 @@@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> + #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/list.h> @@@ -42,11 -44,11 +44,11 @@@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/wait.h> + #include <uapi/linux/batadv_packet.h>
#include "hard-interface.h" #include "log.h" #include "originator.h" - #include "packet.h" #include "send.h"
static struct batadv_socket_client *batadv_socket_client_hash[256]; @@@ -55,6 -57,9 +57,9 @@@ static void batadv_socket_add_packet(st struct batadv_icmp_header *icmph, size_t icmp_len);
+ /** + * batadv_socket_init() - Initialize soft interface independent socket data + */ void batadv_socket_init(void) { memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash)); @@@ -292,7 -297,7 +297,7 @@@ out return len; }
-static unsigned int batadv_socket_poll(struct file *file, poll_table *wait) +static __poll_t batadv_socket_poll(struct file *file, poll_table *wait) { struct batadv_socket_client *socket_client = file->private_data;
@@@ -314,6 -319,12 +319,12 @@@ static const struct file_operations bat .llseek = no_llseek, };
+ /** + * batadv_socket_setup() - Create debugfs "socket" file + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_socket_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@@ -333,7 -344,7 +344,7 @@@ err }
/** - * batadv_socket_add_packet - schedule an icmp packet to be sent to + * batadv_socket_add_packet() - schedule an icmp packet to be sent to * userspace on an icmp socket. * @socket_client: the socket this packet belongs to * @icmph: pointer to the header of the icmp packet @@@ -390,7 -401,7 +401,7 @@@ static void batadv_socket_add_packet(st }
/** - * batadv_socket_receive_packet - schedule an icmp packet to be received + * batadv_socket_receive_packet() - schedule an icmp packet to be received * locally and sent to userspace. * @icmph: pointer to the header of the icmp packet * @icmp_len: total length of the icmp packet diff --combined net/batman-adv/log.c index 76451460c98d,da004980ab8b..9be74a44e99d --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@@ -1,3 -1,4 +1,4 @@@ + // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@@ -24,6 -25,7 +25,7 @@@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> + #include <linux/gfp.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/module.h> @@@ -86,6 -88,13 +88,13 @@@ static int batadv_fdebug_log(struct bat return 0; }
+ /** + * batadv_debug_log() - Add debug log entry + * @bat_priv: the bat priv with all the soft interface information + * @fmt: format string + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) { va_list args; @@@ -176,7 -185,7 +185,7 @@@ static ssize_t batadv_log_read(struct f return error; }
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait) +static __poll_t batadv_log_poll(struct file *file, poll_table *wait) { struct batadv_priv *bat_priv = file->private_data; struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; @@@ -197,6 -206,12 +206,12 @@@ static const struct file_operations bat .llseek = no_llseek, };
+ /** + * batadv_debug_log_setup() - Initialize debug log + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@@ -222,6 -237,10 +237,10 @@@ err return -ENOMEM; }
+ /** + * batadv_debug_log_cleanup() - Destroy debug log + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { kfree(bat_priv->debug_log); diff --combined net/bluetooth/af_bluetooth.c index 671b907ba678,f044202346c6..f897681780db --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@@ -421,7 -421,7 +421,7 @@@ out } EXPORT_SYMBOL(bt_sock_stream_recvmsg);
-static inline unsigned int bt_accept_poll(struct sock *parent) +static inline __poll_t bt_accept_poll(struct sock *parent) { struct bt_sock *s, *n; struct sock *sk; @@@ -437,11 -437,11 +437,11 @@@ return 0; }
-unsigned int bt_sock_poll(struct file *file, struct socket *sock, +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - unsigned int mask = 0; + __poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
@@@ -766,43 -766,39 +766,39 @@@ static int __init bt_init(void return err;
err = sock_register(&bt_sock_family_ops); - if (err < 0) { - bt_sysfs_cleanup(); - return err; - } + if (err) + goto cleanup_sysfs;
BT_INFO("HCI device and connection manager initialized");
err = hci_sock_init(); - if (err < 0) - goto error; + if (err) + goto unregister_socket;
err = l2cap_init(); - if (err < 0) - goto sock_err; + if (err) + goto cleanup_socket;
err = sco_init(); - if (err < 0) { - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_cap;
err = mgmt_init(); - if (err < 0) { - sco_exit(); - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_sco;
return 0;
- sock_err: + cleanup_sco: + sco_exit(); + cleanup_cap: + l2cap_exit(); + cleanup_socket: hci_sock_cleanup(); - - error: + unregister_socket: sock_unregister(PF_BLUETOOTH); + cleanup_sysfs: bt_sysfs_cleanup(); - return err; }
diff --combined net/core/sock.c index 1211159718ad,abf4cbff99b2..1033f8ab0547 --- a/net/core/sock.c +++ b/net/core/sock.c @@@ -145,6 -145,8 +145,8 @@@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list);
+ static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@@ -1531,8 -1533,11 +1533,11 @@@ struct sock *sk_alloc(struct net *net, sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; - if (likely(sk->sk_net_refcnt)) + if (likely(sk->sk_net_refcnt)) { get_net(net); + sock_inuse_add(net, 1); + } + sock_net_set(sk, net); refcount_set(&sk->sk_wmem_alloc, 1);
@@@ -1595,6 -1600,9 +1600,9 @@@ void sk_destruct(struct sock *sk
static void __sk_free(struct sock *sk) { + if (likely(sk->sk_net_refcnt)) + sock_inuse_add(sock_net(sk), -1); + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else @@@ -1716,6 -1724,8 +1724,8 @@@ struct sock *sk_clone_lock(const struc newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + if (likely(newsk->sk_net_refcnt)) + sock_inuse_add(sock_net(newsk), 1);
/* * Before updating sk_refcnt, we must commit prior changes to memory @@@ -2496,7 -2506,7 +2506,7 @@@ int sock_no_getname(struct socket *sock } EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) +__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) { return 0; } @@@ -3045,7 -3055,7 +3055,7 @@@ static DECLARE_BITMAP(proto_inuse_idx,
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); + __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
@@@ -3055,21 -3065,50 +3065,50 @@@ int sock_prot_inuse_get(struct net *net int res = 0;
for_each_possible_cpu(cpu) - res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; + res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
return res >= 0 ? res : 0; } EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+ static void sock_inuse_add(struct net *net, int val) + { + this_cpu_add(*net->core.sock_inuse, val); + } + + int sock_inuse_get(struct net *net) + { + int cpu, res = 0; + + for_each_possible_cpu(cpu) + res += *per_cpu_ptr(net->core.sock_inuse, cpu); + + return res; + } + + EXPORT_SYMBOL_GPL(sock_inuse_get); + static int __net_init sock_inuse_init_net(struct net *net) { - net->core.inuse = alloc_percpu(struct prot_inuse); - return net->core.inuse ? 0 : -ENOMEM; + net->core.prot_inuse = alloc_percpu(struct prot_inuse); + if (net->core.prot_inuse == NULL) + return -ENOMEM; + + net->core.sock_inuse = alloc_percpu(int); + if (net->core.sock_inuse == NULL) + goto out; + + return 0; + + out: + free_percpu(net->core.prot_inuse); + return -ENOMEM; }
static void __net_exit sock_inuse_exit_net(struct net *net) { - free_percpu(net->core.inuse); + free_percpu(net->core.prot_inuse); + free_percpu(net->core.sock_inuse); }
static struct pernet_operations net_inuse_ops = { @@@ -3112,6 -3151,10 +3151,10 @@@ static inline void assign_proto_idx(str static inline void release_proto_idx(struct proto *prot) { } + + static void sock_inuse_add(struct net *net, int val) + { + } #endif
static void req_prot_cleanup(struct request_sock_ops *rsk_prot) @@@ -3319,7 -3362,6 +3362,6 @@@ static int proto_seq_open(struct inode }
static const struct file_operations proto_seq_fops = { - .owner = THIS_MODULE, .open = proto_seq_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/dccp/proto.c index 8b8db3d481bd,fa7e92e08920..74685fecfdb9 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@@ -38,6 -38,9 +38,9 @@@ #include "dccp.h" #include "feat.h"
+ #define CREATE_TRACE_POINTS + #include "trace.h" + DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics); @@@ -110,7 -113,7 +113,7 @@@ void dccp_set_state(struct sock *sk, co /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk->sk_state = state; + inet_sk_set_state(sk, state); }
EXPORT_SYMBOL_GPL(dccp_set_state); @@@ -318,10 -321,10 +321,10 @@@ EXPORT_SYMBOL_GPL(dccp_disconnect) * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int dccp_poll(struct file *file, struct socket *sock, +__poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; + __poll_t mask; struct sock *sk = sock->sk;
sock_poll_wait(file, sk_sleep(sk), wait); @@@ -761,6 -764,8 +764,8 @@@ int dccp_sendmsg(struct sock *sk, struc int rc, size; long timeo;
+ trace_dccp_probe(sk, len); + if (len > dp->dccps_mss_cache) return -EMSGSIZE;
diff --combined net/decnet/af_decnet.c index 9c2dde819817,d93e5b887f03..cc1b505453a8 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@@ -1209,11 -1209,11 +1209,11 @@@ static int dn_getname(struct socket *so }
-static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait) +static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - int mask = datagram_poll(file, sock, wait); + __poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue)) mask |= POLLRDBAND; @@@ -2320,7 -2320,6 +2320,6 @@@ static int dn_socket_seq_open(struct in }
static const struct file_operations dn_socket_seq_fops = { - .owner = THIS_MODULE, .open = dn_socket_seq_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/ipv4/tcp.c index 1b38b4282cc9,874c9317b8df..c059aa7df0a9 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -283,8 -283,6 +283,6 @@@ #include <asm/ioctls.h> #include <net/busy_poll.h>
- #include <trace/events/tcp.h> - struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@@ -465,7 -463,7 +463,7 @@@ void tcp_init_transfer(struct sock *sk tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); - tcp_call_bpf(sk, bpf_op); + tcp_call_bpf(sk, bpf_op, 0, NULL); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); } @@@ -493,18 -491,16 +491,16 @@@ static void tcp_tx_timestamp(struct soc * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; + __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); int state;
- sock_rps_record_flow(sk); - sock_poll_wait(file, sk_sleep(sk), wait);
- state = sk_state_load(sk); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk);
@@@ -1106,12 -1102,15 +1102,15 @@@ static int linear_payload_sz(bool first return 0; }
- static int select_size(const struct sock *sk, bool sg, bool first_skb) + static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache;
if (sg) { + if (zc) + return 0; + if (sk_can_gso(sk)) { tmp = linear_payload_sz(first_skb); } else { @@@ -1188,7 -1187,7 +1187,7 @@@ int tcp_sendmsg_locked(struct sock *sk int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool process_backlog = false; - bool sg; + bool sg, zc = false; long timeo;
flags = msg->msg_flags; @@@ -1206,7 -1205,8 +1205,8 @@@ goto out_err; }
- if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG; + if (!zc) uarg->zerocopy = 0; }
@@@ -1283,6 -1283,7 +1283,7 @@@ restart
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; + int linear;
new_segment: /* Allocate new segment. If the interface is SG, @@@ -1296,9 -1297,8 +1297,8 @@@ goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg, first_skb), - sk->sk_allocation, + linear = select_size(sk, sg, first_skb, zc); + skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@@ -1327,13 -1327,13 +1327,13 @@@ copy = msg_data_left(msg);
/* Where to copy to? */ - if (skb_availroom(skb) > 0) { + if (skb_availroom(skb) > 0 && !zc) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else if (!uarg || !uarg->zerocopy) { + } else if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@@ -1373,8 -1373,10 +1373,10 @@@ pfrag->offset += copy; } else { err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); - if (err == -EMSGSIZE || err == -EEXIST) + if (err == -EMSGSIZE || err == -EEXIST) { + tcp_mark_push(tp, skb); goto new_segment; + } if (err < 0) goto do_error; copy = err; @@@ -1731,8 -1733,8 +1733,8 @@@ static void tcp_update_recv_tstamps(str }
/* Similar to __sock_recv_timestamp, but does not require an skb */ - void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) + static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping *tss) { struct timeval tv; bool has_timestamping = false; @@@ -2040,7 -2042,29 +2042,29 @@@ void tcp_set_state(struct sock *sk, in { int oldstate = sk->sk_state;
- trace_tcp_set_state(sk, oldstate, state); + /* We defined a new enum for TCP states that are exported in BPF + * so as not force the internal TCP states to be frozen. The + * following checks will detect if an internal state value ever + * differs from the BPF value. If this ever happens, then we will + * need to remap the internal value to the BPF value before calling + * tcp_call_bpf_2arg. + */ + BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED); + BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT); + BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2); + BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT); + BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE); + BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT); + BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK); + BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN); + BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING); + BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES); + + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG)) + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
switch (state) { case TCP_ESTABLISHED: @@@ -2065,7 -2089,7 +2089,7 @@@ /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk_state_store(sk, state); + inet_sk_state_store(sk, state);
#ifdef STATE_TRACE SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); @@@ -2434,6 -2458,12 +2458,12 @@@ int tcp_disconnect(struct sock *sk, in
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
+ if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } @@@ -2923,7 -2953,7 +2953,7 @@@ void tcp_get_info(struct sock *sk, stru if (sk->sk_type != SOCK_STREAM) return;
- info->tcpi_state = sk_state_load(sk); + info->tcpi_state = inet_sk_state_load(sk);
/* Report meaningful fields for all TCP states, including listeners */ rate = READ_ONCE(sk->sk_pacing_rate); @@@ -3581,6 -3611,9 +3611,9 @@@ void __init tcp_init(void percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); inet_hashinfo_init(&tcp_hashinfo); + inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", + thash_entries, 21, /* one slot per 2 MB*/ + 0, 64 * 1024); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --combined net/ipv4/udp.c index ef45adfc0edb,3f018f34cf56..f81f969f9c06 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -357,18 -357,12 +357,12 @@@ fail } EXPORT_SYMBOL(udp_lib_get_port);
- static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, - unsigned int port) - { - return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; - } - int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); + ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = - udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
/* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@@ -445,7 -439,7 +439,7 @@@ static struct sock *udp4_lib_lookup2(st struct sk_buff *skb) { struct sock *sk, *result; - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0;
result = NULL; @@@ -454,23 -448,16 +448,16 @@@ score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } badness = score; result = sk; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@@ -488,11 -475,11 +475,11 @@@ struct sock *__udp4_lib_lookup(struct n unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; bool exact_dif = udp_lib_exact_dif_match(net, skb); - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0;
if (hslot->count > 10) { - hash2 = udp4_portaddr_hash(net, daddr, hnum); + hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@@ -503,7 -490,7 +490,7 @@@ exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. */ if (unlikely(slot2 == old_slot2)) @@@ -526,23 -513,16 +513,16 @@@ begin score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } result = sk; badness = score; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@@ -997,8 -977,21 +977,21 @@@ int udp_sendmsg(struct sock *sk, struc if (!saddr) saddr = inet->mc_addr; connected = 0; - } else if (!ipc.oif) + } else if (!ipc.oif) { ipc.oif = inet->uc_index; + } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + /* oif is set, packet is to local broadcast and + * and uc_index is set. oif is most likely set + * by sk_bound_dev_if. If uc_index != oif check if the + * oif is an L3 master and uc_index is an L3 slave. + * If so, we want to allow the send using the uc_index. + */ + if (ipc.oif != inet->uc_index && + ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), + inet->uc_index)) { + ipc.oif = inet->uc_index; + } + }
if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); @@@ -1775,7 -1768,7 +1768,7 @@@ EXPORT_SYMBOL(udp_lib_rehash)
static void udp_v4_rehash(struct sock *sk) { - u16 new_hash = udp4_portaddr_hash(sock_net(sk), + u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@@ -1966,9 -1959,9 +1959,9 @@@ static int __udp4_lib_mcast_deliver(str struct sk_buff *nskb;
if (use_hash2) { - hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & + hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udptable->mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@@ -2200,7 -2193,7 +2193,7 @@@ static struct sock *__udp4_lib_demux_lo int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); @@@ -2502,16 -2495,14 +2495,14 @@@ int compat_udp_getsockopt(struct sock * * but then block when reading it. Add special case code * to work around these arguably broken applications. */ -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask = datagram_poll(file, sock, wait); + __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM;
- sock_rps_record_flow(sk); - /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) @@@ -2736,7 -2727,6 +2727,6 @@@ int udp4_seq_show(struct seq_file *seq }
static const struct file_operations udp_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = udp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/netfilter/x_tables.c index d7070d18db20,0b56bf05c169..8fa4d37141a7 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@@ -39,6 -39,7 +39,6 @@@ MODULE_LICENSE("GPL") MODULE_AUTHOR("Harald Welte laforge@netfilter.org"); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define XT_PCPU_BLOCK_SIZE 4096
struct compat_delta { @@@ -209,9 -210,6 +209,9 @@@ xt_request_find_match(uint8_t nfproto, { struct xt_match *match;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + match = xt_find_match(nfproto, name, revision); if (IS_ERR(match)) { request_module("%st_%s", xt_prefix[nfproto], name); @@@ -254,9 -252,6 +254,9 @@@ struct xt_target *xt_request_find_targe { struct xt_target *target;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + target = xt_find_target(af, name, revision); if (IS_ERR(target)) { request_module("%st_%s", xt_prefix[af], name); @@@ -1005,7 -1000,7 +1005,7 @@@ struct xt_table_info *xt_alloc_table_in return NULL;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) + if ((size >> PAGE_SHIFT) + 2 > totalram_pages) return NULL;
info = kvmalloc(sz, GFP_KERNEL); @@@ -1032,7 -1027,7 +1032,7 @@@ void xt_free_table_info(struct xt_table } EXPORT_SYMBOL(xt_free_table_info);
- /* Find table by name, grabs mutex & ref. Returns NULL on error. */ + /* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { @@@ -1048,17 -1043,17 +1048,17 @@@
/* Table doesn't exist in this netns, re-try init */ list_for_each_entry(t, &init_net.xt.tables[af], list) { + int err; + if (strcmp(t->name, name)) continue; - if (!try_module_get(t->me)) { - mutex_unlock(&xt[af].mutex); - return NULL; - } - + if (!try_module_get(t->me)) + goto out; mutex_unlock(&xt[af].mutex); - if (t->table_init(net) != 0) { + err = t->table_init(net); + if (err < 0) { module_put(t->me); - return NULL; + return ERR_PTR(err); }
found = t; @@@ -1078,10 -1073,28 +1078,28 @@@ module_put(found->me); out: mutex_unlock(&xt[af].mutex); - return NULL; + return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_find_table_lock);
+ struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, + const char *name) + { + struct xt_table *t = xt_find_table_lock(net, af, name); + + #ifdef CONFIG_MODULES + if (IS_ERR(t)) { + int err = request_module("%stable_%s", xt_prefix[af], name); + if (err < 0) + return ERR_PTR(err); + t = xt_find_table_lock(net, af, name); + } + #endif + + return t; + } + EXPORT_SYMBOL_GPL(xt_request_find_table_lock); + void xt_table_unlock(struct xt_table *table) { mutex_unlock(&xt[table->af].mutex); @@@ -1349,7 -1362,6 +1367,6 @@@ static int xt_table_open(struct inode * }
static const struct file_operations xt_table_ops = { - .owner = THIS_MODULE, .open = xt_table_open, .read = seq_read, .llseek = seq_lseek, @@@ -1402,7 -1414,7 +1419,7 @@@ static void *xt_mttg_seq_next(struct se trav->curr = trav->curr->next; if (trav->curr != trav->head) break; - /* fallthru, _stop will unlock */ + /* fall through */ default: return NULL; } @@@ -1485,7 -1497,6 +1502,6 @@@ static int xt_match_open(struct inode * }
static const struct file_operations xt_match_ops = { - .owner = THIS_MODULE, .open = xt_match_open, .read = seq_read, .llseek = seq_lseek, @@@ -1538,7 -1549,6 +1554,6 @@@ static int xt_target_open(struct inode }
static const struct file_operations xt_target_ops = { - .owner = THIS_MODULE, .open = xt_target_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/packet/af_packet.c index 3b4d6a3cf190,05d31864a34e..1d1483007e46 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -247,12 -247,13 +247,13 @@@ static int packet_direct_xmit(struct sk struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + bool again = false;
if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop;
- skb = validate_xmit_skb_list(skb, dev); + skb = validate_xmit_skb_list(skb, dev, &again); if (skb != orig_skb) goto drop;
@@@ -4073,12 -4074,12 +4074,12 @@@ static int packet_ioctl(struct socket * return 0; }
-static unsigned int packet_poll(struct file *file, struct socket *sock, +static __poll_t packet_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - unsigned int mask = datagram_poll(file, sock, wait); + __poll_t mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { @@@ -4530,7 -4531,6 +4531,6 @@@ static int packet_seq_open(struct inod }
static const struct file_operations packet_seq_fops = { - .owner = THIS_MODULE, .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/phonet/socket.c index 44417480dab7,fa2f13a8938f..08f6751d2030 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@@ -341,12 -341,12 +341,12 @@@ static int pn_socket_getname(struct soc return 0; }
-static unsigned int pn_socket_poll(struct file *file, struct socket *sock, +static __poll_t pn_socket_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct pep_sock *pn = pep_sk(sk); - unsigned int mask = 0; + __poll_t mask = 0;
poll_wait(file, sk_sleep(sk), wait);
@@@ -635,7 -635,6 +635,6 @@@ static int pn_sock_open(struct inode *i }
const struct file_operations pn_sock_seq_fops = { - .owner = THIS_MODULE, .open = pn_sock_open, .read = seq_read, .llseek = seq_lseek, @@@ -818,7 -817,6 +817,6 @@@ static int pn_res_open(struct inode *in }
const struct file_operations pn_res_seq_fops = { - .owner = THIS_MODULE, .open = pn_res_open, .read = seq_read, .llseek = seq_lseek, diff --combined net/sctp/socket.c index 737e551fbf67,a40fa53c93ef..356e387f82e7 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -201,6 -201,22 +201,22 @@@ static void sctp_for_each_tx_datachunk( cb(chunk); }
+ static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk, + void (*cb)(struct sk_buff *, struct sock *)) + + { + struct sk_buff *skb, *tmp; + + sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp) + cb(skb, sk); + } + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@@ -968,6 -984,13 +984,6 @@@ int sctp_asconf_mgmt(struct sctp_sock * * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace. * - * We don't use copy_from_user() for optimization: we first do the - * sanity checks (buffer size -fast- and access check-healthy - * pointer); if all of those succeed, then we can alloc the memory - * (expensive operation) needed to copy the data to kernel. Then we do - * the copying without checking the user space area - * (__copy_from_user()). - * * On exit there is no need to do sockfd_put(), sys_setsockopt() does * it. * @@@ -997,15 -1020,25 +1013,15 @@@ static int sctp_setsockopt_bindx(struc if (unlikely(addrs_size <= 0)) return -EINVAL;
- /* Check the user passed a healthy pointer. */ - if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) - return -EFAULT; - - /* Alloc space for the address array in kernel memory. */ - kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN); - if (unlikely(!kaddrs)) - return -ENOMEM; - - if (__copy_from_user(kaddrs, addrs, addrs_size)) { - kfree(kaddrs); - return -EFAULT; - } + kaddrs = vmemdup_user(addrs, addrs_size); + if (unlikely(IS_ERR(kaddrs))) + return PTR_ERR(kaddrs);
/* Walk through the addrs buffer and count the number of addresses. */ addr_buf = kaddrs; while (walk_size < addrs_size) { if (walk_size + sizeof(sa_family_t) > addrs_size) { - kfree(kaddrs); + kvfree(kaddrs); return -EINVAL; }
@@@ -1016,7 -1049,7 +1032,7 @@@ * causes the address buffer to overflow return EINVAL. */ if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kfree(kaddrs); + kvfree(kaddrs); return -EINVAL; } addrcnt++; @@@ -1046,7 -1079,7 +1062,7 @@@ }
out: - kfree(kaddrs); + kvfree(kaddrs);
return err; } @@@ -1304,6 -1337,13 +1320,6 @@@ out_free * land and invoking either sctp_connectx(). This is used for tunneling * the sctp_connectx() request through sctp_setsockopt() from userspace. * - * We don't use copy_from_user() for optimization: we first do the - * sanity checks (buffer size -fast- and access check-healthy - * pointer); if all of those succeed, then we can alloc the memory - * (expensive operation) needed to copy the data to kernel. Then we do - * the copying without checking the user space area - * (__copy_from_user()). - * * On exit there is no need to do sockfd_put(), sys_setsockopt() does * it. * @@@ -1319,6 -1359,7 +1335,6 @@@ static int __sctp_setsockopt_connectx(s sctp_assoc_t *assoc_id) { struct sockaddr *kaddrs; - gfp_t gfp = GFP_KERNEL; int err = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", @@@ -1327,12 -1368,24 +1343,12 @@@ if (unlikely(addrs_size <= 0)) return -EINVAL;
- /* Check the user passed a healthy pointer. */ - if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) - return -EFAULT; - - /* Alloc space for the address array in kernel memory. */ - if (sk->sk_socket->file) - gfp = GFP_USER | __GFP_NOWARN; - kaddrs = kmalloc(addrs_size, gfp); - if (unlikely(!kaddrs)) - return -ENOMEM; - - if (__copy_from_user(kaddrs, addrs, addrs_size)) { - err = -EFAULT; - } else { - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); - } + kaddrs = vmemdup_user(addrs, addrs_size); + if (unlikely(IS_ERR(kaddrs))) + return PTR_ERR(kaddrs);
- kfree(kaddrs); + err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + kvfree(kaddrs);
return err; } @@@ -1489,7 -1542,7 +1505,7 @@@ static void sctp_close(struct sock *sk
lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; - sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING);
ep = sctp_sk(sk)->ep;
@@@ -1515,6 -1568,7 +1531,7 @@@
if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || !skb_queue_empty(&asoc->ulpq.reasm) || + !skb_queue_empty(&asoc->ulpq.reasm_uo) || (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk;
@@@ -1969,7 -2023,20 +1986,20 @@@ static int sctp_sendmsg(struct sock *sk if (err < 0) goto out_free;
- wait_connect = true; + /* If stream interleave is enabled, wait_connect has to be + * done earlier than data enqueue, as it needs to make data + * or idata according to asoc->intl_enable which is set + * after connection is done. + */ + if (sctp_sk(asoc->base.sk)->strm_interleave) { + timeo = sock_sndtimeo(sk, 0); + err = sctp_wait_for_connect(asoc, &timeo); + if (err) + goto out_unlock; + } else { + wait_connect = true; + } + pr_debug("%s: we associated primitively\n", __func__); }
@@@ -2248,7 -2315,7 +2278,7 @@@ static int sctp_setsockopt_events(struc if (!event) return -ENOMEM;
- sctp_ulpq_tail_event(&asoc->ulpq, event); + asoc->stream.si->enqueue_event(&asoc->ulpq, event); } }
@@@ -3147,7 -3214,7 +3177,7 @@@ static int sctp_setsockopt_maxseg(struc if (val == 0) { val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sctp_datachk_len(&asoc->stream); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@@ -3317,7 -3384,10 +3347,10 @@@ static int sctp_setsockopt_fragment_int if (get_user(val, (int __user *)optval)) return -EFAULT;
- sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1; + sctp_sk(sk)->frag_interleave = !!val; + + if (!sctp_sk(sk)->frag_interleave) + sctp_sk(sk)->strm_interleave = 0;
return 0; } @@@ -4000,6 -4070,40 +4033,40 @@@ out return retval; }
+ static int sctp_setsockopt_interleaving_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) + { + struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_id) + goto out; + + if (!net->sctp.intl_enable || !sp->frag_interleave) { + retval = -EPERM; + goto out; + } + + sp->strm_interleave = !!params.assoc_value; + + retval = 0; + + out: + return retval; + } + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@@ -4187,6 -4291,10 +4254,10 @@@ static int sctp_setsockopt(struct sock case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_setsockopt_interleaving_supported(sk, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -4563,7 -4671,7 +4634,7 @@@ static void sctp_shutdown(struct sock * if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { struct sctp_association *asoc;
- sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING); asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); sctp_primitive_SHUTDOWN(net, asoc, NULL); @@@ -4657,20 -4765,11 +4728,11 @@@ int sctp_get_sctp_info(struct sock *sk EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
/* use callback to avoid exporting the core structure */ - int sctp_transport_walk_start(struct rhashtable_iter *iter) + void sctp_transport_walk_start(struct rhashtable_iter *iter) { - int err; - rhltable_walk_enter(&sctp_transport_hashtable, iter);
- err = rhashtable_walk_start(iter); - if (err && err != -EAGAIN) { - rhashtable_walk_stop(iter); - rhashtable_walk_exit(iter); - return err; - } - - return 0; + rhashtable_walk_start(iter); }
void sctp_transport_walk_stop(struct rhashtable_iter *iter) @@@ -4764,9 -4863,8 +4826,8 @@@ int sctp_for_each_transport(int (*cb)(s int ret;
again: - ret = sctp_transport_walk_start(&hti); - if (ret) - return ret; + ret = 0; + sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { @@@ -6965,6 -7063,47 +7026,47 @@@ out return retval; }
+ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) + { + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->intl_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->strm_interleave; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + + out: + return retval; + } + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@@ -7155,6 -7294,10 +7257,10 @@@ retval = sctp_getsockopt_scheduler_value(sk, len, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_getsockopt_interleaving_supported(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -7389,13 -7532,13 +7495,13 @@@ static int sctp_listen_start(struct soc * sockets. * */ - sk->sk_state = SCTP_SS_LISTENING; + inet_sk_set_state(sk, SCTP_SS_LISTENING); if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; } else { if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { - sk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(sk, SCTP_SS_CLOSED); return -EADDRINUSE; } } @@@ -7481,11 -7624,11 +7587,11 @@@ out * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ -unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); - unsigned int mask; + __poll_t mask;
poll_wait(file, sk_sleep(sk), wait);
@@@ -8388,11 -8531,7 +8494,7 @@@ static void sctp_sock_migrate(struct so
}
- sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); - - sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); + sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
/* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a @@@ -8418,10 -8557,10 +8520,10 @@@ * is called, set RCV_SHUTDOWN flag. */ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { - newsk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(newsk, SCTP_SS_CLOSED); newsk->sk_shutdown |= RCV_SHUTDOWN; } else { - newsk->sk_state = SCTP_SS_ESTABLISHED; + inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED); }
release_sock(newsk); diff --combined net/smc/af_smc.c index 449f62e1e270,267e68379110..3583c8ab1bae --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@@ -115,7 -115,6 +115,6 @@@ static int smc_release(struct socket *s goto out;
smc = smc_sk(sk); - sock_hold(sk); if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again @@@ -124,10 -123,7 +123,7 @@@ else lock_sock(sk);
- if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); - } else { + if (!smc->use_fallback) { rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@@ -136,20 -132,21 +132,21 @@@ sock_release(smc->clcsock); smc->clcsock = NULL; } + if (smc->use_fallback) { + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); + }
/* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { + if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } release_sock(sk);
- sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ out: return rc; } @@@ -181,7 -178,6 +178,6 @@@ static struct sock *smc_sock_alloc(stru INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); - INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk);
@@@ -377,6 -373,15 +373,15 @@@ static void smc_link_save_peer_info(str link->peer_mtu = clc->qp_mtu; }
+ static void smc_lgr_forget(struct smc_link_group *lgr) + { + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); + } + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { @@@ -390,6 -395,8 +395,8 @@@ int rc = 0; u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */ + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { /* peer has not signalled SMC-capability */ smc->use_fallback = true; @@@ -513,6 -520,8 +520,8 @@@ out_connected return rc ? rc : local_contact;
decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); decline_rdma: @@@ -520,15 -529,19 +529,19 @@@ smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected;
out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return rc; }
@@@ -581,40 -594,33 +594,33 @@@ out_err
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) { - struct sock *sk = &lsmc->sk; - struct socket *new_clcsock; + struct socket *new_clcsock = NULL; + struct sock *lsk = &lsmc->sk; struct sock *new_sk; int rc;
- release_sock(&lsmc->sk); - new_sk = smc_sock_alloc(sock_net(sk), NULL); + release_sock(lsk); + new_sk = smc_sock_alloc(sock_net(lsk), NULL); if (!new_sk) { rc = -ENOMEM; - lsmc->sk.sk_err = ENOMEM; + lsk->sk_err = ENOMEM; *new_smc = NULL; - lock_sock(&lsmc->sk); + lock_sock(lsk); goto out; } *new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); - lock_sock(&lsmc->sk); - if (rc < 0) { - lsmc->sk.sk_err = -rc; - new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); - *new_smc = NULL; - goto out; - } - if (lsmc->sk.sk_state == SMC_CLOSED) { + lock_sock(lsk); + if (rc < 0) + lsk->sk_err = -rc; + if (rc < 0 || lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + new_sk->sk_prot->unhash(new_sk); + sock_put(new_sk); /* final */ *new_smc = NULL; goto out; } @@@ -631,7 -637,7 +637,7 @@@ static void smc_accept_enqueue(struct s { struct smc_sock *par = smc_sk(parent);
- sock_hold(sk); + sock_hold(sk); /* sock_put in smc_accept_unlink () */ spin_lock(&par->accept_q_lock); list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); spin_unlock(&par->accept_q_lock); @@@ -647,7 -653,7 +653,7 @@@ static void smc_accept_unlink(struct so list_del_init(&smc_sk(sk)->accept_q); spin_unlock(&par->accept_q_lock); sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); - sock_put(sk); + sock_put(sk); /* sock_hold in smc_accept_enqueue */ }
/* remove a sock from the accept queue to bind it to a new socket created @@@ -664,8 -670,12 +670,12 @@@ struct sock *smc_accept_dequeue(struct
smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + if (isk->clcsock) { + sock_release(isk->clcsock); + isk->clcsock = NULL; + } new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + sock_put(new_sk); /* final */ continue; } if (new_sock) @@@ -680,14 -690,11 +690,11 @@@ void smc_close_non_accepted(struct soc { struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk); lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - } else { + if (!smc->use_fallback) { smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@@ -700,14 -707,15 +707,15 @@@ sock_release(tcp); } if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + } else { + if (sk->sk_state == SMC_CLOSED) + smc_conn_free(&smc->conn); } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ }
static int smc_serv_conf_first_link(struct smc_sock *smc) @@@ -751,14 -759,16 +759,16 @@@ static void smc_listen_work(struct work { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@@ -775,7 -785,7 +785,7 @@@ /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@@ -804,8 -814,11 +814,11 @@@ reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@@ -816,7 -829,7 +829,7 @@@ /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@@ -879,11 -892,9 +892,9 @@@ } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@@ -910,21 -921,26 +921,26 @@@ enqueue return;
decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected;
out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: + if (newsmcsk->sk_state == SMC_INIT) + sock_put(&new_smc->sk); /* passive closing */ newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); goto enqueue; /* queue new sock with sk_err set */ @@@ -934,11 -950,12 +950,12 @@@ static void smc_tcp_listen_work(struct { struct smc_sock *lsmc = container_of(work, struct smc_sock, tcp_listen_work); + struct sock *lsk = &lsmc->sk; struct smc_sock *new_smc; int rc = 0;
- lock_sock(&lsmc->sk); - while (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock(lsk); + while (lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); if (rc) goto out; @@@ -947,15 -964,25 +964,25 @@@
new_smc->listen_smc = lsmc; new_smc->use_fallback = false; /* assume rdma capability first*/ - sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ + sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); - schedule_work(&new_smc->smc_listen_work); + sock_hold(&new_smc->sk); /* sock_put in passive closing */ + if (!schedule_work(&new_smc->smc_listen_work)) + sock_put(&new_smc->sk); }
out: - release_sock(&lsmc->sk); - lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ + if (lsmc->clcsock) { + sock_release(lsmc->clcsock); + lsmc->clcsock = NULL; + } + release_sock(lsk); + /* no more listening, wake up smc_close_wait_listen_clcsock and + * accept + */ + lsk->sk_state_change(lsk); + sock_put(&lsmc->sk); /* sock_hold in smc_listen */ }
static int smc_listen(struct socket *sock, int backlog) @@@ -989,7 -1016,9 +1016,9 @@@ sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); - schedule_work(&smc->tcp_listen_work); + sock_hold(sk); /* sock_hold in tcp_listen_worker */ + if (!schedule_work(&smc->tcp_listen_work)) + sock_put(sk);
out: release_sock(sk); @@@ -1006,6 -1035,7 +1035,7 @@@ static int smc_accept(struct socket *so int rc = 0;
lsmc = smc_sk(sk); + sock_hold(sk); /* sock_put below */ lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) { @@@ -1040,6 -1070,7 +1070,7 @@@
out: release_sock(sk); + sock_put(sk); /* sock_hold above */ return rc; }
@@@ -1107,36 -1138,36 +1138,36 @@@ out return rc; }
-static unsigned int smc_accept_poll(struct sock *parent) +static __poll_t smc_accept_poll(struct sock *parent) { - struct smc_sock *isk; - struct sock *sk; - - lock_sock(parent); - list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)isk; + struct smc_sock *isk = smc_sk(parent); + int mask = 0;
- if (sk->sk_state == SMC_ACTIVE) { - release_sock(parent); - return POLLIN | POLLRDNORM; - } - } - release_sock(parent); + spin_lock(&isk->accept_q_lock); + if (!list_empty(&isk->accept_q)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&isk->accept_q_lock);
- return 0; + return mask; }
-static unsigned int smc_poll(struct file *file, struct socket *sock, +static __poll_t smc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - unsigned int mask = 0; + __poll_t mask = 0; struct smc_sock *smc; int rc;
+ if (!sk) + return POLLNVAL; + smc = smc_sk(sock->sk); + sock_hold(sk); + lock_sock(sk); if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ + release_sock(sk); mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); /* if non-blocking connect finished ... */ lock_sock(sk); @@@ -1148,37 -1179,43 +1179,43 @@@ rc = smc_connect_rdma(smc); if (rc < 0) mask |= POLLERR; - else - /* success cases including fallback */ - mask |= POLLOUT | POLLWRNORM; + /* success cases including fallback */ + mask |= POLLOUT | POLLWRNORM; } } - release_sock(sk); } else { - sock_poll_wait(file, sk_sleep(sk), wait); - if (sk->sk_state == SMC_LISTEN) - /* woken up by sk_data_ready in smc_listen_work() */ - mask |= smc_accept_poll(sk); + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= POLLERR; - if (atomic_read(&smc->conn.sndbuf_space) || - (sk->sk_shutdown & SEND_SHUTDOWN)) { - mask |= POLLOUT | POLLWRNORM; - } else { - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - if (atomic_read(&smc->conn.bytes_to_rcv)) - mask |= POLLIN | POLLRDNORM; if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) mask |= POLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; - if (sk->sk_state == SMC_APPCLOSEWAIT1) - mask |= POLLIN; + if (sk->sk_state == SMC_LISTEN) { + /* woken up by sk_data_ready in smc_listen_work() */ + mask = smc_accept_poll(sk); + } else { + if (atomic_read(&smc->conn.sndbuf_space) || + sk->sk_shutdown & SEND_SHUTDOWN) { + mask |= POLLOUT | POLLWRNORM; + } else { + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + } + if (atomic_read(&smc->conn.bytes_to_rcv)) + mask |= POLLIN | POLLRDNORM; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM | POLLRDHUP; + if (sk->sk_state == SMC_APPCLOSEWAIT1) + mask |= POLLIN; + }
} + release_sock(sk); + sock_put(sk);
return mask; } diff --combined net/smc/smc_clc.c index 511548085d16,abf7ceb6690b..8ac51583a063 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@@ -22,6 -22,54 +22,54 @@@ #include "smc_clc.h" #include "smc_ib.h"
+ /* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ + static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) + { + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; + } + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@@ -35,7 -83,7 +83,7 @@@ int smc_clc_wait_msg(struct smc_sock *s struct smc_clc_msg_hdr *clcm = buf; struct msghdr msg = {NULL, 0}; int reason_code = 0; - struct kvec vec; + struct kvec vec = {buf, buflen}; int len, datlen; int krflags;
@@@ -43,15 -91,12 +91,15 @@@ * so we don't consume any subsequent CLC message or payload data * in the TCP byte stream */ - vec.iov_base = buf; - vec.iov_len = buflen; + /* + * Caller must make sure that buflen is no less than + * sizeof(struct smc_clc_msg_hdr) + */ krflags = MSG_PEEK | MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, - sizeof(struct smc_clc_msg_hdr), krflags); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, + sizeof(struct smc_clc_msg_hdr)); + len = sock_recvmsg(smc->clcsock, &msg, krflags); if (signal_pending(current)) { reason_code = -EINTR; clc_sk->sk_err = EINTR; @@@ -75,9 -120,7 +123,7 @@@ } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@@ -86,12 -129,13 +132,12 @@@ }
/* receive the complete CLC message */ - vec.iov_base = buf; - vec.iov_len = buflen; memset(&msg, 0, sizeof(struct msghdr)); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen); krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); + len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@@ -135,7 -179,7 +181,7 @@@ int smc_clc_send_decline(struct smc_soc smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); }
/* send CLC PROPOSAL message across internal TCP socket */ @@@ -143,33 -187,43 +189,43 @@@ int smc_clc_send_proposal(struct smc_so struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc;
/* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0);
+ memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --combined net/socket.c index 2f378449bc1b,11cc2cd0f37b..a93c99b518ca --- a/net/socket.c +++ b/net/socket.c @@@ -118,7 -118,7 +118,7 @@@ static ssize_t sock_write_iter(struct k static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file); -static unsigned int sock_poll(struct file *file, +static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT @@@ -163,12 -163,6 +163,6 @@@ static DEFINE_SPINLOCK(net_family_lock) static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/* - * Statistics counters of the socket lists - */ - - static DEFINE_PER_CPU(int, sockets_in_use); - - /* * Support routines. * Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. @@@ -580,7 -574,6 +574,6 @@@ struct socket *sock_alloc(void inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops;
- this_cpu_add(sockets_in_use, 1); return sock; } EXPORT_SYMBOL(sock_alloc); @@@ -607,7 -600,6 +600,6 @@@ void sock_release(struct socket *sock if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__);
- this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; @@@ -969,9 -961,28 +961,28 @@@ static long sock_do_ioctl(struct net *n * If this ioctl is unknown try to hand it down * to the NIC driver. */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(net, cmd, argp); + if (err != -ENOIOCTLCMD) + return err;
+ if (cmd == SIOCGIFCONF) { + struct ifconf ifc; + if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) + return -EFAULT; + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); + rtnl_unlock(); + if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) + err = -EFAULT; + } else { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } return err; }
@@@ -996,12 -1007,19 +1007,19 @@@ static long sock_ioctl(struct file *fil sock = file->private_data; sk = sock->sk; net = sock_net(sk); - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(net, cmd, argp); + if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; } else #ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(net, cmd, argp); + err = wext_handle_ioctl(net, cmd, argp); } else #endif switch (cmd) { @@@ -1097,9 -1115,9 +1115,9 @@@ out_release EXPORT_SYMBOL(sock_create_lite);
/* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table *wait) +static __poll_t sock_poll(struct file *file, poll_table *wait) { - unsigned int busy_flag = 0; + __poll_t busy_flag = 0; struct socket *sock;
/* @@@ -2621,29 -2639,11 +2639,11 @@@ out_fs
core_initcall(sock_init); /* early initcall */
- static int __init jit_init(void) - { - #ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; - #endif - return 0; - } - pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); + seq_printf(seq, "sockets: used %d\n", + sock_inuse_get(seq->private)); } #endif /* CONFIG_PROC_FS */
@@@ -2680,89 -2680,25 +2680,25 @@@ static int do_siocgstampns(struct net * return err; }
- static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) - { - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(struct ifreq)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - err = dev_ioctl(net, SIOCGIFNAME, uifr); - if (err) - return err; - - if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) - return -EFAULT; - - return 0; - } - - static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) + static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) { struct compat_ifconf ifc32; struct ifconf ifc; - struct ifconf __user *uifc; - struct compat_ifreq __user *ifr32; - struct ifreq __user *ifr; - unsigned int i, j; int err;
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT;
- memset(&ifc, 0, sizeof(ifc)); - if (ifc32.ifcbuf == 0) { - ifc32.ifc_len = 0; - ifc.ifc_len = 0; - ifc.ifc_req = NULL; - uifc = compat_alloc_user_space(sizeof(struct ifconf)); - } else { - size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * - sizeof(struct ifreq); - uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); - ifc.ifc_len = len; - ifr = ifc.ifc_req = (void __user *)(uifc + 1); - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { - if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr++; - ifr32++; - } - } - if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) - return -EFAULT; + ifc.ifc_len = ifc32.ifc_len; + ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
- err = dev_ioctl(net, SIOCGIFCONF, uifc); + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); + rtnl_unlock(); if (err) return err;
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) - return -EFAULT; - - ifr = ifc.ifc_req; - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0, j = 0; - i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr32++; - ifr++; - } - - if (ifc32.ifcbuf == 0) { - /* Translate from 64-bit structure multiple to - * a 32-bit one. - */ - i = ifc.ifc_len; - i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); - ifc32.ifc_len = i; - } else { - ifc32.ifc_len = i; - } + ifc32.ifc_len = ifc.ifc_len; if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) return -EFAULT;
@@@ -2773,9 -2709,9 +2709,9 @@@ static int ethtool_ioctl(struct net *ne { struct compat_ethtool_rxnfc __user *compat_rxnfc; bool convert_in = false, convert_out = false; - size_t buf_size = ALIGN(sizeof(struct ifreq), 8); - struct ethtool_rxnfc __user *rxnfc; - struct ifreq __user *ifr; + size_t buf_size = 0; + struct ethtool_rxnfc __user *rxnfc = NULL; + struct ifreq ifr; u32 rule_cnt = 0, actual_rule_cnt; u32 ethcmd; u32 data; @@@ -2812,18 -2748,14 +2748,14 @@@ case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; + rxnfc = compat_alloc_user_space(buf_size); break; }
- ifr = compat_alloc_user_space(buf_size); - rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); - - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT;
- if (put_user(convert_in ? rxnfc : compat_ptr(data), - &ifr->ifr_ifru.ifru_data)) - return -EFAULT; + ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
if (convert_in) { /* We expect there to be holes between fs.m_ext and @@@ -2851,7 -2783,7 +2783,7 @@@ return -EFAULT; }
- ret = dev_ioctl(net, SIOCETHTOOL, ifr); + ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); if (ret) return ret;
@@@ -2892,113 -2824,43 +2824,43 @@@
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { - void __user *uptr; compat_uptr_t uptr32; - struct ifreq __user *uifr; + struct ifreq ifr; + void __user *saved; + int err;
- uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) return -EFAULT;
- uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) - return -EFAULT; - - return dev_ioctl(net, SIOCWANDEV, uifr); - } - - static int bond_ioctl(struct net *net, unsigned int cmd, - struct compat_ifreq __user *ifr32) - { - struct ifreq kifr; - mm_segment_t old_fs; - int err; + saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; + ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- switch (cmd) { - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, - (struct ifreq __user __force *) &kifr); - set_fs(old_fs); - - return err; - default: - return -ENOIOCTLCMD; + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + if (!err) { + ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; + if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + err = -EFAULT; } + return err; }
/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { - struct ifreq __user *u_ifreq64; - char tmp_buf[IFNAMSIZ]; - void __user *data64; + struct ifreq ifreq; u32 data32;
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), - IFNAMSIZ)) + if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_data)) return -EFAULT; - data64 = compat_ptr(data32); + ifreq.ifr_data = compat_ptr(data32);
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - - if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], - IFNAMSIZ)) - return -EFAULT; - if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, u_ifreq64); - } - - static int dev_ifsioc(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_ifreq __user *uifr32) - { - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; + return dev_ioctl(net, cmd, &ifreq, NULL); }
static int compat_sioc_ifmap(struct net *net, unsigned int cmd, @@@ -3006,7 -2868,6 +2868,6 @@@ { struct ifreq ifr; struct compat_ifmap __user *uifmap32; - mm_segment_t old_fs; int err;
uifmap32 = &uifr32->ifr_ifru.ifru_map; @@@ -3020,10 -2881,7 +2881,7 @@@ if (err) return -EFAULT;
- old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user __force *)&ifr); - set_fs(old_fs); + err = dev_ioctl(net, cmd, &ifr, NULL);
if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@@ -3156,10 -3014,8 +3014,8 @@@ static int compat_sock_ioctl_trans(stru case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFNAME: - return dev_ifname32(net, argp); case SIOCGIFCONF: - return dev_ifconf(net, argp); + return compat_dev_ifconf(net, argp); case SIOCETHTOOL: return ethtool_ioctl(net, argp); case SIOCWANDEV: @@@ -3167,11 -3023,6 +3023,6 @@@ case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - return bond_ioctl(net, cmd, argp); case SIOCADDRT: case SIOCDELRT: return routing_ioctl(net, sock, cmd, argp); @@@ -3231,12 -3082,15 +3082,15 @@@ case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return dev_ifsioc(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); }
@@@ -3391,19 -3245,6 +3245,6 @@@ int kernel_sendpage_locked(struct sock } EXPORT_SYMBOL(kernel_sendpage_locked);
- int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) - { - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; - } - EXPORT_SYMBOL(kernel_sock_ioctl); - int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); diff --combined net/tipc/server.c index 78a292a84afc,c0d331f13eee..df0c563c90cd --- a/net/tipc/server.c +++ b/net/tipc/server.c @@@ -132,10 -132,11 +132,11 @@@ static struct tipc_conn *tipc_conn_look
spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); - if (con && test_bit(CF_CONNECTED, &con->flags)) - conn_get(con); - else - con = NULL; + if (con) { + if (!test_bit(CF_CONNECTED, &con->flags) || + !kref_get_unless_zero(&con->kref)) + con = NULL; + } spin_unlock_bh(&s->idr_lock); return con; } @@@ -183,35 -184,28 +184,28 @@@ static void tipc_register_callbacks(str write_unlock_bh(&sk->sk_callback_lock); }
- static void tipc_unregister_callbacks(struct tipc_conn *con) - { - struct sock *sk = con->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); - } - static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; + bool disconnect = false;
- if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->sock) - tipc_unregister_callbacks(con); - + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + if (disconnect) { + sk->sk_user_data = NULL; if (con->conid) s->tipc_conn_release(con->conid, con->usr_data); - - /* We shouldn't flush pending works as we may be in the - * thread. In fact the races with pending rx/tx work structs - * are harmless for us here as we have already deleted this - * connection from server connection list. - */ - if (con->sock) - kernel_sock_shutdown(con->sock, SHUT_RDWR); - conn_put(con); } + write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + conn_put(con); }
static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) @@@ -248,9 -242,10 +242,10 @@@
static int tipc_receive_from_sock(struct tipc_conn *con) { - struct msghdr msg = {}; struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; struct sockaddr_tipc addr; + struct msghdr msg = {}; struct kvec iov; void *buf; int ret; @@@ -264,19 -259,22 +259,22 @@@ iov.iov_base = buf; iov.iov_len = s->max_rcvbuf_size; msg.msg_name = &addr; - ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, - MSG_DONTWAIT); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret <= 0) { kmem_cache_free(s->rcvbuf_cache, buf); goto out_close; }
- s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, - con->usr_data, buf, ret); - + read_lock_bh(&sk->sk_callback_lock); + if (test_bit(CF_CONNECTED, &con->flags)) + ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, + &addr, con->usr_data, buf, ret); + read_unlock_bh(&sk->sk_callback_lock); kmem_cache_free(s->rcvbuf_cache, buf); - - return 0; + if (ret < 0) + tipc_conn_terminate(s, con->conid); + return ret;
out_close: if (ret != -EWOULDBLOCK) @@@ -489,8 -487,8 +487,8 @@@ void tipc_conn_terminate(struct tipc_se } }
- bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid) + bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) { struct tipc_subscriber *scbr; struct tipc_subscr sub; @@@ -501,7 -499,7 +499,7 @@@ sub.seq.lower = lower; sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = TIPC_SUB_PORTS; + sub.filter = filter; *(u32 *)&sub.usr_handle = port;
con = tipc_alloc_conn(tipc_topsrv(net)); @@@ -525,11 -523,17 +523,17 @@@ void tipc_topsrv_kern_unsubscr(struct net *net, int conid) { struct tipc_conn *con; + struct tipc_server *srv;
con = tipc_conn_lookup(tipc_topsrv(net), conid); if (!con) return; - tipc_close_conn(con); + + test_and_clear_bit(CF_CONNECTED, &con->flags); + srv = con->server; + if (con->conid) + srv->tipc_conn_release(con->conid, con->usr_data); + conn_put(con); conn_put(con); }
diff --combined net/tipc/socket.c index 2aa46e8cd8fe,473a096b6fba..163f3a547501 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@@ -116,6 -116,7 +116,7 @@@ struct tipc_sock struct tipc_mc_method mc_method; struct rcu_head rcu; struct tipc_group *group; + bool group_is_open; };
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@@ -710,13 -711,12 +711,12 @@@ static int tipc_getname(struct socket * * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static unsigned int tipc_poll(struct file *file, struct socket *sock, +static __poll_t tipc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; - u32 revents = 0; + __poll_t revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
@@@ -736,9 -736,8 +736,8 @@@ revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!grp || tipc_group_size(grp)) - if (!tsk->cong_link_cnt) - revents |= POLLOUT; + if (tsk->group_is_open && !tsk->cong_link_cnt) + revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) @@@ -772,7 -771,6 +771,6 @@@ static int tipc_sendmcast(struct socke struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); struct tipc_mc_method *method = &tsk->mc_method; - u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE); struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@@ -788,7 -786,7 +786,7 @@@ /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, domain, &dsts); + seq->upper, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH;
@@@ -928,21 -926,22 +926,22 @@@ static int tipc_send_group_anycast(stru struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, exclude; - u32 type, inst, domain; struct list_head dsts; + u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong;
INIT_LIST_HEAD(&dsts);
- type = dest->addr.name.name.type; + type = msg_nametype(hdr); inst = dest->addr.name.name.instance; - domain = addr_domain(net, dest->scope); + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp);
while (++lookups < 4) { @@@ -950,7 -949,7 +949,7 @@@
/* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); @@@ -1079,22 -1078,23 +1078,23 @@@ static int tipc_send_group_mcast(struc { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - struct tipc_name_seq *seq = &dest->addr.nameseq; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 domain, exclude, dstcnt; + u32 type, inst, scope, exclude; struct list_head dsts; + u32 dstcnt;
INIT_LIST_HEAD(&dsts);
- if (seq->lower != seq->upper) - return -ENOTSUPP; - - domain = addr_domain(net, dest->scope); + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, - &dsts, &dstcnt, exclude, true)) + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) return -EHOSTUNREACH;
if (dstcnt == 1) { @@@ -1116,24 -1116,29 +1116,29 @@@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - u32 scope = TIPC_CLUSTER_SCOPE; u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; - u32 lower = 0, upper = ~0; - struct sk_buff_head tmpq; u32 portid, oport, onode; + struct sk_buff_head tmpq; struct list_head dports; - struct tipc_msg *msg; - int user, mtyp, hsz; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact;
__skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports);
skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - user = msg_user(msg); - mtyp = msg_type(msg); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { @@@ -1144,21 -1149,31 +1149,31 @@@ spin_unlock_bh(&inputq->lock); continue; } - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - oport = msg_origport(msg); - onode = msg_orignode(msg); - if (onode == self) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - if (!msg_in_group(msg)) { - lower = msg_namelower(msg); - upper = msg_nameupper(msg); + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); } - tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, - scope, &dports); + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@@ -1933,8 -1948,7 +1948,7 @@@ static void tipc_sk_proto_rcv(struct so break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, - skb, inputq, xmitq); - skb = NULL; + hdr, inputq, xmitq); break; default: break; @@@ -2640,9 -2654,7 +2654,7 @@@ void tipc_sk_reinit(struct net *net rhashtable_walk_enter(&tn->sk_rht, &iter);
do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@@ -2651,7 -2663,7 +2663,7 @@@ msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } @@@ -2734,7 -2746,6 +2746,6 @@@ void tipc_sk_rht_destroy(struct net *ne static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); - u32 domain = addr_domain(net, mreq->scope); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq seq; @@@ -2742,9 -2753,11 +2753,11 @@@
if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; if (grp) return -EACCES; - grp = tipc_group_create(net, tsk->portid, mreq); + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); if (!grp) return -ENOMEM; tsk->group = grp; @@@ -2754,16 -2767,17 +2767,17 @@@ seq.type = mreq->type; seq.lower = mreq->instance; seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, domain); + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; + return rc; } - - /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; }
diff --combined net/unix/af_unix.c index 6b7678df41e5,90a3784e3084..0214acbd6bff --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -367,7 -367,7 +367,7 @@@ static int unix_dgram_peer_wake_relay(w /* relaying can only happen while the wq still exists */ u_sleep = sk_sleep(&u->sk); if (u_sleep) - wake_up_interruptible_poll(u_sleep, key); + wake_up_interruptible_poll(u_sleep, key_to_poll(key));
return 0; } @@@ -638,8 -638,8 +638,8 @@@ static int unix_stream_connect(struct s static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int *, int); -static unsigned int unix_poll(struct file *, struct socket *, poll_table *); -static unsigned int unix_dgram_poll(struct file *, struct socket *, +static __poll_t unix_poll(struct file *, struct socket *, poll_table *); +static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); @@@ -2640,10 -2640,10 +2640,10 @@@ static int unix_ioctl(struct socket *so return err; }
-static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) +static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - unsigned int mask; + __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; @@@ -2675,12 -2675,11 +2675,12 @@@ return mask; }
-static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, +static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk, *other; - unsigned int mask, writable; + unsigned int writable; + __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; @@@ -2870,7 -2869,6 +2870,6 @@@ static int unix_seq_open(struct inode * }
static const struct file_operations unix_seq_fops = { - .owner = THIS_MODULE, .open = unix_seq_open, .read = seq_read, .llseek = seq_lseek,
linux-merge@lists.open-mesh.org